1 /*- 2 * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org> 3 * All rights reserved. 4 * Copyright (c) 2020, Chelsio Communications. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/eventhandler.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/hash.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/module.h> 42 #include <sys/refcount.h> 43 #include <sys/rmlock.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/queue.h> 47 #include <sys/sbuf.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/sysctl.h> 52 #include <sys/systm.h> 53 54 #include <net/bpf.h> 55 #include <net/ethernet.h> 56 #include <net/if.h> 57 #include <net/if_var.h> 58 #include <net/if_clone.h> 59 #include <net/if_dl.h> 60 #include <net/if_media.h> 61 #include <net/if_types.h> 62 #include <net/if_vxlan.h> 63 #include <net/netisr.h> 64 #include <net/route.h> 65 #include <net/route/nhop.h> 66 67 #include <netinet/in.h> 68 #include <netinet/in_systm.h> 69 #include <netinet/in_var.h> 70 #include <netinet/in_pcb.h> 71 #include <netinet/ip.h> 72 #include <netinet/ip6.h> 73 #include <netinet/ip_var.h> 74 #include <netinet/udp.h> 75 #include <netinet/udp_var.h> 76 #include <netinet/in_fib.h> 77 #include <netinet6/in6_fib.h> 78 79 #include <netinet6/ip6_var.h> 80 #include <netinet6/scope6_var.h> 81 82 struct vxlan_softc; 83 LIST_HEAD(vxlan_softc_head, vxlan_softc); 84 85 struct sx vxlan_sx; 86 SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock"); 87 88 struct vxlan_socket_mc_info { 89 union vxlan_sockaddr vxlsomc_saddr; 90 union vxlan_sockaddr vxlsomc_gaddr; 91 int vxlsomc_ifidx; 92 int vxlsomc_users; 93 }; 94 95 /* 96 * The maximum MTU of encapsulated ethernet frame within IPv4/UDP packet. 
97 */ 98 #define VXLAN_MAX_MTU (IP_MAXPACKET - \ 99 60 /* Maximum IPv4 header len */ - \ 100 sizeof(struct udphdr) - \ 101 sizeof(struct vxlan_header) - \ 102 ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) 103 #define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU) 104 105 #define VXLAN_SO_MC_MAX_GROUPS 32 106 107 #define VXLAN_SO_VNI_HASH_SHIFT 6 108 #define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT) 109 #define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE) 110 111 struct vxlan_socket { 112 struct socket *vxlso_sock; 113 struct rmlock vxlso_lock; 114 u_int vxlso_refcnt; 115 union vxlan_sockaddr vxlso_laddr; 116 LIST_ENTRY(vxlan_socket) vxlso_entry; 117 struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE]; 118 struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS]; 119 }; 120 121 #define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p)) 122 #define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p)) 123 #define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock) 124 #define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock) 125 #define VXLAN_SO_LOCK_ASSERT(_vso) \ 126 rm_assert(&(_vso)->vxlso_lock, RA_LOCKED) 127 #define VXLAN_SO_LOCK_WASSERT(_vso) \ 128 rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED) 129 130 #define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt) 131 #define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt) 132 133 struct vxlan_ftable_entry { 134 LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash; 135 uint16_t vxlfe_flags; 136 uint8_t vxlfe_mac[ETHER_ADDR_LEN]; 137 union vxlan_sockaddr vxlfe_raddr; 138 time_t vxlfe_expire; 139 }; 140 141 #define VXLAN_FE_FLAG_DYNAMIC 0x01 142 #define VXLAN_FE_FLAG_STATIC 0x02 143 144 #define VXLAN_FE_IS_DYNAMIC(_fe) \ 145 ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC) 146 147 #define VXLAN_SC_FTABLE_SHIFT 9 148 #define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT) 149 #define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1) 150 #define VXLAN_SC_FTABLE_HASH(_sc, _mac) \ 151 (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE) 152 153 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry); 154 155 struct vxlan_statistics { 156 uint32_t ftable_nospace; 157 uint32_t ftable_lock_upgrade_failed; 158 counter_u64_t txcsum; 159 counter_u64_t tso; 160 counter_u64_t rxcsum; 161 }; 162 163 struct vxlan_softc { 164 struct ifnet *vxl_ifp; 165 int vxl_reqcap; 166 u_int vxl_fibnum; 167 struct vxlan_socket *vxl_sock; 168 uint32_t vxl_vni; 169 union vxlan_sockaddr vxl_src_addr; 170 union vxlan_sockaddr vxl_dst_addr; 171 uint32_t vxl_flags; 172 #define VXLAN_FLAG_INIT 0x0001 173 #define VXLAN_FLAG_TEARDOWN 0x0002 174 #define VXLAN_FLAG_LEARN 0x0004 175 #define VXLAN_FLAG_USER_MTU 0x0008 176 177 uint32_t vxl_port_hash_key; 178 uint16_t vxl_min_port; 179 uint16_t vxl_max_port; 180 uint8_t vxl_ttl; 181 182 /* Lookup table from MAC address to forwarding entry. */ 183 uint32_t vxl_ftable_cnt; 184 uint32_t vxl_ftable_max; 185 uint32_t vxl_ftable_timeout; 186 uint32_t vxl_ftable_hash_key; 187 struct vxlan_ftable_head *vxl_ftable; 188 189 /* Derived from vxl_dst_addr. 
*/ 190 struct vxlan_ftable_entry vxl_default_fe; 191 192 struct ip_moptions *vxl_im4o; 193 struct ip6_moptions *vxl_im6o; 194 195 struct rmlock vxl_lock; 196 volatile u_int vxl_refcnt; 197 198 int vxl_unit; 199 int vxl_vso_mc_index; 200 struct vxlan_statistics vxl_stats; 201 struct sysctl_oid *vxl_sysctl_node; 202 struct sysctl_ctx_list vxl_sysctl_ctx; 203 struct callout vxl_callout; 204 struct ether_addr vxl_hwaddr; 205 int vxl_mc_ifindex; 206 struct ifnet *vxl_mc_ifp; 207 struct ifmedia vxl_media; 208 char vxl_mc_ifname[IFNAMSIZ]; 209 LIST_ENTRY(vxlan_softc) vxl_entry; 210 LIST_ENTRY(vxlan_softc) vxl_ifdetach_list; 211 212 /* For rate limiting errors on the tx fast path. */ 213 struct timeval err_time; 214 int err_pps; 215 }; 216 217 #define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p)) 218 #define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p)) 219 #define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock) 220 #define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock) 221 #define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock) 222 #define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED) 223 #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED) 224 #define VXLAN_UNLOCK(_sc, _p) do { \ 225 if (VXLAN_LOCK_WOWNED(_sc)) \ 226 VXLAN_WUNLOCK(_sc); \ 227 else \ 228 VXLAN_RUNLOCK(_sc, _p); \ 229 } while (0) 230 231 #define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt) 232 #define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt) 233 234 #define satoconstsin(sa) ((const struct sockaddr_in *)(sa)) 235 #define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa)) 236 237 struct vxlanudphdr { 238 struct udphdr vxlh_udp; 239 struct vxlan_header vxlh_hdr; 240 } __packed; 241 242 static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *); 243 static void vxlan_ftable_init(struct vxlan_softc *); 244 static void vxlan_ftable_fini(struct vxlan_softc *); 245 static void vxlan_ftable_flush(struct vxlan_softc *, int); 246 static void vxlan_ftable_expire(struct vxlan_softc *); 247 static int vxlan_ftable_update_locked(struct vxlan_softc *, 248 const union vxlan_sockaddr *, const uint8_t *, 249 struct rm_priotracker *); 250 static int vxlan_ftable_learn(struct vxlan_softc *, 251 const struct sockaddr *, const uint8_t *); 252 static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS); 253 254 static struct vxlan_ftable_entry * 255 vxlan_ftable_entry_alloc(void); 256 static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *); 257 static void vxlan_ftable_entry_init(struct vxlan_softc *, 258 struct vxlan_ftable_entry *, const uint8_t *, 259 const struct sockaddr *, uint32_t); 260 static void vxlan_ftable_entry_destroy(struct vxlan_softc *, 261 struct vxlan_ftable_entry *); 262 static int vxlan_ftable_entry_insert(struct vxlan_softc *, 263 struct vxlan_ftable_entry *); 264 static struct vxlan_ftable_entry * 265 vxlan_ftable_entry_lookup(struct vxlan_softc *, 266 const uint8_t *); 267 static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *, 268 struct sbuf *); 269 270 static struct vxlan_socket * 271 vxlan_socket_alloc(const union vxlan_sockaddr *); 272 static void vxlan_socket_destroy(struct vxlan_socket *); 273 static void vxlan_socket_release(struct vxlan_socket *); 274 static struct vxlan_socket * 275 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa); 276 static void vxlan_socket_insert(struct vxlan_socket *); 277 static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *); 278 static int vxlan_socket_bind(struct vxlan_socket 
*, struct ifnet *); 279 static int vxlan_socket_create(struct ifnet *, int, 280 const union vxlan_sockaddr *, struct vxlan_socket **); 281 static void vxlan_socket_ifdetach(struct vxlan_socket *, 282 struct ifnet *, struct vxlan_softc_head *); 283 284 static struct vxlan_socket * 285 vxlan_socket_mc_lookup(const union vxlan_sockaddr *); 286 static int vxlan_sockaddr_mc_info_match( 287 const struct vxlan_socket_mc_info *, 288 const union vxlan_sockaddr *, 289 const union vxlan_sockaddr *, int); 290 static int vxlan_socket_mc_join_group(struct vxlan_socket *, 291 const union vxlan_sockaddr *, const union vxlan_sockaddr *, 292 int *, union vxlan_sockaddr *); 293 static int vxlan_socket_mc_leave_group(struct vxlan_socket *, 294 const union vxlan_sockaddr *, 295 const union vxlan_sockaddr *, int); 296 static int vxlan_socket_mc_add_group(struct vxlan_socket *, 297 const union vxlan_sockaddr *, const union vxlan_sockaddr *, 298 int, int *); 299 static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *, 300 int); 301 302 static struct vxlan_softc * 303 vxlan_socket_lookup_softc_locked(struct vxlan_socket *, 304 uint32_t); 305 static struct vxlan_softc * 306 vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t); 307 static int vxlan_socket_insert_softc(struct vxlan_socket *, 308 struct vxlan_softc *); 309 static void vxlan_socket_remove_softc(struct vxlan_socket *, 310 struct vxlan_softc *); 311 312 static struct ifnet * 313 vxlan_multicast_if_ref(struct vxlan_softc *, int); 314 static void vxlan_free_multicast(struct vxlan_softc *); 315 static int vxlan_setup_multicast_interface(struct vxlan_softc *); 316 317 static int vxlan_setup_multicast(struct vxlan_softc *); 318 static int vxlan_setup_socket(struct vxlan_softc *); 319 #ifdef INET6 320 static void vxlan_setup_zero_checksum_port(struct vxlan_softc *); 321 #endif 322 static void vxlan_setup_interface_hdrlen(struct vxlan_softc *); 323 static int vxlan_valid_init_config(struct vxlan_softc *); 324 static void vxlan_init_wait(struct vxlan_softc *); 325 static void vxlan_init_complete(struct vxlan_softc *); 326 static void vxlan_init(void *); 327 static void vxlan_release(struct vxlan_softc *); 328 static void vxlan_teardown_wait(struct vxlan_softc *); 329 static void vxlan_teardown_complete(struct vxlan_softc *); 330 static void vxlan_teardown_locked(struct vxlan_softc *); 331 static void vxlan_teardown(struct vxlan_softc *); 332 static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *, 333 struct vxlan_softc_head *); 334 static void vxlan_timer(void *); 335 336 static int vxlan_ctrl_get_config(struct vxlan_softc *, void *); 337 static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *); 338 static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *); 339 static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *); 340 static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *); 341 static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *); 342 static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *); 343 static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *); 344 static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *); 345 static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *); 346 static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *); 347 static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *); 348 static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *); 349 static int vxlan_ctrl_ftable_entry_rem(struct 
vxlan_softc *, void *); 350 static int vxlan_ctrl_flush(struct vxlan_softc *, void *); 351 static int vxlan_ioctl_drvspec(struct vxlan_softc *, 352 struct ifdrv *, int); 353 static int vxlan_ioctl_ifflags(struct vxlan_softc *); 354 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); 355 356 #if defined(INET) || defined(INET6) 357 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *); 358 static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *, 359 int, uint16_t, uint16_t); 360 #endif 361 static int vxlan_encap4(struct vxlan_softc *, 362 const union vxlan_sockaddr *, struct mbuf *); 363 static int vxlan_encap6(struct vxlan_softc *, 364 const union vxlan_sockaddr *, struct mbuf *); 365 static int vxlan_transmit(struct ifnet *, struct mbuf *); 366 static void vxlan_qflush(struct ifnet *); 367 static bool vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *, 368 const struct sockaddr *, void *); 369 static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **, 370 const struct sockaddr *); 371 372 static int vxlan_stats_alloc(struct vxlan_softc *); 373 static void vxlan_stats_free(struct vxlan_softc *); 374 static void vxlan_set_default_config(struct vxlan_softc *); 375 static int vxlan_set_user_config(struct vxlan_softc *, 376 struct ifvxlanparam *); 377 static int vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int); 378 static void vxlan_set_hwcaps(struct vxlan_softc *); 379 static int vxlan_clone_create(struct if_clone *, int, caddr_t); 380 static void vxlan_clone_destroy(struct ifnet *); 381 382 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *); 383 static int vxlan_media_change(struct ifnet *); 384 static void vxlan_media_status(struct ifnet *, struct ifmediareq *); 385 386 static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *, 387 const struct sockaddr *); 388 static void vxlan_sockaddr_copy(union vxlan_sockaddr *, 389 const struct sockaddr *); 390 static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *, 391 const struct sockaddr *); 392 static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *, 393 const struct sockaddr *); 394 static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int); 395 static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *); 396 static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *); 397 static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *); 398 399 static int vxlan_can_change_config(struct vxlan_softc *); 400 static int vxlan_check_vni(uint32_t); 401 static int vxlan_check_ttl(int); 402 static int vxlan_check_ftable_timeout(uint32_t); 403 static int vxlan_check_ftable_max(uint32_t); 404 405 static void vxlan_sysctl_setup(struct vxlan_softc *); 406 static void vxlan_sysctl_destroy(struct vxlan_softc *); 407 static int vxlan_tunable_int(struct vxlan_softc *, const char *, int); 408 409 static void vxlan_ifdetach_event(void *, struct ifnet *); 410 static void vxlan_load(void); 411 static void vxlan_unload(void); 412 static int vxlan_modevent(module_t, int, void *); 413 414 static const char vxlan_name[] = "vxlan"; 415 static MALLOC_DEFINE(M_VXLAN, vxlan_name, 416 "Virtual eXtensible LAN Interface"); 417 static struct if_clone *vxlan_cloner; 418 419 static struct mtx vxlan_list_mtx; 420 #define VXLAN_LIST_LOCK() mtx_lock(&vxlan_list_mtx) 421 #define VXLAN_LIST_UNLOCK() mtx_unlock(&vxlan_list_mtx) 422 423 static LIST_HEAD(, vxlan_socket) vxlan_socket_list; 424 425 static eventhandler_tag vxlan_ifdetach_event_tag; 426 427 
SYSCTL_DECL(_net_link); 428 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 429 "Virtual eXtensible Local Area Network"); 430 431 static int vxlan_legacy_port = 0; 432 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port); 433 static int vxlan_reuse_port = 0; 434 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port); 435 436 /* Default maximum number of addresses in the forwarding table. */ 437 #ifndef VXLAN_FTABLE_MAX 438 #define VXLAN_FTABLE_MAX 2000 439 #endif 440 441 /* Timeout (in seconds) of addresses learned in the forwarding table. */ 442 #ifndef VXLAN_FTABLE_TIMEOUT 443 #define VXLAN_FTABLE_TIMEOUT (20 * 60) 444 #endif 445 446 /* 447 * Maximum timeout (in seconds) of addresses learned in the forwarding 448 * table. 449 */ 450 #ifndef VXLAN_FTABLE_MAX_TIMEOUT 451 #define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24) 452 #endif 453 454 /* Number of seconds between pruning attempts of the forwarding table. */ 455 #ifndef VXLAN_FTABLE_PRUNE 456 #define VXLAN_FTABLE_PRUNE (5 * 60) 457 #endif 458 459 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE; 460 461 struct vxlan_control { 462 int (*vxlc_func)(struct vxlan_softc *, void *); 463 int vxlc_argsize; 464 int vxlc_flags; 465 #define VXLAN_CTRL_FLAG_COPYIN 0x01 466 #define VXLAN_CTRL_FLAG_COPYOUT 0x02 467 #define VXLAN_CTRL_FLAG_SUSER 0x04 468 }; 469 470 static const struct vxlan_control vxlan_control_table[] = { 471 [VXLAN_CMD_GET_CONFIG] = 472 { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg), 473 VXLAN_CTRL_FLAG_COPYOUT 474 }, 475 476 [VXLAN_CMD_SET_VNI] = 477 { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd), 478 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 479 }, 480 481 [VXLAN_CMD_SET_LOCAL_ADDR] = 482 { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd), 483 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 484 }, 485 486 [VXLAN_CMD_SET_REMOTE_ADDR] = 487 { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd), 488 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 489 }, 490 491 [VXLAN_CMD_SET_LOCAL_PORT] = 492 { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd), 493 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 494 }, 495 496 [VXLAN_CMD_SET_REMOTE_PORT] = 497 { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd), 498 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 499 }, 500 501 [VXLAN_CMD_SET_PORT_RANGE] = 502 { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd), 503 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 504 }, 505 506 [VXLAN_CMD_SET_FTABLE_TIMEOUT] = 507 { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd), 508 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 509 }, 510 511 [VXLAN_CMD_SET_FTABLE_MAX] = 512 { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd), 513 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 514 }, 515 516 [VXLAN_CMD_SET_MULTICAST_IF] = 517 { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd), 518 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 519 }, 520 521 [VXLAN_CMD_SET_TTL] = 522 { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd), 523 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 524 }, 525 526 [VXLAN_CMD_SET_LEARN] = 527 { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd), 528 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 529 }, 530 531 [VXLAN_CMD_FTABLE_ENTRY_ADD] = 532 { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd), 533 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 534 }, 535 536 [VXLAN_CMD_FTABLE_ENTRY_REM] = 537 { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd), 538 VXLAN_CTRL_FLAG_COPYIN | 
VXLAN_CTRL_FLAG_SUSER, 539 }, 540 541 [VXLAN_CMD_FLUSH] = 542 { vxlan_ctrl_flush, sizeof(struct ifvxlancmd), 543 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 544 }, 545 }; 546 547 static const int vxlan_control_table_size = nitems(vxlan_control_table); 548 549 static int 550 vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b) 551 { 552 int i, d; 553 554 for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) 555 d = ((int)a[i]) - ((int)b[i]); 556 557 return (d); 558 } 559 560 static void 561 vxlan_ftable_init(struct vxlan_softc *sc) 562 { 563 int i; 564 565 sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) * 566 VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK); 567 568 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) 569 LIST_INIT(&sc->vxl_ftable[i]); 570 sc->vxl_ftable_hash_key = arc4random(); 571 } 572 573 static void 574 vxlan_ftable_fini(struct vxlan_softc *sc) 575 { 576 int i; 577 578 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 579 KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]), 580 ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i)); 581 } 582 MPASS(sc->vxl_ftable_cnt == 0); 583 584 free(sc->vxl_ftable, M_VXLAN); 585 sc->vxl_ftable = NULL; 586 } 587 588 static void 589 vxlan_ftable_flush(struct vxlan_softc *sc, int all) 590 { 591 struct vxlan_ftable_entry *fe, *tfe; 592 int i; 593 594 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 595 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) { 596 if (all || VXLAN_FE_IS_DYNAMIC(fe)) 597 vxlan_ftable_entry_destroy(sc, fe); 598 } 599 } 600 } 601 602 static void 603 vxlan_ftable_expire(struct vxlan_softc *sc) 604 { 605 struct vxlan_ftable_entry *fe, *tfe; 606 int i; 607 608 VXLAN_LOCK_WASSERT(sc); 609 610 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 611 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) { 612 if (VXLAN_FE_IS_DYNAMIC(fe) && 613 time_uptime >= fe->vxlfe_expire) 614 vxlan_ftable_entry_destroy(sc, fe); 615 } 616 } 617 } 618 619 static int 620 vxlan_ftable_update_locked(struct vxlan_softc *sc, 621 const union vxlan_sockaddr *vxlsa, const uint8_t *mac, 622 struct rm_priotracker *tracker) 623 { 624 struct vxlan_ftable_entry *fe; 625 int error __unused; 626 627 VXLAN_LOCK_ASSERT(sc); 628 629 again: 630 /* 631 * A forwarding entry for this MAC address might already exist. If 632 * so, update it, otherwise create a new one. We may have to upgrade 633 * the lock if we have to change or create an entry. 634 */ 635 fe = vxlan_ftable_entry_lookup(sc, mac); 636 if (fe != NULL) { 637 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout; 638 639 if (!VXLAN_FE_IS_DYNAMIC(fe) || 640 vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa)) 641 return (0); 642 if (!VXLAN_LOCK_WOWNED(sc)) { 643 VXLAN_RUNLOCK(sc, tracker); 644 VXLAN_WLOCK(sc); 645 sc->vxl_stats.ftable_lock_upgrade_failed++; 646 goto again; 647 } 648 vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa); 649 return (0); 650 } 651 652 if (!VXLAN_LOCK_WOWNED(sc)) { 653 VXLAN_RUNLOCK(sc, tracker); 654 VXLAN_WLOCK(sc); 655 sc->vxl_stats.ftable_lock_upgrade_failed++; 656 goto again; 657 } 658 659 if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) { 660 sc->vxl_stats.ftable_nospace++; 661 return (ENOSPC); 662 } 663 664 fe = vxlan_ftable_entry_alloc(); 665 if (fe == NULL) 666 return (ENOMEM); 667 668 vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC); 669 670 /* The prior lookup failed, so the insert should not. 
*/ 671 error = vxlan_ftable_entry_insert(sc, fe); 672 MPASS(error == 0); 673 674 return (0); 675 } 676 677 static int 678 vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa, 679 const uint8_t *mac) 680 { 681 struct rm_priotracker tracker; 682 union vxlan_sockaddr vxlsa; 683 int error; 684 685 /* 686 * The source port may be randomly selected by the remote host, so 687 * use the port of the default destination address. 688 */ 689 vxlan_sockaddr_copy(&vxlsa, sa); 690 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port; 691 692 if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) { 693 error = vxlan_sockaddr_in6_embedscope(&vxlsa); 694 if (error) 695 return (error); 696 } 697 698 VXLAN_RLOCK(sc, &tracker); 699 error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker); 700 VXLAN_UNLOCK(sc, &tracker); 701 702 return (error); 703 } 704 705 static int 706 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS) 707 { 708 struct rm_priotracker tracker; 709 struct sbuf sb; 710 struct vxlan_softc *sc; 711 struct vxlan_ftable_entry *fe; 712 size_t size; 713 int i, error; 714 715 /* 716 * This is mostly intended for debugging during development. It is 717 * not practical to dump an entire large table this way. 718 */ 719 720 sc = arg1; 721 size = PAGE_SIZE; /* Calculate later. */ 722 723 sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN); 724 sbuf_putc(&sb, '\n'); 725 726 VXLAN_RLOCK(sc, &tracker); 727 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 728 LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) { 729 if (sbuf_error(&sb) != 0) 730 break; 731 vxlan_ftable_entry_dump(fe, &sb); 732 } 733 } 734 VXLAN_RUNLOCK(sc, &tracker); 735 736 if (sbuf_len(&sb) == 1) 737 sbuf_setpos(&sb, 0); 738 739 sbuf_finish(&sb); 740 error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 741 sbuf_delete(&sb); 742 743 return (error); 744 } 745 746 static struct vxlan_ftable_entry * 747 vxlan_ftable_entry_alloc(void) 748 { 749 struct vxlan_ftable_entry *fe; 750 751 fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT); 752 753 return (fe); 754 } 755 756 static void 757 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe) 758 { 759 760 free(fe, M_VXLAN); 761 } 762 763 static void 764 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe, 765 const uint8_t *mac, const struct sockaddr *sa, uint32_t flags) 766 { 767 768 fe->vxlfe_flags = flags; 769 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout; 770 memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN); 771 vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa); 772 } 773 774 static void 775 vxlan_ftable_entry_destroy(struct vxlan_softc *sc, 776 struct vxlan_ftable_entry *fe) 777 { 778 779 sc->vxl_ftable_cnt--; 780 LIST_REMOVE(fe, vxlfe_hash); 781 vxlan_ftable_entry_free(fe); 782 } 783 784 static int 785 vxlan_ftable_entry_insert(struct vxlan_softc *sc, 786 struct vxlan_ftable_entry *fe) 787 { 788 struct vxlan_ftable_entry *lfe; 789 uint32_t hash; 790 int dir; 791 792 VXLAN_LOCK_WASSERT(sc); 793 hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac); 794 795 lfe = LIST_FIRST(&sc->vxl_ftable[hash]); 796 if (lfe == NULL) { 797 LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash); 798 goto out; 799 } 800 801 do { 802 dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac); 803 if (dir == 0) 804 return (EEXIST); 805 if (dir > 0) { 806 LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash); 807 goto out; 808 } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) { 809 LIST_INSERT_AFTER(lfe, fe, vxlfe_hash); 810 goto out; 811 } else 812 lfe = LIST_NEXT(lfe, vxlfe_hash); 813 } while (lfe != NULL); 814 815 
out: 816 sc->vxl_ftable_cnt++; 817 818 return (0); 819 } 820 821 static struct vxlan_ftable_entry * 822 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac) 823 { 824 struct vxlan_ftable_entry *fe; 825 uint32_t hash; 826 int dir; 827 828 VXLAN_LOCK_ASSERT(sc); 829 hash = VXLAN_SC_FTABLE_HASH(sc, mac); 830 831 LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) { 832 dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac); 833 if (dir == 0) 834 return (fe); 835 if (dir > 0) 836 break; 837 } 838 839 return (NULL); 840 } 841 842 static void 843 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb) 844 { 845 char buf[64]; 846 const union vxlan_sockaddr *sa; 847 const void *addr; 848 int i, len, af, width; 849 850 sa = &fe->vxlfe_raddr; 851 af = sa->sa.sa_family; 852 len = sbuf_len(sb); 853 854 sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S', 855 fe->vxlfe_flags); 856 857 for (i = 0; i < ETHER_ADDR_LEN - 1; i++) 858 sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]); 859 sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]); 860 861 if (af == AF_INET) { 862 addr = &sa->in4.sin_addr; 863 width = INET_ADDRSTRLEN - 1; 864 } else { 865 addr = &sa->in6.sin6_addr; 866 width = INET6_ADDRSTRLEN - 1; 867 } 868 inet_ntop(af, addr, buf, sizeof(buf)); 869 sbuf_printf(sb, "%*s ", width, buf); 870 871 sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire); 872 873 sbuf_putc(sb, '\n'); 874 875 /* Truncate a partial line. */ 876 if (sbuf_error(sb) != 0) 877 sbuf_setpos(sb, len); 878 } 879 880 static struct vxlan_socket * 881 vxlan_socket_alloc(const union vxlan_sockaddr *sa) 882 { 883 struct vxlan_socket *vso; 884 int i; 885 886 vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO); 887 rm_init(&vso->vxlso_lock, "vxlansorm"); 888 refcount_init(&vso->vxlso_refcnt, 0); 889 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) 890 LIST_INIT(&vso->vxlso_vni_hash[i]); 891 vso->vxlso_laddr = *sa; 892 893 return (vso); 894 } 895 896 static void 897 vxlan_socket_destroy(struct vxlan_socket *vso) 898 { 899 struct socket *so; 900 #ifdef INVARIANTS 901 int i; 902 struct vxlan_socket_mc_info *mc; 903 904 for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { 905 mc = &vso->vxlso_mc[i]; 906 KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC, 907 ("%s: socket %p mc[%d] still has address", 908 __func__, vso, i)); 909 } 910 911 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) { 912 KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]), 913 ("%s: socket %p vni_hash[%d] not empty", 914 __func__, vso, i)); 915 } 916 #endif 917 so = vso->vxlso_sock; 918 if (so != NULL) { 919 vso->vxlso_sock = NULL; 920 soclose(so); 921 } 922 923 rm_destroy(&vso->vxlso_lock); 924 free(vso, M_VXLAN); 925 } 926 927 static void 928 vxlan_socket_release(struct vxlan_socket *vso) 929 { 930 int destroy; 931 932 VXLAN_LIST_LOCK(); 933 destroy = VXLAN_SO_RELEASE(vso); 934 if (destroy != 0) 935 LIST_REMOVE(vso, vxlso_entry); 936 VXLAN_LIST_UNLOCK(); 937 938 if (destroy != 0) 939 vxlan_socket_destroy(vso); 940 } 941 942 static struct vxlan_socket * 943 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa) 944 { 945 struct vxlan_socket *vso; 946 947 VXLAN_LIST_LOCK(); 948 LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) { 949 if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) { 950 VXLAN_SO_ACQUIRE(vso); 951 break; 952 } 953 } 954 VXLAN_LIST_UNLOCK(); 955 956 return (vso); 957 } 958 959 static void 960 vxlan_socket_insert(struct vxlan_socket *vso) 961 { 962 963 VXLAN_LIST_LOCK(); 964 VXLAN_SO_ACQUIRE(vso); 965 LIST_INSERT_HEAD(&vxlan_socket_list, vso, 
vxlso_entry); 966 VXLAN_LIST_UNLOCK(); 967 } 968 969 static int 970 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp) 971 { 972 struct thread *td; 973 int error; 974 975 td = curthread; 976 977 error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock, 978 SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td); 979 if (error) { 980 if_printf(ifp, "cannot create socket: %d\n", error); 981 return (error); 982 } 983 984 error = udp_set_kernel_tunneling(vso->vxlso_sock, 985 vxlan_rcv_udp_packet, NULL, vso); 986 if (error) { 987 if_printf(ifp, "cannot set tunneling function: %d\n", error); 988 return (error); 989 } 990 991 if (vxlan_reuse_port != 0) { 992 struct sockopt sopt; 993 int val = 1; 994 995 bzero(&sopt, sizeof(sopt)); 996 sopt.sopt_dir = SOPT_SET; 997 sopt.sopt_level = IPPROTO_IP; 998 sopt.sopt_name = SO_REUSEPORT; 999 sopt.sopt_val = &val; 1000 sopt.sopt_valsize = sizeof(val); 1001 error = sosetopt(vso->vxlso_sock, &sopt); 1002 if (error) { 1003 if_printf(ifp, 1004 "cannot set REUSEADDR socket opt: %d\n", error); 1005 return (error); 1006 } 1007 } 1008 1009 return (0); 1010 } 1011 1012 static int 1013 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp) 1014 { 1015 union vxlan_sockaddr laddr; 1016 struct thread *td; 1017 int error; 1018 1019 td = curthread; 1020 laddr = vso->vxlso_laddr; 1021 1022 error = sobind(vso->vxlso_sock, &laddr.sa, td); 1023 if (error) { 1024 if (error != EADDRINUSE) 1025 if_printf(ifp, "cannot bind socket: %d\n", error); 1026 return (error); 1027 } 1028 1029 return (0); 1030 } 1031 1032 static int 1033 vxlan_socket_create(struct ifnet *ifp, int multicast, 1034 const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop) 1035 { 1036 union vxlan_sockaddr laddr; 1037 struct vxlan_socket *vso; 1038 int error; 1039 1040 laddr = *saddr; 1041 1042 /* 1043 * If this socket will be multicast, then only the local port 1044 * must be specified when binding. 1045 */ 1046 if (multicast != 0) { 1047 if (VXLAN_SOCKADDR_IS_IPV4(&laddr)) 1048 laddr.in4.sin_addr.s_addr = INADDR_ANY; 1049 #ifdef INET6 1050 else 1051 laddr.in6.sin6_addr = in6addr_any; 1052 #endif 1053 } 1054 1055 vso = vxlan_socket_alloc(&laddr); 1056 if (vso == NULL) 1057 return (ENOMEM); 1058 1059 error = vxlan_socket_init(vso, ifp); 1060 if (error) 1061 goto fail; 1062 1063 error = vxlan_socket_bind(vso, ifp); 1064 if (error) 1065 goto fail; 1066 1067 /* 1068 * There is a small window between the bind completing and 1069 * inserting the socket, so that a concurrent create may fail. 1070 * Let's not worry about that for now. 
1071 */ 1072 vxlan_socket_insert(vso); 1073 *vsop = vso; 1074 1075 return (0); 1076 1077 fail: 1078 vxlan_socket_destroy(vso); 1079 1080 return (error); 1081 } 1082 1083 static void 1084 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp, 1085 struct vxlan_softc_head *list) 1086 { 1087 struct rm_priotracker tracker; 1088 struct vxlan_softc *sc; 1089 int i; 1090 1091 VXLAN_SO_RLOCK(vso, &tracker); 1092 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) { 1093 LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry) 1094 vxlan_ifdetach(sc, ifp, list); 1095 } 1096 VXLAN_SO_RUNLOCK(vso, &tracker); 1097 } 1098 1099 static struct vxlan_socket * 1100 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa) 1101 { 1102 union vxlan_sockaddr laddr; 1103 struct vxlan_socket *vso; 1104 1105 laddr = *vxlsa; 1106 1107 if (VXLAN_SOCKADDR_IS_IPV4(&laddr)) 1108 laddr.in4.sin_addr.s_addr = INADDR_ANY; 1109 #ifdef INET6 1110 else 1111 laddr.in6.sin6_addr = in6addr_any; 1112 #endif 1113 1114 vso = vxlan_socket_lookup(&laddr); 1115 1116 return (vso); 1117 } 1118 1119 static int 1120 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc, 1121 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, 1122 int ifidx) 1123 { 1124 1125 if (!vxlan_sockaddr_in_any(local) && 1126 !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa)) 1127 return (0); 1128 if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa)) 1129 return (0); 1130 if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx) 1131 return (0); 1132 1133 return (1); 1134 } 1135 1136 static int 1137 vxlan_socket_mc_join_group(struct vxlan_socket *vso, 1138 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, 1139 int *ifidx, union vxlan_sockaddr *source) 1140 { 1141 struct sockopt sopt; 1142 int error; 1143 1144 *source = *local; 1145 1146 if (VXLAN_SOCKADDR_IS_IPV4(group)) { 1147 struct ip_mreq mreq; 1148 1149 mreq.imr_multiaddr = group->in4.sin_addr; 1150 mreq.imr_interface = local->in4.sin_addr; 1151 1152 bzero(&sopt, sizeof(sopt)); 1153 sopt.sopt_dir = SOPT_SET; 1154 sopt.sopt_level = IPPROTO_IP; 1155 sopt.sopt_name = IP_ADD_MEMBERSHIP; 1156 sopt.sopt_val = &mreq; 1157 sopt.sopt_valsize = sizeof(mreq); 1158 error = sosetopt(vso->vxlso_sock, &sopt); 1159 if (error) 1160 return (error); 1161 1162 /* 1163 * BMV: Ideally, there would be a formal way for us to get 1164 * the local interface that was selected based on the 1165 * imr_interface address. We could then update *ifidx so 1166 * vxlan_sockaddr_mc_info_match() would return a match for 1167 * later creates that explicitly set the multicast interface. 1168 * 1169 * If we really need to, we can of course look in the INP's 1170 * membership list: 1171 * sotoinpcb(vso->vxlso_sock)->inp_moptions-> 1172 * imo_head[]->imf_inm->inm_ifp 1173 * similarly to imo_match_group(). 1174 */ 1175 source->in4.sin_addr = local->in4.sin_addr; 1176 1177 } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { 1178 struct ipv6_mreq mreq; 1179 1180 mreq.ipv6mr_multiaddr = group->in6.sin6_addr; 1181 mreq.ipv6mr_interface = *ifidx; 1182 1183 bzero(&sopt, sizeof(sopt)); 1184 sopt.sopt_dir = SOPT_SET; 1185 sopt.sopt_level = IPPROTO_IPV6; 1186 sopt.sopt_name = IPV6_JOIN_GROUP; 1187 sopt.sopt_val = &mreq; 1188 sopt.sopt_valsize = sizeof(mreq); 1189 error = sosetopt(vso->vxlso_sock, &sopt); 1190 if (error) 1191 return (error); 1192 1193 /* 1194 * BMV: As with IPv4, we would really like to know what 1195 * interface in6p_lookup_mcast_ifp() selected. 
1196 */ 1197 } else 1198 error = EAFNOSUPPORT; 1199 1200 return (error); 1201 } 1202 1203 static int 1204 vxlan_socket_mc_leave_group(struct vxlan_socket *vso, 1205 const union vxlan_sockaddr *group, const union vxlan_sockaddr *source, 1206 int ifidx) 1207 { 1208 struct sockopt sopt; 1209 int error; 1210 1211 bzero(&sopt, sizeof(sopt)); 1212 sopt.sopt_dir = SOPT_SET; 1213 1214 if (VXLAN_SOCKADDR_IS_IPV4(group)) { 1215 struct ip_mreq mreq; 1216 1217 mreq.imr_multiaddr = group->in4.sin_addr; 1218 mreq.imr_interface = source->in4.sin_addr; 1219 1220 sopt.sopt_level = IPPROTO_IP; 1221 sopt.sopt_name = IP_DROP_MEMBERSHIP; 1222 sopt.sopt_val = &mreq; 1223 sopt.sopt_valsize = sizeof(mreq); 1224 error = sosetopt(vso->vxlso_sock, &sopt); 1225 1226 } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { 1227 struct ipv6_mreq mreq; 1228 1229 mreq.ipv6mr_multiaddr = group->in6.sin6_addr; 1230 mreq.ipv6mr_interface = ifidx; 1231 1232 sopt.sopt_level = IPPROTO_IPV6; 1233 sopt.sopt_name = IPV6_LEAVE_GROUP; 1234 sopt.sopt_val = &mreq; 1235 sopt.sopt_valsize = sizeof(mreq); 1236 error = sosetopt(vso->vxlso_sock, &sopt); 1237 1238 } else 1239 error = EAFNOSUPPORT; 1240 1241 return (error); 1242 } 1243 1244 static int 1245 vxlan_socket_mc_add_group(struct vxlan_socket *vso, 1246 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, 1247 int ifidx, int *idx) 1248 { 1249 union vxlan_sockaddr source; 1250 struct vxlan_socket_mc_info *mc; 1251 int i, empty, error; 1252 1253 /* 1254 * Within a socket, the same multicast group may be used by multiple 1255 * interfaces, each with a different network identifier. But a socket 1256 * may only join a multicast group once, so keep track of the users 1257 * here. 1258 */ 1259 1260 VXLAN_SO_WLOCK(vso); 1261 for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { 1262 mc = &vso->vxlso_mc[i]; 1263 1264 if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) { 1265 empty++; 1266 continue; 1267 } 1268 1269 if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx)) 1270 goto out; 1271 } 1272 VXLAN_SO_WUNLOCK(vso); 1273 1274 if (empty == 0) 1275 return (ENOSPC); 1276 1277 error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source); 1278 if (error) 1279 return (error); 1280 1281 VXLAN_SO_WLOCK(vso); 1282 for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { 1283 mc = &vso->vxlso_mc[i]; 1284 1285 if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) { 1286 vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa); 1287 vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa); 1288 mc->vxlsomc_ifidx = ifidx; 1289 goto out; 1290 } 1291 } 1292 VXLAN_SO_WUNLOCK(vso); 1293 1294 error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx); 1295 MPASS(error == 0); 1296 1297 return (ENOSPC); 1298 1299 out: 1300 mc->vxlsomc_users++; 1301 VXLAN_SO_WUNLOCK(vso); 1302 1303 *idx = i; 1304 1305 return (0); 1306 } 1307 1308 static void 1309 vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx) 1310 { 1311 union vxlan_sockaddr group, source; 1312 struct vxlan_socket_mc_info *mc; 1313 int ifidx, leave; 1314 1315 KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS, 1316 ("%s: vso %p idx %d out of bounds", __func__, vso, idx)); 1317 1318 leave = 0; 1319 mc = &vso->vxlso_mc[idx]; 1320 1321 VXLAN_SO_WLOCK(vso); 1322 mc->vxlsomc_users--; 1323 if (mc->vxlsomc_users == 0) { 1324 group = mc->vxlsomc_gaddr; 1325 source = mc->vxlsomc_saddr; 1326 ifidx = mc->vxlsomc_ifidx; 1327 bzero(mc, sizeof(*mc)); 1328 leave = 1; 1329 } 1330 VXLAN_SO_WUNLOCK(vso); 1331 1332 if (leave != 0) { 1333 /* 1334 * Our 
socket's membership in this group may have already 1335 * been removed if we joined through an interface that's 1336 * been detached. 1337 */ 1338 vxlan_socket_mc_leave_group(vso, &group, &source, ifidx); 1339 } 1340 } 1341 1342 static struct vxlan_softc * 1343 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni) 1344 { 1345 struct vxlan_softc *sc; 1346 uint32_t hash; 1347 1348 VXLAN_SO_LOCK_ASSERT(vso); 1349 hash = VXLAN_SO_VNI_HASH(vni); 1350 1351 LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) { 1352 if (sc->vxl_vni == vni) { 1353 VXLAN_ACQUIRE(sc); 1354 break; 1355 } 1356 } 1357 1358 return (sc); 1359 } 1360 1361 static struct vxlan_softc * 1362 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni) 1363 { 1364 struct rm_priotracker tracker; 1365 struct vxlan_softc *sc; 1366 1367 VXLAN_SO_RLOCK(vso, &tracker); 1368 sc = vxlan_socket_lookup_softc_locked(vso, vni); 1369 VXLAN_SO_RUNLOCK(vso, &tracker); 1370 1371 return (sc); 1372 } 1373 1374 static int 1375 vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc) 1376 { 1377 struct vxlan_softc *tsc; 1378 uint32_t vni, hash; 1379 1380 vni = sc->vxl_vni; 1381 hash = VXLAN_SO_VNI_HASH(vni); 1382 1383 VXLAN_SO_WLOCK(vso); 1384 tsc = vxlan_socket_lookup_softc_locked(vso, vni); 1385 if (tsc != NULL) { 1386 VXLAN_SO_WUNLOCK(vso); 1387 vxlan_release(tsc); 1388 return (EEXIST); 1389 } 1390 1391 VXLAN_ACQUIRE(sc); 1392 LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry); 1393 VXLAN_SO_WUNLOCK(vso); 1394 1395 return (0); 1396 } 1397 1398 static void 1399 vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc) 1400 { 1401 1402 VXLAN_SO_WLOCK(vso); 1403 LIST_REMOVE(sc, vxl_entry); 1404 VXLAN_SO_WUNLOCK(vso); 1405 1406 vxlan_release(sc); 1407 } 1408 1409 static struct ifnet * 1410 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4) 1411 { 1412 struct ifnet *ifp; 1413 1414 VXLAN_LOCK_ASSERT(sc); 1415 1416 if (ipv4 && sc->vxl_im4o != NULL) 1417 ifp = sc->vxl_im4o->imo_multicast_ifp; 1418 else if (!ipv4 && sc->vxl_im6o != NULL) 1419 ifp = sc->vxl_im6o->im6o_multicast_ifp; 1420 else 1421 ifp = NULL; 1422 1423 if (ifp != NULL) 1424 if_ref(ifp); 1425 1426 return (ifp); 1427 } 1428 1429 static void 1430 vxlan_free_multicast(struct vxlan_softc *sc) 1431 { 1432 1433 if (sc->vxl_mc_ifp != NULL) { 1434 if_rele(sc->vxl_mc_ifp); 1435 sc->vxl_mc_ifp = NULL; 1436 sc->vxl_mc_ifindex = 0; 1437 } 1438 1439 if (sc->vxl_im4o != NULL) { 1440 free(sc->vxl_im4o, M_VXLAN); 1441 sc->vxl_im4o = NULL; 1442 } 1443 1444 if (sc->vxl_im6o != NULL) { 1445 free(sc->vxl_im6o, M_VXLAN); 1446 sc->vxl_im6o = NULL; 1447 } 1448 } 1449 1450 static int 1451 vxlan_setup_multicast_interface(struct vxlan_softc *sc) 1452 { 1453 struct ifnet *ifp; 1454 1455 ifp = ifunit_ref(sc->vxl_mc_ifname); 1456 if (ifp == NULL) { 1457 if_printf(sc->vxl_ifp, "multicast interface %s does " 1458 "not exist\n", sc->vxl_mc_ifname); 1459 return (ENOENT); 1460 } 1461 1462 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1463 if_printf(sc->vxl_ifp, "interface %s does not support " 1464 "multicast\n", sc->vxl_mc_ifname); 1465 if_rele(ifp); 1466 return (ENOTSUP); 1467 } 1468 1469 sc->vxl_mc_ifp = ifp; 1470 sc->vxl_mc_ifindex = ifp->if_index; 1471 1472 return (0); 1473 } 1474 1475 static int 1476 vxlan_setup_multicast(struct vxlan_softc *sc) 1477 { 1478 const union vxlan_sockaddr *group; 1479 int error; 1480 1481 group = &sc->vxl_dst_addr; 1482 error = 0; 1483 1484 if (sc->vxl_mc_ifname[0] != '\0') { 1485 error = 
vxlan_setup_multicast_interface(sc); 1486 if (error) 1487 return (error); 1488 } 1489 1490 /* 1491 * Initialize an multicast options structure that is sufficiently 1492 * populated for use in the respective IP output routine. This 1493 * structure is typically stored in the socket, but our sockets 1494 * may be shared among multiple interfaces. 1495 */ 1496 if (VXLAN_SOCKADDR_IS_IPV4(group)) { 1497 sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN, 1498 M_ZERO | M_WAITOK); 1499 sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp; 1500 sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl; 1501 sc->vxl_im4o->imo_multicast_vif = -1; 1502 } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { 1503 sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN, 1504 M_ZERO | M_WAITOK); 1505 sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp; 1506 sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl; 1507 } 1508 1509 return (error); 1510 } 1511 1512 static int 1513 vxlan_setup_socket(struct vxlan_softc *sc) 1514 { 1515 struct vxlan_socket *vso; 1516 struct ifnet *ifp; 1517 union vxlan_sockaddr *saddr, *daddr; 1518 int multicast, error; 1519 1520 vso = NULL; 1521 ifp = sc->vxl_ifp; 1522 saddr = &sc->vxl_src_addr; 1523 daddr = &sc->vxl_dst_addr; 1524 1525 multicast = vxlan_sockaddr_in_multicast(daddr); 1526 MPASS(multicast != -1); 1527 sc->vxl_vso_mc_index = -1; 1528 1529 /* 1530 * Try to create the socket. If that fails, attempt to use an 1531 * existing socket. 1532 */ 1533 error = vxlan_socket_create(ifp, multicast, saddr, &vso); 1534 if (error) { 1535 if (multicast != 0) 1536 vso = vxlan_socket_mc_lookup(saddr); 1537 else 1538 vso = vxlan_socket_lookup(saddr); 1539 1540 if (vso == NULL) { 1541 if_printf(ifp, "cannot create socket (error: %d), " 1542 "and no existing socket found\n", error); 1543 goto out; 1544 } 1545 } 1546 1547 if (multicast != 0) { 1548 error = vxlan_setup_multicast(sc); 1549 if (error) 1550 goto out; 1551 1552 error = vxlan_socket_mc_add_group(vso, daddr, saddr, 1553 sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index); 1554 if (error) 1555 goto out; 1556 } 1557 1558 sc->vxl_sock = vso; 1559 error = vxlan_socket_insert_softc(vso, sc); 1560 if (error) { 1561 sc->vxl_sock = NULL; 1562 if_printf(ifp, "network identifier %d already exists in " 1563 "this socket\n", sc->vxl_vni); 1564 goto out; 1565 } 1566 1567 return (0); 1568 1569 out: 1570 if (vso != NULL) { 1571 if (sc->vxl_vso_mc_index != -1) { 1572 vxlan_socket_mc_release_group_by_idx(vso, 1573 sc->vxl_vso_mc_index); 1574 sc->vxl_vso_mc_index = -1; 1575 } 1576 if (multicast != 0) 1577 vxlan_free_multicast(sc); 1578 vxlan_socket_release(vso); 1579 } 1580 1581 return (error); 1582 } 1583 1584 #ifdef INET6 1585 static void 1586 vxlan_setup_zero_checksum_port(struct vxlan_softc *sc) 1587 { 1588 1589 if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr)) 1590 return; 1591 1592 MPASS(sc->vxl_src_addr.in6.sin6_port != 0); 1593 MPASS(sc->vxl_dst_addr.in6.sin6_port != 0); 1594 1595 if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) { 1596 if_printf(sc->vxl_ifp, "port %d in src address does not match " 1597 "port %d in dst address, rfc6935_port (%d) not updated.\n", 1598 ntohs(sc->vxl_src_addr.in6.sin6_port), 1599 ntohs(sc->vxl_dst_addr.in6.sin6_port), 1600 V_zero_checksum_port); 1601 return; 1602 } 1603 1604 if (V_zero_checksum_port != 0) { 1605 if (V_zero_checksum_port != 1606 ntohs(sc->vxl_src_addr.in6.sin6_port)) { 1607 if_printf(sc->vxl_ifp, "rfc6935_port is already set to " 1608 "%d, cannot set it to %d.\n", V_zero_checksum_port, 1609 
ntohs(sc->vxl_src_addr.in6.sin6_port)); 1610 } 1611 return; 1612 } 1613 1614 V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port); 1615 if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n", 1616 V_zero_checksum_port); 1617 } 1618 #endif 1619 1620 static void 1621 vxlan_setup_interface_hdrlen(struct vxlan_softc *sc) 1622 { 1623 struct ifnet *ifp; 1624 1625 VXLAN_LOCK_WASSERT(sc); 1626 1627 ifp = sc->vxl_ifp; 1628 ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr); 1629 1630 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0) 1631 ifp->if_hdrlen += sizeof(struct ip); 1632 else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0) 1633 ifp->if_hdrlen += sizeof(struct ip6_hdr); 1634 1635 if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0) 1636 ifp->if_mtu = ETHERMTU - ifp->if_hdrlen; 1637 } 1638 1639 static int 1640 vxlan_valid_init_config(struct vxlan_softc *sc) 1641 { 1642 const char *reason; 1643 1644 if (vxlan_check_vni(sc->vxl_vni) != 0) { 1645 reason = "invalid virtual network identifier specified"; 1646 goto fail; 1647 } 1648 1649 if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) { 1650 reason = "source address type is not supported"; 1651 goto fail; 1652 } 1653 1654 if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) { 1655 reason = "destination address type is not supported"; 1656 goto fail; 1657 } 1658 1659 if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) { 1660 reason = "no valid destination address specified"; 1661 goto fail; 1662 } 1663 1664 if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 && 1665 sc->vxl_mc_ifname[0] != '\0') { 1666 reason = "can only specify interface with a group address"; 1667 goto fail; 1668 } 1669 1670 if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { 1671 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^ 1672 VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) { 1673 reason = "source and destination address must both " 1674 "be either IPv4 or IPv6"; 1675 goto fail; 1676 } 1677 } 1678 1679 if (sc->vxl_src_addr.in4.sin_port == 0) { 1680 reason = "local port not specified"; 1681 goto fail; 1682 } 1683 1684 if (sc->vxl_dst_addr.in4.sin_port == 0) { 1685 reason = "remote port not specified"; 1686 goto fail; 1687 } 1688 1689 return (0); 1690 1691 fail: 1692 if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason); 1693 return (EINVAL); 1694 } 1695 1696 static void 1697 vxlan_init_wait(struct vxlan_softc *sc) 1698 { 1699 1700 VXLAN_LOCK_WASSERT(sc); 1701 while (sc->vxl_flags & VXLAN_FLAG_INIT) 1702 rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz); 1703 } 1704 1705 static void 1706 vxlan_init_complete(struct vxlan_softc *sc) 1707 { 1708 1709 VXLAN_WLOCK(sc); 1710 sc->vxl_flags &= ~VXLAN_FLAG_INIT; 1711 wakeup(sc); 1712 VXLAN_WUNLOCK(sc); 1713 } 1714 1715 static void 1716 vxlan_init(void *xsc) 1717 { 1718 static const uint8_t empty_mac[ETHER_ADDR_LEN]; 1719 struct vxlan_softc *sc; 1720 struct ifnet *ifp; 1721 1722 sc = xsc; 1723 ifp = sc->vxl_ifp; 1724 1725 sx_xlock(&vxlan_sx); 1726 VXLAN_WLOCK(sc); 1727 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1728 VXLAN_WUNLOCK(sc); 1729 sx_xunlock(&vxlan_sx); 1730 return; 1731 } 1732 sc->vxl_flags |= VXLAN_FLAG_INIT; 1733 VXLAN_WUNLOCK(sc); 1734 1735 if (vxlan_valid_init_config(sc) != 0) 1736 goto out; 1737 1738 if (vxlan_setup_socket(sc) != 0) 1739 goto out; 1740 1741 #ifdef INET6 1742 vxlan_setup_zero_checksum_port(sc); 1743 #endif 1744 1745 /* Initialize the default forwarding entry. 
*/ 1746 vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac, 1747 &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC); 1748 1749 VXLAN_WLOCK(sc); 1750 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1751 callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz, 1752 vxlan_timer, sc); 1753 VXLAN_WUNLOCK(sc); 1754 1755 if_link_state_change(ifp, LINK_STATE_UP); 1756 1757 EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family, 1758 ntohs(sc->vxl_src_addr.in4.sin_port)); 1759 out: 1760 vxlan_init_complete(sc); 1761 sx_xunlock(&vxlan_sx); 1762 } 1763 1764 static void 1765 vxlan_release(struct vxlan_softc *sc) 1766 { 1767 1768 /* 1769 * The softc may be destroyed as soon as we release our reference, 1770 * so we cannot serialize the wakeup with the softc lock. We use a 1771 * timeout in our sleeps so a missed wakeup is unfortunate but not 1772 * fatal. 1773 */ 1774 if (VXLAN_RELEASE(sc) != 0) 1775 wakeup(sc); 1776 } 1777 1778 static void 1779 vxlan_teardown_wait(struct vxlan_softc *sc) 1780 { 1781 1782 VXLAN_LOCK_WASSERT(sc); 1783 while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) 1784 rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz); 1785 } 1786 1787 static void 1788 vxlan_teardown_complete(struct vxlan_softc *sc) 1789 { 1790 1791 VXLAN_WLOCK(sc); 1792 sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN; 1793 wakeup(sc); 1794 VXLAN_WUNLOCK(sc); 1795 } 1796 1797 static void 1798 vxlan_teardown_locked(struct vxlan_softc *sc) 1799 { 1800 struct ifnet *ifp; 1801 struct vxlan_socket *vso; 1802 1803 sx_assert(&vxlan_sx, SA_XLOCKED); 1804 VXLAN_LOCK_WASSERT(sc); 1805 MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN); 1806 1807 ifp = sc->vxl_ifp; 1808 ifp->if_flags &= ~IFF_UP; 1809 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1810 callout_stop(&sc->vxl_callout); 1811 vso = sc->vxl_sock; 1812 sc->vxl_sock = NULL; 1813 1814 VXLAN_WUNLOCK(sc); 1815 if_link_state_change(ifp, LINK_STATE_DOWN); 1816 EVENTHANDLER_INVOKE(vxlan_stop, ifp, sc->vxl_src_addr.in4.sin_family, 1817 ntohs(sc->vxl_src_addr.in4.sin_port)); 1818 1819 if (vso != NULL) { 1820 vxlan_socket_remove_softc(vso, sc); 1821 1822 if (sc->vxl_vso_mc_index != -1) { 1823 vxlan_socket_mc_release_group_by_idx(vso, 1824 sc->vxl_vso_mc_index); 1825 sc->vxl_vso_mc_index = -1; 1826 } 1827 } 1828 1829 VXLAN_WLOCK(sc); 1830 while (sc->vxl_refcnt != 0) 1831 rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz); 1832 VXLAN_WUNLOCK(sc); 1833 1834 callout_drain(&sc->vxl_callout); 1835 1836 vxlan_free_multicast(sc); 1837 if (vso != NULL) 1838 vxlan_socket_release(vso); 1839 1840 vxlan_teardown_complete(sc); 1841 } 1842 1843 static void 1844 vxlan_teardown(struct vxlan_softc *sc) 1845 { 1846 1847 sx_xlock(&vxlan_sx); 1848 VXLAN_WLOCK(sc); 1849 if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) { 1850 vxlan_teardown_wait(sc); 1851 VXLAN_WUNLOCK(sc); 1852 sx_xunlock(&vxlan_sx); 1853 return; 1854 } 1855 1856 sc->vxl_flags |= VXLAN_FLAG_TEARDOWN; 1857 vxlan_teardown_locked(sc); 1858 sx_xunlock(&vxlan_sx); 1859 } 1860 1861 static void 1862 vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp, 1863 struct vxlan_softc_head *list) 1864 { 1865 1866 VXLAN_WLOCK(sc); 1867 1868 if (sc->vxl_mc_ifp != ifp) 1869 goto out; 1870 if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) 1871 goto out; 1872 1873 sc->vxl_flags |= VXLAN_FLAG_TEARDOWN; 1874 LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list); 1875 1876 out: 1877 VXLAN_WUNLOCK(sc); 1878 } 1879 1880 static void 1881 vxlan_timer(void *xsc) 1882 { 1883 struct vxlan_softc *sc; 1884 1885 sc = xsc; 1886 VXLAN_LOCK_WASSERT(sc); 1887 1888 vxlan_ftable_expire(sc); 1889 
callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz); 1890 } 1891 1892 static int 1893 vxlan_ioctl_ifflags(struct vxlan_softc *sc) 1894 { 1895 struct ifnet *ifp; 1896 1897 ifp = sc->vxl_ifp; 1898 1899 if (ifp->if_flags & IFF_UP) { 1900 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1901 vxlan_init(sc); 1902 } else { 1903 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1904 vxlan_teardown(sc); 1905 } 1906 1907 return (0); 1908 } 1909 1910 static int 1911 vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg) 1912 { 1913 struct rm_priotracker tracker; 1914 struct ifvxlancfg *cfg; 1915 1916 cfg = arg; 1917 bzero(cfg, sizeof(*cfg)); 1918 1919 VXLAN_RLOCK(sc, &tracker); 1920 cfg->vxlc_vni = sc->vxl_vni; 1921 memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr, 1922 sizeof(union vxlan_sockaddr)); 1923 memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr, 1924 sizeof(union vxlan_sockaddr)); 1925 cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex; 1926 cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt; 1927 cfg->vxlc_ftable_max = sc->vxl_ftable_max; 1928 cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout; 1929 cfg->vxlc_port_min = sc->vxl_min_port; 1930 cfg->vxlc_port_max = sc->vxl_max_port; 1931 cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0; 1932 cfg->vxlc_ttl = sc->vxl_ttl; 1933 VXLAN_RUNLOCK(sc, &tracker); 1934 1935 #ifdef INET6 1936 if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa)) 1937 sa6_recoverscope(&cfg->vxlc_local_sa.in6); 1938 if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa)) 1939 sa6_recoverscope(&cfg->vxlc_remote_sa.in6); 1940 #endif 1941 1942 return (0); 1943 } 1944 1945 static int 1946 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg) 1947 { 1948 struct ifvxlancmd *cmd; 1949 int error; 1950 1951 cmd = arg; 1952 1953 if (vxlan_check_vni(cmd->vxlcmd_vni) != 0) 1954 return (EINVAL); 1955 1956 VXLAN_WLOCK(sc); 1957 if (vxlan_can_change_config(sc)) { 1958 sc->vxl_vni = cmd->vxlcmd_vni; 1959 error = 0; 1960 } else 1961 error = EBUSY; 1962 VXLAN_WUNLOCK(sc); 1963 1964 return (error); 1965 } 1966 1967 static int 1968 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg) 1969 { 1970 struct ifvxlancmd *cmd; 1971 union vxlan_sockaddr *vxlsa; 1972 int error; 1973 1974 cmd = arg; 1975 vxlsa = &cmd->vxlcmd_sa; 1976 1977 if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa)) 1978 return (EINVAL); 1979 if (vxlan_sockaddr_in_multicast(vxlsa) != 0) 1980 return (EINVAL); 1981 if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) { 1982 error = vxlan_sockaddr_in6_embedscope(vxlsa); 1983 if (error) 1984 return (error); 1985 } 1986 1987 VXLAN_WLOCK(sc); 1988 if (vxlan_can_change_config(sc)) { 1989 vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa); 1990 vxlan_set_hwcaps(sc); 1991 error = 0; 1992 } else 1993 error = EBUSY; 1994 VXLAN_WUNLOCK(sc); 1995 1996 return (error); 1997 } 1998 1999 static int 2000 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg) 2001 { 2002 struct ifvxlancmd *cmd; 2003 union vxlan_sockaddr *vxlsa; 2004 int error; 2005 2006 cmd = arg; 2007 vxlsa = &cmd->vxlcmd_sa; 2008 2009 if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa)) 2010 return (EINVAL); 2011 if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) { 2012 error = vxlan_sockaddr_in6_embedscope(vxlsa); 2013 if (error) 2014 return (error); 2015 } 2016 2017 VXLAN_WLOCK(sc); 2018 if (vxlan_can_change_config(sc)) { 2019 vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa); 2020 vxlan_setup_interface_hdrlen(sc); 2021 error = 0; 2022 } else 2023 error = EBUSY; 2024 VXLAN_WUNLOCK(sc); 2025 2026 return (error); 2027 } 2028 2029 static int 2030 vxlan_ctrl_set_local_port(struct vxlan_softc 
*sc, void *arg) 2031 { 2032 struct ifvxlancmd *cmd; 2033 int error; 2034 2035 cmd = arg; 2036 2037 if (cmd->vxlcmd_port == 0) 2038 return (EINVAL); 2039 2040 VXLAN_WLOCK(sc); 2041 if (vxlan_can_change_config(sc)) { 2042 sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port); 2043 error = 0; 2044 } else 2045 error = EBUSY; 2046 VXLAN_WUNLOCK(sc); 2047 2048 return (error); 2049 } 2050 2051 static int 2052 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg) 2053 { 2054 struct ifvxlancmd *cmd; 2055 int error; 2056 2057 cmd = arg; 2058 2059 if (cmd->vxlcmd_port == 0) 2060 return (EINVAL); 2061 2062 VXLAN_WLOCK(sc); 2063 if (vxlan_can_change_config(sc)) { 2064 sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port); 2065 error = 0; 2066 } else 2067 error = EBUSY; 2068 VXLAN_WUNLOCK(sc); 2069 2070 return (error); 2071 } 2072 2073 static int 2074 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg) 2075 { 2076 struct ifvxlancmd *cmd; 2077 uint16_t min, max; 2078 int error; 2079 2080 cmd = arg; 2081 min = cmd->vxlcmd_port_min; 2082 max = cmd->vxlcmd_port_max; 2083 2084 if (max < min) 2085 return (EINVAL); 2086 2087 VXLAN_WLOCK(sc); 2088 if (vxlan_can_change_config(sc)) { 2089 sc->vxl_min_port = min; 2090 sc->vxl_max_port = max; 2091 error = 0; 2092 } else 2093 error = EBUSY; 2094 VXLAN_WUNLOCK(sc); 2095 2096 return (error); 2097 } 2098 2099 static int 2100 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg) 2101 { 2102 struct ifvxlancmd *cmd; 2103 int error; 2104 2105 cmd = arg; 2106 2107 VXLAN_WLOCK(sc); 2108 if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) { 2109 sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout; 2110 error = 0; 2111 } else 2112 error = EINVAL; 2113 VXLAN_WUNLOCK(sc); 2114 2115 return (error); 2116 } 2117 2118 static int 2119 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg) 2120 { 2121 struct ifvxlancmd *cmd; 2122 int error; 2123 2124 cmd = arg; 2125 2126 VXLAN_WLOCK(sc); 2127 if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) { 2128 sc->vxl_ftable_max = cmd->vxlcmd_ftable_max; 2129 error = 0; 2130 } else 2131 error = EINVAL; 2132 VXLAN_WUNLOCK(sc); 2133 2134 return (error); 2135 } 2136 2137 static int 2138 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg) 2139 { 2140 struct ifvxlancmd *cmd; 2141 int error; 2142 2143 cmd = arg; 2144 2145 VXLAN_WLOCK(sc); 2146 if (vxlan_can_change_config(sc)) { 2147 strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ); 2148 vxlan_set_hwcaps(sc); 2149 error = 0; 2150 } else 2151 error = EBUSY; 2152 VXLAN_WUNLOCK(sc); 2153 2154 return (error); 2155 } 2156 2157 static int 2158 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg) 2159 { 2160 struct ifvxlancmd *cmd; 2161 int error; 2162 2163 cmd = arg; 2164 2165 VXLAN_WLOCK(sc); 2166 if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) { 2167 sc->vxl_ttl = cmd->vxlcmd_ttl; 2168 if (sc->vxl_im4o != NULL) 2169 sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl; 2170 if (sc->vxl_im6o != NULL) 2171 sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl; 2172 error = 0; 2173 } else 2174 error = EINVAL; 2175 VXLAN_WUNLOCK(sc); 2176 2177 return (error); 2178 } 2179 2180 static int 2181 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg) 2182 { 2183 struct ifvxlancmd *cmd; 2184 2185 cmd = arg; 2186 2187 VXLAN_WLOCK(sc); 2188 if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN) 2189 sc->vxl_flags |= VXLAN_FLAG_LEARN; 2190 else 2191 sc->vxl_flags &= ~VXLAN_FLAG_LEARN; 2192 VXLAN_WUNLOCK(sc); 2193 2194 return (0); 2195 } 2196 2197 static int 2198 
vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg) 2199 { 2200 union vxlan_sockaddr vxlsa; 2201 struct ifvxlancmd *cmd; 2202 struct vxlan_ftable_entry *fe; 2203 int error; 2204 2205 cmd = arg; 2206 vxlsa = cmd->vxlcmd_sa; 2207 2208 if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa)) 2209 return (EINVAL); 2210 if (vxlan_sockaddr_in_any(&vxlsa) != 0) 2211 return (EINVAL); 2212 if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) 2213 return (EINVAL); 2214 /* BMV: We could support both IPv4 and IPv6 later. */ 2215 if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family) 2216 return (EAFNOSUPPORT); 2217 2218 if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) { 2219 error = vxlan_sockaddr_in6_embedscope(&vxlsa); 2220 if (error) 2221 return (error); 2222 } 2223 2224 fe = vxlan_ftable_entry_alloc(); 2225 if (fe == NULL) 2226 return (ENOMEM); 2227 2228 if (vxlsa.in4.sin_port == 0) 2229 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port; 2230 2231 vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa, 2232 VXLAN_FE_FLAG_STATIC); 2233 2234 VXLAN_WLOCK(sc); 2235 error = vxlan_ftable_entry_insert(sc, fe); 2236 VXLAN_WUNLOCK(sc); 2237 2238 if (error) 2239 vxlan_ftable_entry_free(fe); 2240 2241 return (error); 2242 } 2243 2244 static int 2245 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg) 2246 { 2247 struct ifvxlancmd *cmd; 2248 struct vxlan_ftable_entry *fe; 2249 int error; 2250 2251 cmd = arg; 2252 2253 VXLAN_WLOCK(sc); 2254 fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac); 2255 if (fe != NULL) { 2256 vxlan_ftable_entry_destroy(sc, fe); 2257 error = 0; 2258 } else 2259 error = ENOENT; 2260 VXLAN_WUNLOCK(sc); 2261 2262 return (error); 2263 } 2264 2265 static int 2266 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg) 2267 { 2268 struct ifvxlancmd *cmd; 2269 int all; 2270 2271 cmd = arg; 2272 all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL; 2273 2274 VXLAN_WLOCK(sc); 2275 vxlan_ftable_flush(sc, all); 2276 VXLAN_WUNLOCK(sc); 2277 2278 return (0); 2279 } 2280 2281 static int 2282 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get) 2283 { 2284 const struct vxlan_control *vc; 2285 union { 2286 struct ifvxlancfg cfg; 2287 struct ifvxlancmd cmd; 2288 } args; 2289 int out, error; 2290 2291 if (ifd->ifd_cmd >= vxlan_control_table_size) 2292 return (EINVAL); 2293 2294 bzero(&args, sizeof(args)); 2295 vc = &vxlan_control_table[ifd->ifd_cmd]; 2296 out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0; 2297 2298 if ((get != 0 && out == 0) || (get == 0 && out != 0)) 2299 return (EINVAL); 2300 2301 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) { 2302 error = priv_check(curthread, PRIV_NET_VXLAN); 2303 if (error) 2304 return (error); 2305 } 2306 2307 if (ifd->ifd_len != vc->vxlc_argsize || 2308 ifd->ifd_len > sizeof(args)) 2309 return (EINVAL); 2310 2311 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) { 2312 error = copyin(ifd->ifd_data, &args, ifd->ifd_len); 2313 if (error) 2314 return (error); 2315 } 2316 2317 error = vc->vxlc_func(sc, &args); 2318 if (error) 2319 return (error); 2320 2321 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) { 2322 error = copyout(&args, ifd->ifd_data, ifd->ifd_len); 2323 if (error) 2324 return (error); 2325 } 2326 2327 return (0); 2328 } 2329 2330 static int 2331 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2332 { 2333 struct rm_priotracker tracker; 2334 struct vxlan_softc *sc; 2335 struct ifreq *ifr; 2336 struct ifdrv *ifd; 2337 int error; 2338 2339 sc = ifp->if_softc; 2340 ifr = (struct ifreq *) data; 2341 ifd = (struct ifdrv *) data; 2342 2343 error = 0; 
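	/*
	 * Dispatch: driver-specific requests (SIOCGDRVSPEC and SIOCSDRVSPEC)
	 * are decoded by vxlan_ioctl_drvspec() through vxlan_control_table,
	 * and anything not handled explicitly below falls through to
	 * ether_ioctl().
	 */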
2344 2345 switch (cmd) { 2346 case SIOCADDMULTI: 2347 case SIOCDELMULTI: 2348 break; 2349 2350 case SIOCGDRVSPEC: 2351 case SIOCSDRVSPEC: 2352 error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC); 2353 break; 2354 2355 case SIOCSIFFLAGS: 2356 error = vxlan_ioctl_ifflags(sc); 2357 break; 2358 2359 case SIOCSIFMEDIA: 2360 case SIOCGIFMEDIA: 2361 error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd); 2362 break; 2363 2364 case SIOCSIFMTU: 2365 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) { 2366 error = EINVAL; 2367 } else { 2368 VXLAN_WLOCK(sc); 2369 ifp->if_mtu = ifr->ifr_mtu; 2370 sc->vxl_flags |= VXLAN_FLAG_USER_MTU; 2371 VXLAN_WUNLOCK(sc); 2372 } 2373 break; 2374 2375 case SIOCSIFCAP: 2376 VXLAN_WLOCK(sc); 2377 error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap); 2378 if (error == 0) 2379 vxlan_set_hwcaps(sc); 2380 VXLAN_WUNLOCK(sc); 2381 break; 2382 2383 case SIOCGTUNFIB: 2384 VXLAN_RLOCK(sc, &tracker); 2385 ifr->ifr_fib = sc->vxl_fibnum; 2386 VXLAN_RUNLOCK(sc, &tracker); 2387 break; 2388 2389 case SIOCSTUNFIB: 2390 if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0) 2391 break; 2392 2393 if (ifr->ifr_fib >= rt_numfibs) 2394 error = EINVAL; 2395 else { 2396 VXLAN_WLOCK(sc); 2397 sc->vxl_fibnum = ifr->ifr_fib; 2398 VXLAN_WUNLOCK(sc); 2399 } 2400 break; 2401 2402 default: 2403 error = ether_ioctl(ifp, cmd, data); 2404 break; 2405 } 2406 2407 return (error); 2408 } 2409 2410 #if defined(INET) || defined(INET6) 2411 static uint16_t 2412 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m) 2413 { 2414 int range; 2415 uint32_t hash; 2416 2417 range = sc->vxl_max_port - sc->vxl_min_port + 1; 2418 2419 if (M_HASHTYPE_ISHASH(m)) 2420 hash = m->m_pkthdr.flowid; 2421 else 2422 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, 2423 sc->vxl_port_hash_key); 2424 2425 return (sc->vxl_min_port + (hash % range)); 2426 } 2427 2428 static void 2429 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff, 2430 uint16_t srcport, uint16_t dstport) 2431 { 2432 struct vxlanudphdr *hdr; 2433 struct udphdr *udph; 2434 struct vxlan_header *vxh; 2435 int len; 2436 2437 len = m->m_pkthdr.len - ipoff; 2438 MPASS(len >= sizeof(struct vxlanudphdr)); 2439 hdr = mtodo(m, ipoff); 2440 2441 udph = &hdr->vxlh_udp; 2442 udph->uh_sport = srcport; 2443 udph->uh_dport = dstport; 2444 udph->uh_ulen = htons(len); 2445 udph->uh_sum = 0; 2446 2447 vxh = &hdr->vxlh_hdr; 2448 vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI); 2449 vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT); 2450 } 2451 #endif 2452 2453 #if defined(INET6) || defined(INET) 2454 /* 2455 * Return the CSUM_INNER_* equivalent of CSUM_* caps. 2456 */ 2457 static uint32_t 2458 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap) 2459 { 2460 uint32_t csum_flags = encap; 2461 const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP; 2462 2463 /* 2464 * csum_flags can request either v4 or v6 offload but not both. 2465 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO) 2466 * so those bits are no good to detect the IP version. Other bits are 2467 * always set with CSUM_TSO and we use those to figure out the IP 2468 * version. 
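 *
 * For example, an inner IPv4 TCP segment handed down with CSUM_IP |
 * CSUM_IP_TCP (plus CSUM_IP_TSO when TSO is requested) is rewritten to
 * encap | CSUM_INNER_IP | CSUM_INNER_IP_TCP (| CSUM_INNER_IP_TSO), where
 * encap is CSUM_ENCAP_VXLAN for the callers in this file.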
2469 */ 2470 if (csum_flags_in & v4) { 2471 if (csum_flags_in & CSUM_IP) 2472 csum_flags |= CSUM_INNER_IP; 2473 if (csum_flags_in & CSUM_IP_UDP) 2474 csum_flags |= CSUM_INNER_IP_UDP; 2475 if (csum_flags_in & CSUM_IP_TCP) 2476 csum_flags |= CSUM_INNER_IP_TCP; 2477 if (csum_flags_in & CSUM_IP_TSO) 2478 csum_flags |= CSUM_INNER_IP_TSO; 2479 } else { 2480 #ifdef INVARIANTS 2481 const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP; 2482 2483 MPASS((csum_flags_in & v6) != 0); 2484 #endif 2485 if (csum_flags_in & CSUM_IP6_UDP) 2486 csum_flags |= CSUM_INNER_IP6_UDP; 2487 if (csum_flags_in & CSUM_IP6_TCP) 2488 csum_flags |= CSUM_INNER_IP6_TCP; 2489 if (csum_flags_in & CSUM_IP6_TSO) 2490 csum_flags |= CSUM_INNER_IP6_TSO; 2491 } 2492 2493 return (csum_flags); 2494 } 2495 #endif 2496 2497 static int 2498 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2499 struct mbuf *m) 2500 { 2501 #ifdef INET 2502 struct ifnet *ifp; 2503 struct ip *ip; 2504 struct in_addr srcaddr, dstaddr; 2505 uint16_t srcport, dstport; 2506 int len, mcast, error; 2507 struct route route, *ro; 2508 struct sockaddr_in *sin; 2509 uint32_t csum_flags; 2510 2511 NET_EPOCH_ASSERT(); 2512 2513 ifp = sc->vxl_ifp; 2514 srcaddr = sc->vxl_src_addr.in4.sin_addr; 2515 srcport = vxlan_pick_source_port(sc, m); 2516 dstaddr = fvxlsa->in4.sin_addr; 2517 dstport = fvxlsa->in4.sin_port; 2518 2519 M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr), 2520 M_NOWAIT); 2521 if (m == NULL) { 2522 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2523 return (ENOBUFS); 2524 } 2525 2526 len = m->m_pkthdr.len; 2527 2528 ip = mtod(m, struct ip *); 2529 ip->ip_tos = 0; 2530 ip->ip_len = htons(len); 2531 ip->ip_off = 0; 2532 ip->ip_ttl = sc->vxl_ttl; 2533 ip->ip_p = IPPROTO_UDP; 2534 ip->ip_sum = 0; 2535 ip->ip_src = srcaddr; 2536 ip->ip_dst = dstaddr; 2537 2538 vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport); 2539 2540 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 2541 m->m_flags &= ~(M_MCAST | M_BCAST); 2542 2543 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2544 if (m->m_pkthdr.csum_flags != 0) { 2545 /* 2546 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2547 * up the ifnet for the outbound route and verify that the 2548 * outbound ifnet can perform the requested operation on the 2549 * inner frame. 
2550 */ 2551 bzero(&route, sizeof(route)); 2552 ro = &route; 2553 sin = (struct sockaddr_in *)&ro->ro_dst; 2554 sin->sin_family = AF_INET; 2555 sin->sin_len = sizeof(*sin); 2556 sin->sin_addr = ip->ip_dst; 2557 ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 2558 0); 2559 if (ro->ro_nh == NULL) { 2560 m_freem(m); 2561 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2562 return (EHOSTUNREACH); 2563 } 2564 2565 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2566 CSUM_ENCAP_VXLAN); 2567 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2568 csum_flags) { 2569 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2570 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2571 2572 if_printf(ifp, "interface %s is missing hwcaps " 2573 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2574 "hwassist 0x%08x\n", nh_ifp->if_xname, 2575 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2576 m->m_pkthdr.csum_flags, csum_flags, 2577 (uint32_t)nh_ifp->if_hwassist); 2578 } 2579 m_freem(m); 2580 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2581 return (ENXIO); 2582 } 2583 m->m_pkthdr.csum_flags = csum_flags; 2584 if (csum_flags & 2585 (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | 2586 CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { 2587 counter_u64_add(sc->vxl_stats.txcsum, 1); 2588 if (csum_flags & CSUM_INNER_TSO) 2589 counter_u64_add(sc->vxl_stats.tso, 1); 2590 } 2591 } else 2592 ro = NULL; 2593 error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL); 2594 if (error == 0) { 2595 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 2596 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 2597 if (mcast != 0) 2598 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 2599 } else 2600 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2601 2602 return (error); 2603 #else 2604 m_freem(m); 2605 return (ENOTSUP); 2606 #endif 2607 } 2608 2609 static int 2610 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2611 struct mbuf *m) 2612 { 2613 #ifdef INET6 2614 struct ifnet *ifp; 2615 struct ip6_hdr *ip6; 2616 const struct in6_addr *srcaddr, *dstaddr; 2617 uint16_t srcport, dstport; 2618 int len, mcast, error; 2619 struct route_in6 route, *ro; 2620 struct sockaddr_in6 *sin6; 2621 uint32_t csum_flags; 2622 2623 NET_EPOCH_ASSERT(); 2624 2625 ifp = sc->vxl_ifp; 2626 srcaddr = &sc->vxl_src_addr.in6.sin6_addr; 2627 srcport = vxlan_pick_source_port(sc, m); 2628 dstaddr = &fvxlsa->in6.sin6_addr; 2629 dstport = fvxlsa->in6.sin6_port; 2630 2631 M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr), 2632 M_NOWAIT); 2633 if (m == NULL) { 2634 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2635 return (ENOBUFS); 2636 } 2637 2638 len = m->m_pkthdr.len; 2639 2640 ip6 = mtod(m, struct ip6_hdr *); 2641 ip6->ip6_flow = 0; /* BMV: Keep in forwarding entry? */ 2642 ip6->ip6_vfc = IPV6_VERSION; 2643 ip6->ip6_plen = 0; 2644 ip6->ip6_nxt = IPPROTO_UDP; 2645 ip6->ip6_hlim = sc->vxl_ttl; 2646 ip6->ip6_src = *srcaddr; 2647 ip6->ip6_dst = *dstaddr; 2648 2649 vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport); 2650 2651 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 2652 m->m_flags &= ~(M_MCAST | M_BCAST); 2653 2654 ro = NULL; 2655 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2656 if (m->m_pkthdr.csum_flags != 0) { 2657 /* 2658 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2659 * up the ifnet for the outbound route and verify that the 2660 * outbound ifnet can perform the requested operation on the 2661 * inner frame. 
2662 */ 2663 bzero(&route, sizeof(route)); 2664 ro = &route; 2665 sin6 = (struct sockaddr_in6 *)&ro->ro_dst; 2666 sin6->sin6_family = AF_INET6; 2667 sin6->sin6_len = sizeof(*sin6); 2668 sin6->sin6_addr = ip6->ip6_dst; 2669 ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0, 2670 NHR_NONE, 0); 2671 if (ro->ro_nh == NULL) { 2672 m_freem(m); 2673 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2674 return (EHOSTUNREACH); 2675 } 2676 2677 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2678 CSUM_ENCAP_VXLAN); 2679 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2680 csum_flags) { 2681 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2682 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2683 2684 if_printf(ifp, "interface %s is missing hwcaps " 2685 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2686 "hwassist 0x%08x\n", nh_ifp->if_xname, 2687 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2688 m->m_pkthdr.csum_flags, csum_flags, 2689 (uint32_t)nh_ifp->if_hwassist); 2690 } 2691 m_freem(m); 2692 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2693 return (ENXIO); 2694 } 2695 m->m_pkthdr.csum_flags = csum_flags; 2696 if (csum_flags & 2697 (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | 2698 CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { 2699 counter_u64_add(sc->vxl_stats.txcsum, 1); 2700 if (csum_flags & CSUM_INNER_TSO) 2701 counter_u64_add(sc->vxl_stats.tso, 1); 2702 } 2703 } else if (ntohs(dstport) != V_zero_checksum_port) { 2704 struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr)); 2705 2706 hdr->uh_sum = in6_cksum_pseudo(ip6, 2707 m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0); 2708 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; 2709 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 2710 } 2711 error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL); 2712 if (error == 0) { 2713 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 2714 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 2715 if (mcast != 0) 2716 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 2717 } else 2718 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2719 2720 return (error); 2721 #else 2722 m_freem(m); 2723 return (ENOTSUP); 2724 #endif 2725 } 2726 2727 static int 2728 vxlan_transmit(struct ifnet *ifp, struct mbuf *m) 2729 { 2730 struct rm_priotracker tracker; 2731 union vxlan_sockaddr vxlsa; 2732 struct vxlan_softc *sc; 2733 struct vxlan_ftable_entry *fe; 2734 struct ifnet *mcifp; 2735 struct ether_header *eh; 2736 int ipv4, error; 2737 2738 sc = ifp->if_softc; 2739 eh = mtod(m, struct ether_header *); 2740 fe = NULL; 2741 mcifp = NULL; 2742 2743 ETHER_BPF_MTAP(ifp, m); 2744 2745 VXLAN_RLOCK(sc, &tracker); 2746 M_SETFIB(m, sc->vxl_fibnum); 2747 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2748 VXLAN_RUNLOCK(sc, &tracker); 2749 m_freem(m); 2750 return (ENETDOWN); 2751 } 2752 2753 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) 2754 fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost); 2755 if (fe == NULL) 2756 fe = &sc->vxl_default_fe; 2757 vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa); 2758 2759 ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0; 2760 if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) 2761 mcifp = vxlan_multicast_if_ref(sc, ipv4); 2762 2763 VXLAN_ACQUIRE(sc); 2764 VXLAN_RUNLOCK(sc, &tracker); 2765 2766 if (ipv4 != 0) 2767 error = vxlan_encap4(sc, &vxlsa, m); 2768 else 2769 error = vxlan_encap6(sc, &vxlsa, m); 2770 2771 vxlan_release(sc); 2772 if (mcifp != NULL) 2773 if_rele(mcifp); 2774 2775 return (error); 2776 } 2777 2778 static void 2779 vxlan_qflush(struct ifnet *ifp __unused) 2780 { 2781 } 2782 2783 
static bool 2784 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb, 2785 const struct sockaddr *srcsa, void *xvso) 2786 { 2787 struct vxlan_socket *vso; 2788 struct vxlan_header *vxh, vxlanhdr; 2789 uint32_t vni; 2790 int error __unused; 2791 2792 M_ASSERTPKTHDR(m); 2793 vso = xvso; 2794 offset += sizeof(struct udphdr); 2795 2796 if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header)) 2797 goto out; 2798 2799 if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) { 2800 m_copydata(m, offset, sizeof(struct vxlan_header), 2801 (caddr_t) &vxlanhdr); 2802 vxh = &vxlanhdr; 2803 } else 2804 vxh = mtodo(m, offset); 2805 2806 /* 2807 * Drop if there is a reserved bit set in either the flags or VNI 2808 * fields of the header. This goes against the specification, but 2809 * a bit set may indicate an unsupported new feature. This matches 2810 * the behavior of the Linux implementation. 2811 */ 2812 if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) || 2813 vxh->vxlh_vni & ~VXLAN_VNI_MASK) 2814 goto out; 2815 2816 vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT; 2817 2818 /* Adjust to the start of the inner Ethernet frame. */ 2819 m_adj_decap(m, offset + sizeof(struct vxlan_header)); 2820 2821 error = vxlan_input(vso, vni, &m, srcsa); 2822 MPASS(error != 0 || m == NULL); 2823 2824 out: 2825 if (m != NULL) 2826 m_freem(m); 2827 2828 return (true); 2829 } 2830 2831 static int 2832 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0, 2833 const struct sockaddr *sa) 2834 { 2835 struct vxlan_softc *sc; 2836 struct ifnet *ifp; 2837 struct mbuf *m; 2838 struct ether_header *eh; 2839 int error; 2840 2841 m = *m0; 2842 2843 if (m->m_pkthdr.len < ETHER_HDR_LEN) 2844 return (EINVAL); 2845 2846 sc = vxlan_socket_lookup_softc(vso, vni); 2847 if (sc == NULL) 2848 return (ENOENT); 2849 2850 ifp = sc->vxl_ifp; 2851 eh = mtod(m, struct ether_header *); 2852 2853 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2854 error = ENETDOWN; 2855 goto out; 2856 } else if (ifp == m->m_pkthdr.rcvif) { 2857 /* XXX Does not catch more complex loops. 
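		 * (For instance, a decapsulated frame that is bridged or
		 * routed into another vxlan interface and re-encapsulated
		 * over this one would not be detected by this check.)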
*/ 2858 error = EDEADLK; 2859 goto out; 2860 } 2861 2862 if (sc->vxl_flags & VXLAN_FLAG_LEARN) 2863 vxlan_ftable_learn(sc, sa, eh->ether_shost); 2864 2865 m_clrprotoflags(m); 2866 m->m_pkthdr.rcvif = ifp; 2867 M_SETFIB(m, ifp->if_fib); 2868 if (((ifp->if_capenable & IFCAP_RXCSUM && 2869 m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) || 2870 (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2871 !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) { 2872 uint32_t csum_flags = 0; 2873 2874 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) 2875 csum_flags |= CSUM_L3_CALC; 2876 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID) 2877 csum_flags |= CSUM_L3_VALID; 2878 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC) 2879 csum_flags |= CSUM_L4_CALC; 2880 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID) 2881 csum_flags |= CSUM_L4_VALID; 2882 m->m_pkthdr.csum_flags = csum_flags; 2883 counter_u64_add(sc->vxl_stats.rxcsum, 1); 2884 } else { 2885 /* clear everything */ 2886 m->m_pkthdr.csum_flags = 0; 2887 m->m_pkthdr.csum_data = 0; 2888 } 2889 2890 (*ifp->if_input)(ifp, m); 2891 *m0 = NULL; 2892 error = 0; 2893 2894 out: 2895 vxlan_release(sc); 2896 return (error); 2897 } 2898 2899 static int 2900 vxlan_stats_alloc(struct vxlan_softc *sc) 2901 { 2902 struct vxlan_statistics *stats = &sc->vxl_stats; 2903 2904 stats->txcsum = counter_u64_alloc(M_WAITOK); 2905 if (stats->txcsum == NULL) 2906 goto failed; 2907 2908 stats->tso = counter_u64_alloc(M_WAITOK); 2909 if (stats->tso == NULL) 2910 goto failed; 2911 2912 stats->rxcsum = counter_u64_alloc(M_WAITOK); 2913 if (stats->rxcsum == NULL) 2914 goto failed; 2915 2916 return (0); 2917 failed: 2918 vxlan_stats_free(sc); 2919 return (ENOMEM); 2920 } 2921 2922 static void 2923 vxlan_stats_free(struct vxlan_softc *sc) 2924 { 2925 struct vxlan_statistics *stats = &sc->vxl_stats; 2926 2927 if (stats->txcsum != NULL) { 2928 counter_u64_free(stats->txcsum); 2929 stats->txcsum = NULL; 2930 } 2931 if (stats->tso != NULL) { 2932 counter_u64_free(stats->tso); 2933 stats->tso = NULL; 2934 } 2935 if (stats->rxcsum != NULL) { 2936 counter_u64_free(stats->rxcsum); 2937 stats->rxcsum = NULL; 2938 } 2939 } 2940 2941 static void 2942 vxlan_set_default_config(struct vxlan_softc *sc) 2943 { 2944 2945 sc->vxl_flags |= VXLAN_FLAG_LEARN; 2946 2947 sc->vxl_vni = VXLAN_VNI_MAX; 2948 sc->vxl_ttl = IPDEFTTL; 2949 2950 if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) { 2951 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT); 2952 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT); 2953 } else { 2954 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); 2955 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); 2956 } 2957 2958 sc->vxl_min_port = V_ipport_firstauto; 2959 sc->vxl_max_port = V_ipport_lastauto; 2960 2961 sc->vxl_ftable_max = VXLAN_FTABLE_MAX; 2962 sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT; 2963 } 2964 2965 static int 2966 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp) 2967 { 2968 2969 #ifndef INET 2970 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 | 2971 VXLAN_PARAM_WITH_REMOTE_ADDR4)) 2972 return (EAFNOSUPPORT); 2973 #endif 2974 2975 #ifndef INET6 2976 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 | 2977 VXLAN_PARAM_WITH_REMOTE_ADDR6)) 2978 return (EAFNOSUPPORT); 2979 #else 2980 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { 2981 int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa); 2982 if (error) 2983 return (error); 2984 } 2985 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { 2986 int error = 
vxlan_sockaddr_in6_embedscope( 2987 &vxlp->vxlp_remote_sa); 2988 if (error) 2989 return (error); 2990 } 2991 #endif 2992 2993 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) { 2994 if (vxlan_check_vni(vxlp->vxlp_vni) == 0) 2995 sc->vxl_vni = vxlp->vxlp_vni; 2996 } 2997 2998 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) { 2999 sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in); 3000 sc->vxl_src_addr.in4.sin_family = AF_INET; 3001 sc->vxl_src_addr.in4.sin_addr = 3002 vxlp->vxlp_local_sa.in4.sin_addr; 3003 } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { 3004 sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6); 3005 sc->vxl_src_addr.in6.sin6_family = AF_INET6; 3006 sc->vxl_src_addr.in6.sin6_addr = 3007 vxlp->vxlp_local_sa.in6.sin6_addr; 3008 } 3009 3010 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) { 3011 sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in); 3012 sc->vxl_dst_addr.in4.sin_family = AF_INET; 3013 sc->vxl_dst_addr.in4.sin_addr = 3014 vxlp->vxlp_remote_sa.in4.sin_addr; 3015 } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { 3016 sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6); 3017 sc->vxl_dst_addr.in6.sin6_family = AF_INET6; 3018 sc->vxl_dst_addr.in6.sin6_addr = 3019 vxlp->vxlp_remote_sa.in6.sin6_addr; 3020 } 3021 3022 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT) 3023 sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port); 3024 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT) 3025 sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port); 3026 3027 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) { 3028 if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) { 3029 sc->vxl_min_port = vxlp->vxlp_min_port; 3030 sc->vxl_max_port = vxlp->vxlp_max_port; 3031 } 3032 } 3033 3034 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF) 3035 strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ); 3036 3037 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) { 3038 if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0) 3039 sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout; 3040 } 3041 3042 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) { 3043 if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0) 3044 sc->vxl_ftable_max = vxlp->vxlp_ftable_max; 3045 } 3046 3047 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) { 3048 if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0) 3049 sc->vxl_ttl = vxlp->vxlp_ttl; 3050 } 3051 3052 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) { 3053 if (vxlp->vxlp_learn == 0) 3054 sc->vxl_flags &= ~VXLAN_FLAG_LEARN; 3055 } 3056 3057 return (0); 3058 } 3059 3060 static int 3061 vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap) 3062 { 3063 int mask = reqcap ^ ifp->if_capenable; 3064 3065 /* Disable TSO if tx checksums are disabled. */ 3066 if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) && 3067 reqcap & IFCAP_TSO4) { 3068 reqcap &= ~IFCAP_TSO4; 3069 if_printf(ifp, "tso4 disabled due to -txcsum.\n"); 3070 } 3071 if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) && 3072 reqcap & IFCAP_TSO6) { 3073 reqcap &= ~IFCAP_TSO6; 3074 if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); 3075 } 3076 3077 /* Do not enable TSO if tx checksums are disabled. 
*/ 3078 if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 && 3079 !(reqcap & IFCAP_TXCSUM)) { 3080 if_printf(ifp, "enable txcsum first.\n"); 3081 return (EAGAIN); 3082 } 3083 if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 && 3084 !(reqcap & IFCAP_TXCSUM_IPV6)) { 3085 if_printf(ifp, "enable txcsum6 first.\n"); 3086 return (EAGAIN); 3087 } 3088 3089 sc->vxl_reqcap = reqcap; 3090 return (0); 3091 } 3092 3093 /* 3094 * A VXLAN interface inherits the capabilities of the vxlandev or the interface 3095 * hosting the vxlanlocal address. 3096 */ 3097 static void 3098 vxlan_set_hwcaps(struct vxlan_softc *sc) 3099 { 3100 struct epoch_tracker et; 3101 struct ifnet *p; 3102 struct ifaddr *ifa; 3103 u_long hwa; 3104 int cap, ena; 3105 bool rel; 3106 struct ifnet *ifp = sc->vxl_ifp; 3107 3108 /* reset caps */ 3109 ifp->if_capabilities &= VXLAN_BASIC_IFCAPS; 3110 ifp->if_capenable &= VXLAN_BASIC_IFCAPS; 3111 ifp->if_hwassist = 0; 3112 3113 NET_EPOCH_ENTER(et); 3114 CURVNET_SET(ifp->if_vnet); 3115 3116 rel = false; 3117 p = NULL; 3118 if (sc->vxl_mc_ifname[0] != '\0') { 3119 rel = true; 3120 p = ifunit_ref(sc->vxl_mc_ifname); 3121 } else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { 3122 if (sc->vxl_src_addr.sa.sa_family == AF_INET) { 3123 struct sockaddr_in in4 = sc->vxl_src_addr.in4; 3124 3125 in4.sin_port = 0; 3126 ifa = ifa_ifwithaddr((struct sockaddr *)&in4); 3127 if (ifa != NULL) 3128 p = ifa->ifa_ifp; 3129 } else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) { 3130 struct sockaddr_in6 in6 = sc->vxl_src_addr.in6; 3131 3132 in6.sin6_port = 0; 3133 ifa = ifa_ifwithaddr((struct sockaddr *)&in6); 3134 if (ifa != NULL) 3135 p = ifa->ifa_ifp; 3136 } 3137 } 3138 if (p == NULL) 3139 goto done; 3140 3141 cap = ena = hwa = 0; 3142 3143 /* checksum offload */ 3144 if (p->if_capabilities & IFCAP_VXLAN_HWCSUM) 3145 cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 3146 if (p->if_capenable & IFCAP_VXLAN_HWCSUM) { 3147 ena |= sc->vxl_reqcap & p->if_capenable & 3148 (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 3149 if (ena & IFCAP_TXCSUM) { 3150 if (p->if_hwassist & CSUM_INNER_IP) 3151 hwa |= CSUM_IP; 3152 if (p->if_hwassist & CSUM_INNER_IP_UDP) 3153 hwa |= CSUM_IP_UDP; 3154 if (p->if_hwassist & CSUM_INNER_IP_TCP) 3155 hwa |= CSUM_IP_TCP; 3156 } 3157 if (ena & IFCAP_TXCSUM_IPV6) { 3158 if (p->if_hwassist & CSUM_INNER_IP6_UDP) 3159 hwa |= CSUM_IP6_UDP; 3160 if (p->if_hwassist & CSUM_INNER_IP6_TCP) 3161 hwa |= CSUM_IP6_TCP; 3162 } 3163 } 3164 3165 /* hardware TSO */ 3166 if (p->if_capabilities & IFCAP_VXLAN_HWTSO) { 3167 cap |= p->if_capabilities & IFCAP_TSO; 3168 if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen) 3169 ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen; 3170 else 3171 ifp->if_hw_tsomax = p->if_hw_tsomax; 3172 /* XXX: tsomaxsegcount decrement is cxgbe specific */ 3173 ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1; 3174 ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize; 3175 } 3176 if (p->if_capenable & IFCAP_VXLAN_HWTSO) { 3177 ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO; 3178 if (ena & IFCAP_TSO) { 3179 if (p->if_hwassist & CSUM_INNER_IP_TSO) 3180 hwa |= CSUM_IP_TSO; 3181 if (p->if_hwassist & CSUM_INNER_IP6_TSO) 3182 hwa |= CSUM_IP6_TSO; 3183 } 3184 } 3185 3186 ifp->if_capabilities |= cap; 3187 ifp->if_capenable |= ena; 3188 ifp->if_hwassist |= hwa; 3189 if (rel) 3190 if_rele(p); 3191 done: 3192 CURVNET_RESTORE(); 3193 NET_EPOCH_EXIT(et); 3194 } 3195 3196 static int 3197 vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params) 3198 { 3199 struct vxlan_softc *sc; 3200 
struct ifnet *ifp; 3201 struct ifvxlanparam vxlp; 3202 int error; 3203 3204 sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO); 3205 sc->vxl_unit = unit; 3206 sc->vxl_fibnum = curthread->td_proc->p_fibnum; 3207 vxlan_set_default_config(sc); 3208 error = vxlan_stats_alloc(sc); 3209 if (error != 0) 3210 goto fail; 3211 3212 if (params != 0) { 3213 error = copyin(params, &vxlp, sizeof(vxlp)); 3214 if (error) 3215 goto fail; 3216 3217 error = vxlan_set_user_config(sc, &vxlp); 3218 if (error) 3219 goto fail; 3220 } 3221 3222 ifp = if_alloc(IFT_ETHER); 3223 if (ifp == NULL) { 3224 error = ENOSPC; 3225 goto fail; 3226 } 3227 3228 sc->vxl_ifp = ifp; 3229 rm_init(&sc->vxl_lock, "vxlanrm"); 3230 callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0); 3231 sc->vxl_port_hash_key = arc4random(); 3232 vxlan_ftable_init(sc); 3233 3234 vxlan_sysctl_setup(sc); 3235 3236 ifp->if_softc = sc; 3237 if_initname(ifp, vxlan_name, unit); 3238 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3239 ifp->if_init = vxlan_init; 3240 ifp->if_ioctl = vxlan_ioctl; 3241 ifp->if_transmit = vxlan_transmit; 3242 ifp->if_qflush = vxlan_qflush; 3243 ifp->if_capabilities = VXLAN_BASIC_IFCAPS; 3244 ifp->if_capenable = VXLAN_BASIC_IFCAPS; 3245 sc->vxl_reqcap = -1; 3246 vxlan_set_hwcaps(sc); 3247 3248 ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status); 3249 ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL); 3250 ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO); 3251 3252 ether_gen_addr(ifp, &sc->vxl_hwaddr); 3253 ether_ifattach(ifp, sc->vxl_hwaddr.octet); 3254 3255 ifp->if_baudrate = 0; 3256 3257 VXLAN_WLOCK(sc); 3258 vxlan_setup_interface_hdrlen(sc); 3259 VXLAN_WUNLOCK(sc); 3260 3261 return (0); 3262 3263 fail: 3264 free(sc, M_VXLAN); 3265 return (error); 3266 } 3267 3268 static void 3269 vxlan_clone_destroy(struct ifnet *ifp) 3270 { 3271 struct vxlan_softc *sc; 3272 3273 sc = ifp->if_softc; 3274 3275 vxlan_teardown(sc); 3276 3277 vxlan_ftable_flush(sc, 1); 3278 3279 ether_ifdetach(ifp); 3280 if_free(ifp); 3281 ifmedia_removeall(&sc->vxl_media); 3282 3283 vxlan_ftable_fini(sc); 3284 3285 vxlan_sysctl_destroy(sc); 3286 rm_destroy(&sc->vxl_lock); 3287 vxlan_stats_free(sc); 3288 free(sc, M_VXLAN); 3289 } 3290 3291 /* BMV: Taken from if_bridge. */ 3292 static uint32_t 3293 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr) 3294 { 3295 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key; 3296 3297 b += addr[5] << 8; 3298 b += addr[4]; 3299 a += addr[3] << 24; 3300 a += addr[2] << 16; 3301 a += addr[1] << 8; 3302 a += addr[0]; 3303 3304 /* 3305 * The following hash function is adapted from "Hash Functions" by Bob Jenkins 3306 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 3307 */ 3308 #define mix(a, b, c) \ 3309 do { \ 3310 a -= b; a -= c; a ^= (c >> 13); \ 3311 b -= c; b -= a; b ^= (a << 8); \ 3312 c -= a; c -= b; c ^= (b >> 13); \ 3313 a -= b; a -= c; a ^= (c >> 12); \ 3314 b -= c; b -= a; b ^= (a << 16); \ 3315 c -= a; c -= b; c ^= (b >> 5); \ 3316 a -= b; a -= c; a ^= (c >> 3); \ 3317 b -= c; b -= a; b ^= (a << 10); \ 3318 c -= a; c -= b; c ^= (b >> 15); \ 3319 } while (0) 3320 3321 mix(a, b, c); 3322 3323 #undef mix 3324 3325 return (c); 3326 } 3327 3328 static int 3329 vxlan_media_change(struct ifnet *ifp) 3330 { 3331 3332 /* Ignore. 
*/ 3333 return (0); 3334 } 3335 3336 static void 3337 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3338 { 3339 3340 ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID; 3341 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3342 } 3343 3344 static int 3345 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr, 3346 const struct sockaddr *sa) 3347 { 3348 3349 return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len)); 3350 } 3351 3352 static void 3353 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr, 3354 const struct sockaddr *sa) 3355 { 3356 3357 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3358 bzero(vxladdr, sizeof(*vxladdr)); 3359 3360 if (sa->sa_family == AF_INET) { 3361 vxladdr->in4 = *satoconstsin(sa); 3362 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3363 } else if (sa->sa_family == AF_INET6) { 3364 vxladdr->in6 = *satoconstsin6(sa); 3365 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3366 } 3367 } 3368 3369 static int 3370 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr, 3371 const struct sockaddr *sa) 3372 { 3373 int equal; 3374 3375 if (sa->sa_family == AF_INET) { 3376 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3377 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr; 3378 } else if (sa->sa_family == AF_INET6) { 3379 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3380 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr); 3381 } else 3382 equal = 0; 3383 3384 return (equal); 3385 } 3386 3387 static void 3388 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr, 3389 const struct sockaddr *sa) 3390 { 3391 3392 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3393 3394 if (sa->sa_family == AF_INET) { 3395 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3396 vxladdr->in4.sin_family = AF_INET; 3397 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3398 vxladdr->in4.sin_addr = *in4; 3399 } else if (sa->sa_family == AF_INET6) { 3400 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3401 vxladdr->in6.sin6_family = AF_INET6; 3402 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3403 vxladdr->in6.sin6_addr = *in6; 3404 } 3405 } 3406 3407 static int 3408 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec) 3409 { 3410 const struct sockaddr *sa; 3411 int supported; 3412 3413 sa = &vxladdr->sa; 3414 supported = 0; 3415 3416 if (sa->sa_family == AF_UNSPEC && unspec != 0) { 3417 supported = 1; 3418 } else if (sa->sa_family == AF_INET) { 3419 #ifdef INET 3420 supported = 1; 3421 #endif 3422 } else if (sa->sa_family == AF_INET6) { 3423 #ifdef INET6 3424 supported = 1; 3425 #endif 3426 } 3427 3428 return (supported); 3429 } 3430 3431 static int 3432 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr) 3433 { 3434 const struct sockaddr *sa; 3435 int any; 3436 3437 sa = &vxladdr->sa; 3438 3439 if (sa->sa_family == AF_INET) { 3440 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3441 any = in4->s_addr == INADDR_ANY; 3442 } else if (sa->sa_family == AF_INET6) { 3443 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3444 any = IN6_IS_ADDR_UNSPECIFIED(in6); 3445 } else 3446 any = -1; 3447 3448 return (any); 3449 } 3450 3451 static int 3452 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr) 3453 { 3454 const struct sockaddr *sa; 3455 int mc; 3456 3457 sa = &vxladdr->sa; 3458 3459 if (sa->sa_family == AF_INET) { 3460 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3461 mc = IN_MULTICAST(ntohl(in4->s_addr)); 3462 } else if (sa->sa_family 
== AF_INET6) { 3463 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3464 mc = IN6_IS_ADDR_MULTICAST(in6); 3465 } else 3466 mc = -1; 3467 3468 return (mc); 3469 } 3470 3471 static int 3472 vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr) 3473 { 3474 int error; 3475 3476 MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr)); 3477 #ifdef INET6 3478 error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone); 3479 #else 3480 error = EAFNOSUPPORT; 3481 #endif 3482 3483 return (error); 3484 } 3485 3486 static int 3487 vxlan_can_change_config(struct vxlan_softc *sc) 3488 { 3489 struct ifnet *ifp; 3490 3491 ifp = sc->vxl_ifp; 3492 VXLAN_LOCK_ASSERT(sc); 3493 3494 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3495 return (0); 3496 if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN)) 3497 return (0); 3498 3499 return (1); 3500 } 3501 3502 static int 3503 vxlan_check_vni(uint32_t vni) 3504 { 3505 3506 return (vni >= VXLAN_VNI_MAX); 3507 } 3508 3509 static int 3510 vxlan_check_ttl(int ttl) 3511 { 3512 3513 return (ttl > MAXTTL); 3514 } 3515 3516 static int 3517 vxlan_check_ftable_timeout(uint32_t timeout) 3518 { 3519 3520 return (timeout > VXLAN_FTABLE_MAX_TIMEOUT); 3521 } 3522 3523 static int 3524 vxlan_check_ftable_max(uint32_t max) 3525 { 3526 3527 return (max > VXLAN_FTABLE_MAX); 3528 } 3529 3530 static void 3531 vxlan_sysctl_setup(struct vxlan_softc *sc) 3532 { 3533 struct sysctl_ctx_list *ctx; 3534 struct sysctl_oid *node; 3535 struct vxlan_statistics *stats; 3536 char namebuf[8]; 3537 3538 ctx = &sc->vxl_sysctl_ctx; 3539 stats = &sc->vxl_stats; 3540 snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit); 3541 3542 sysctl_ctx_init(ctx); 3543 sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx, 3544 SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf, 3545 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3546 3547 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), 3548 OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3549 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count", 3550 CTLFLAG_RD, &sc->vxl_ftable_cnt, 0, 3551 "Number of entries in forwarding table"); 3552 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max", 3553 CTLFLAG_RD, &sc->vxl_ftable_max, 0, 3554 "Maximum number of entries allowed in forwarding table"); 3555 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout", 3556 CTLFLAG_RD, &sc->vxl_ftable_timeout, 0, 3557 "Number of seconds between prunes of the forwarding table"); 3558 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump", 3559 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP, 3560 sc, 0, vxlan_ftable_sysctl_dump, "A", 3561 "Dump the forwarding table entries"); 3562 3563 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), 3564 OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3565 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 3566 "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0, 3567 "Forwarding table reached maximum entries"); 3568 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 3569 "ftable_lock_upgrade_failed", CTLFLAG_RD, 3570 &stats->ftable_lock_upgrade_failed, 0, 3571 "Forwarding table update required lock upgrade"); 3572 3573 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum", 3574 CTLFLAG_RD, &stats->txcsum, 3575 "# of times hardware assisted with tx checksum"); 3576 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso", 3577 CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO"); 3578 SYSCTL_ADD_COUNTER_U64(ctx,
SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum", 3579 CTLFLAG_RD, &stats->rxcsum, 3580 "# of times hardware assisted with rx checksum"); 3581 } 3582 3583 static void 3584 vxlan_sysctl_destroy(struct vxlan_softc *sc) 3585 { 3586 3587 sysctl_ctx_free(&sc->vxl_sysctl_ctx); 3588 sc->vxl_sysctl_node = NULL; 3589 } 3590 3591 static int 3592 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def) 3593 { 3594 char path[64]; 3595 3596 snprintf(path, sizeof(path), "net.link.vxlan.%d.%s", 3597 sc->vxl_unit, knob); 3598 TUNABLE_INT_FETCH(path, &def); 3599 3600 return (def); 3601 } 3602 3603 static void 3604 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp) 3605 { 3606 struct vxlan_softc_head list; 3607 struct vxlan_socket *vso; 3608 struct vxlan_softc *sc, *tsc; 3609 3610 LIST_INIT(&list); 3611 3612 if (ifp->if_flags & IFF_RENAMING) 3613 return; 3614 if ((ifp->if_flags & IFF_MULTICAST) == 0) 3615 return; 3616 3617 VXLAN_LIST_LOCK(); 3618 LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) 3619 vxlan_socket_ifdetach(vso, ifp, &list); 3620 VXLAN_LIST_UNLOCK(); 3621 3622 LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) { 3623 LIST_REMOVE(sc, vxl_ifdetach_list); 3624 3625 sx_xlock(&vxlan_sx); 3626 VXLAN_WLOCK(sc); 3627 if (sc->vxl_flags & VXLAN_FLAG_INIT) 3628 vxlan_init_wait(sc); 3629 vxlan_teardown_locked(sc); 3630 sx_xunlock(&vxlan_sx); 3631 } 3632 } 3633 3634 static void 3635 vxlan_load(void) 3636 { 3637 3638 mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF); 3639 LIST_INIT(&vxlan_socket_list); 3640 vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 3641 vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY); 3642 vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create, 3643 vxlan_clone_destroy, 0); 3644 } 3645 3646 static void 3647 vxlan_unload(void) 3648 { 3649 3650 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 3651 vxlan_ifdetach_event_tag); 3652 if_clone_detach(vxlan_cloner); 3653 mtx_destroy(&vxlan_list_mtx); 3654 MPASS(LIST_EMPTY(&vxlan_socket_list)); 3655 } 3656 3657 static int 3658 vxlan_modevent(module_t mod, int type, void *unused) 3659 { 3660 int error; 3661 3662 error = 0; 3663 3664 switch (type) { 3665 case MOD_LOAD: 3666 vxlan_load(); 3667 break; 3668 case MOD_UNLOAD: 3669 vxlan_unload(); 3670 break; 3671 default: 3672 error = ENOTSUP; 3673 break; 3674 } 3675 3676 return (error); 3677 } 3678 3679 static moduledata_t vxlan_mod = { 3680 "if_vxlan", 3681 vxlan_modevent, 3682 0 3683 }; 3684 3685 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 3686 MODULE_VERSION(if_vxlan, 1); 3687
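
/*
 * Illustrative usage only (see ifconfig(8) for the authoritative syntax):
 *
 *   ifconfig vxlan0 create vxlanid 42 \
 *       vxlanlocal 192.0.2.1 vxlanremote 198.51.100.1
 *
 * creates a point-to-point vxlan interface carrying VNI 42 between the two
 * endpoints over the default UDP port (VXLAN_PORT, or VXLAN_LEGACY_PORT when
 * the per-interface legacy_port tunable is set; see
 * vxlan_set_default_config()).
 */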