/*-
 * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 * Copyright (c) 2020, Chelsio Communications.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/hash.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/refcount.h>
#include <sys/rmlock.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vxlan.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/route/nhop.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/ip_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/in_fib.h>
#include <netinet6/in6_fib.h>

#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>

struct vxlan_softc;
LIST_HEAD(vxlan_softc_head, vxlan_softc);

struct sx vxlan_sx;
SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock");

struct vxlan_socket_mc_info {
	union vxlan_sockaddr		 vxlsomc_saddr;
	union vxlan_sockaddr		 vxlsomc_gaddr;
	int				 vxlsomc_ifidx;
	int				 vxlsomc_users;
};

/*
 * The maximum MTU of encapsulated ethernet frame within IPv4/UDP packet.
 */
#define VXLAN_MAX_MTU	(IP_MAXPACKET - \
    60 /* Maximum IPv4 header len */ - \
    sizeof(struct udphdr) - \
    sizeof(struct vxlan_header) - \
    ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN)
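
/*
 * For a rough sense of scale: with IP_MAXPACKET at 65535 and the usual
 * 8-byte UDP and 8-byte VXLAN headers, the define above works out to an
 * upper bound of about 65437 bytes for the inner Ethernet frame.
 */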
#define VXLAN_BASIC_IFCAPS	(IFCAP_LINKSTATE | IFCAP_JUMBO_MTU)

#define VXLAN_SO_MC_MAX_GROUPS		32

#define VXLAN_SO_VNI_HASH_SHIFT		6
#define VXLAN_SO_VNI_HASH_SIZE		(1 << VXLAN_SO_VNI_HASH_SHIFT)
#define VXLAN_SO_VNI_HASH(_vni)		((_vni) % VXLAN_SO_VNI_HASH_SIZE)

struct vxlan_socket {
	struct socket			*vxlso_sock;
	struct rmlock			 vxlso_lock;
	u_int				 vxlso_refcnt;
	union vxlan_sockaddr		 vxlso_laddr;
	LIST_ENTRY(vxlan_socket)	 vxlso_entry;
	struct vxlan_softc_head		 vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
	struct vxlan_socket_mc_info	 vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
};

#define VXLAN_SO_RLOCK(_vso, _p)	rm_rlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_RUNLOCK(_vso, _p)	rm_runlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_WLOCK(_vso)		rm_wlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_WUNLOCK(_vso)		rm_wunlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_LOCK_ASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
#define VXLAN_SO_LOCK_WASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)

#define VXLAN_SO_ACQUIRE(_vso)		refcount_acquire(&(_vso)->vxlso_refcnt)
#define VXLAN_SO_RELEASE(_vso)		refcount_release(&(_vso)->vxlso_refcnt)

struct vxlan_ftable_entry {
	LIST_ENTRY(vxlan_ftable_entry)	 vxlfe_hash;
	uint16_t			 vxlfe_flags;
	uint8_t				 vxlfe_mac[ETHER_ADDR_LEN];
	union vxlan_sockaddr		 vxlfe_raddr;
	time_t				 vxlfe_expire;
};

#define VXLAN_FE_FLAG_DYNAMIC		0x01
#define VXLAN_FE_FLAG_STATIC		0x02

#define VXLAN_FE_IS_DYNAMIC(_fe) \
    ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)

#define VXLAN_SC_FTABLE_SHIFT		9
#define VXLAN_SC_FTABLE_SIZE		(1 << VXLAN_SC_FTABLE_SHIFT)
#define VXLAN_SC_FTABLE_MASK		(VXLAN_SC_FTABLE_SIZE - 1)
#define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
    (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)

LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);

struct vxlan_statistics {
	uint32_t	ftable_nospace;
	uint32_t	ftable_lock_upgrade_failed;
	counter_u64_t	txcsum;
	counter_u64_t	tso;
	counter_u64_t	rxcsum;
};

struct vxlan_softc {
	struct ifnet			*vxl_ifp;
	int				 vxl_reqcap;
	u_int				 vxl_fibnum;
	struct vxlan_socket		*vxl_sock;
	uint32_t			 vxl_vni;
	union vxlan_sockaddr		 vxl_src_addr;
	union vxlan_sockaddr		 vxl_dst_addr;
	uint32_t			 vxl_flags;
#define VXLAN_FLAG_INIT		0x0001
#define VXLAN_FLAG_TEARDOWN	0x0002
#define VXLAN_FLAG_LEARN	0x0004
#define VXLAN_FLAG_USER_MTU	0x0008

	uint32_t			 vxl_port_hash_key;
	uint16_t			 vxl_min_port;
	uint16_t			 vxl_max_port;
	uint8_t				 vxl_ttl;

	/* Lookup table from MAC address to forwarding entry. */
	uint32_t			 vxl_ftable_cnt;
	uint32_t			 vxl_ftable_max;
	uint32_t			 vxl_ftable_timeout;
	uint32_t			 vxl_ftable_hash_key;
	struct vxlan_ftable_head	*vxl_ftable;

	/* Derived from vxl_dst_addr. */
	struct vxlan_ftable_entry	 vxl_default_fe;

	struct ip_moptions		*vxl_im4o;
	struct ip6_moptions		*vxl_im6o;

	struct rmlock			 vxl_lock;
	volatile u_int			 vxl_refcnt;

	int				 vxl_unit;
	int				 vxl_vso_mc_index;
	struct vxlan_statistics		 vxl_stats;
	struct sysctl_oid		*vxl_sysctl_node;
	struct sysctl_ctx_list		 vxl_sysctl_ctx;
	struct callout			 vxl_callout;
	struct ether_addr		 vxl_hwaddr;
	int				 vxl_mc_ifindex;
	struct ifnet			*vxl_mc_ifp;
	struct ifmedia			 vxl_media;
	char				 vxl_mc_ifname[IFNAMSIZ];
	LIST_ENTRY(vxlan_softc)		 vxl_entry;
	LIST_ENTRY(vxlan_softc)		 vxl_ifdetach_list;

	/* For rate limiting errors on the tx fast path. */
	struct timeval			 err_time;
	int				 err_pps;
};

#define VXLAN_RLOCK(_sc, _p)	rm_rlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_WLOCK(_sc)	rm_wlock(&(_sc)->vxl_lock)
#define VXLAN_WUNLOCK(_sc)	rm_wunlock(&(_sc)->vxl_lock)
#define VXLAN_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->vxl_lock)
#define VXLAN_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
#define VXLAN_LOCK_WASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
#define VXLAN_UNLOCK(_sc, _p) do {		\
	if (VXLAN_LOCK_WOWNED(_sc))		\
		VXLAN_WUNLOCK(_sc);		\
	else					\
		VXLAN_RUNLOCK(_sc, _p);		\
} while (0)

#define VXLAN_ACQUIRE(_sc)	refcount_acquire(&(_sc)->vxl_refcnt)
#define VXLAN_RELEASE(_sc)	refcount_release(&(_sc)->vxl_refcnt)

#define	satoconstsin(sa)	((const struct sockaddr_in *)(sa))
#define	satoconstsin6(sa)	((const struct sockaddr_in6 *)(sa))

struct vxlanudphdr {
	struct udphdr		 vxlh_udp;
	struct vxlan_header	 vxlh_hdr;
} __packed;

static int	vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
static void	vxlan_ftable_init(struct vxlan_softc *);
static void	vxlan_ftable_fini(struct vxlan_softc *);
static void	vxlan_ftable_flush(struct vxlan_softc *, int);
static void	vxlan_ftable_expire(struct vxlan_softc *);
static int	vxlan_ftable_update_locked(struct vxlan_softc *,
		    const union vxlan_sockaddr *, const uint8_t *,
		    struct rm_priotracker *);
static int	vxlan_ftable_learn(struct vxlan_softc *,
		    const struct sockaddr *, const uint8_t *);
static int	vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);

static struct vxlan_ftable_entry *
		vxlan_ftable_entry_alloc(void);
static void	vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
static void	vxlan_ftable_entry_init(struct vxlan_softc *,
		    struct vxlan_ftable_entry *, const uint8_t *,
		    const struct sockaddr *, uint32_t);
static void	vxlan_ftable_entry_destroy(struct vxlan_softc *,
		    struct vxlan_ftable_entry *);
static int	vxlan_ftable_entry_insert(struct vxlan_softc *,
		    struct vxlan_ftable_entry *);
static struct vxlan_ftable_entry *
		vxlan_ftable_entry_lookup(struct vxlan_softc *,
		    const uint8_t *);
static void	vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
		    struct sbuf *);

static struct vxlan_socket *
		vxlan_socket_alloc(const union vxlan_sockaddr *);
static void	vxlan_socket_destroy(struct vxlan_socket *);
static void	vxlan_socket_release(struct vxlan_socket *);
static struct vxlan_socket *
		vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
static void	vxlan_socket_insert(struct vxlan_socket *);
static int	vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
static int	vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
static int	vxlan_socket_create(struct ifnet *, int,
		    const union vxlan_sockaddr *, struct vxlan_socket **);
static void	vxlan_socket_ifdetach(struct vxlan_socket *,
		    struct ifnet *, struct vxlan_softc_head *);

static struct vxlan_socket *
		vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_mc_info_match(
		    const struct vxlan_socket_mc_info *,
		    const union vxlan_sockaddr *,
		    const union vxlan_sockaddr *, int);
static int	vxlan_socket_mc_join_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
		    int *, union vxlan_sockaddr *);
static int	vxlan_socket_mc_leave_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *,
		    const union vxlan_sockaddr *, int);
static int	vxlan_socket_mc_add_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
		    int, int *);
static void	vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
		    int);

static struct vxlan_softc *
		vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
		    uint32_t);
static struct vxlan_softc *
		vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
static int	vxlan_socket_insert_softc(struct vxlan_socket *,
		    struct vxlan_softc *);
static void	vxlan_socket_remove_softc(struct vxlan_socket *,
		    struct vxlan_softc *);

static struct ifnet *
		vxlan_multicast_if_ref(struct vxlan_softc *, int);
static void	vxlan_free_multicast(struct vxlan_softc *);
static int	vxlan_setup_multicast_interface(struct vxlan_softc *);

static int	vxlan_setup_multicast(struct vxlan_softc *);
static int	vxlan_setup_socket(struct vxlan_softc *);
#ifdef INET6
static void	vxlan_setup_zero_checksum_port(struct vxlan_softc *);
#endif
static void	vxlan_setup_interface_hdrlen(struct vxlan_softc *);
static int	vxlan_valid_init_config(struct vxlan_softc *);
static void	vxlan_init_wait(struct vxlan_softc *);
static void	vxlan_init_complete(struct vxlan_softc *);
static void	vxlan_init(void *);
static void	vxlan_release(struct vxlan_softc *);
static void	vxlan_teardown_wait(struct vxlan_softc *);
static void	vxlan_teardown_complete(struct vxlan_softc *);
static void	vxlan_teardown_locked(struct vxlan_softc *);
static void	vxlan_teardown(struct vxlan_softc *);
static void	vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
		    struct vxlan_softc_head *);
static void	vxlan_timer(void *);

static int	vxlan_ctrl_get_config(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_multicast_if(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
static int	vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
static int	vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
static int	vxlan_ctrl_flush(struct vxlan_softc *, void *);
static int	vxlan_ioctl_drvspec(struct vxlan_softc *,
		    struct ifdrv *, int);
static int	vxlan_ioctl_ifflags(struct vxlan_softc *);
static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);

#if defined(INET) || defined(INET6)
static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
static void	vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
		    int, uint16_t, uint16_t);
#endif
static int	vxlan_encap4(struct vxlan_softc *,
		    const union vxlan_sockaddr *, struct mbuf *);
static int	vxlan_encap6(struct vxlan_softc *,
		    const union vxlan_sockaddr *, struct mbuf *);
static int	vxlan_transmit(struct ifnet *, struct mbuf *);
static void	vxlan_qflush(struct ifnet *);
static bool	vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
		    const struct sockaddr *, void *);
static int	vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
		    const struct sockaddr *);

static int	vxlan_stats_alloc(struct vxlan_softc *);
static void	vxlan_stats_free(struct vxlan_softc *);
static void	vxlan_set_default_config(struct vxlan_softc *);
static int	vxlan_set_user_config(struct vxlan_softc *,
		    struct ifvxlanparam *);
static int	vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int);
static void	vxlan_set_hwcaps(struct vxlan_softc *);
static int	vxlan_clone_create(struct if_clone *, char *, size_t,
		    struct ifc_data *, struct ifnet **);
static int	vxlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);

static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
static int	vxlan_media_change(struct ifnet *);
static void	vxlan_media_status(struct ifnet *, struct ifmediareq *);

static int	vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
		    const struct sockaddr *);
static void	vxlan_sockaddr_copy(union vxlan_sockaddr *,
		    const struct sockaddr *);
static int	vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
		    const struct sockaddr *);
static void	vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
		    const struct sockaddr *);
static int	vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
static int	vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);

static int	vxlan_can_change_config(struct vxlan_softc *);
static int	vxlan_check_vni(uint32_t);
static int	vxlan_check_ttl(int);
static int	vxlan_check_ftable_timeout(uint32_t);
static int	vxlan_check_ftable_max(uint32_t);

static void	vxlan_sysctl_setup(struct vxlan_softc *);
static void	vxlan_sysctl_destroy(struct vxlan_softc *);
static int	vxlan_tunable_int(struct vxlan_softc *, const char *, int);

static void	vxlan_ifdetach_event(void *, struct ifnet *);
static void	vxlan_load(void);
static void	vxlan_unload(void);
static int	vxlan_modevent(module_t, int, void *);

static const char vxlan_name[] = "vxlan";
static MALLOC_DEFINE(M_VXLAN, vxlan_name,
    "Virtual eXtensible LAN Interface");
static struct if_clone *vxlan_cloner;

static struct mtx vxlan_list_mtx;
#define VXLAN_LIST_LOCK()	mtx_lock(&vxlan_list_mtx)
#define VXLAN_LIST_UNLOCK()	mtx_unlock(&vxlan_list_mtx)

static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
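/*
 * Every vxlan_socket is kept on the list above, protected by
 * vxlan_list_mtx.  Interfaces that share the same local address and port
 * find and reuse an existing socket via vxlan_socket_lookup() below.
 */
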
static eventhandler_tag vxlan_ifdetach_event_tag;

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Virtual eXtensible Local Area Network");

static int vxlan_legacy_port = 0;
TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
static int vxlan_reuse_port = 0;
TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);

/* Default maximum number of addresses in the forwarding table. */
#ifndef VXLAN_FTABLE_MAX
#define VXLAN_FTABLE_MAX	2000
#endif

/* Timeout (in seconds) of addresses learned in the forwarding table. */
#ifndef VXLAN_FTABLE_TIMEOUT
#define VXLAN_FTABLE_TIMEOUT	(20 * 60)
#endif

/*
 * Maximum timeout (in seconds) of addresses learned in the forwarding
 * table.
 */
#ifndef VXLAN_FTABLE_MAX_TIMEOUT
#define VXLAN_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
#endif

/* Number of seconds between pruning attempts of the forwarding table. */
#ifndef VXLAN_FTABLE_PRUNE
#define VXLAN_FTABLE_PRUNE	(5 * 60)
#endif

static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;

struct vxlan_control {
	int	(*vxlc_func)(struct vxlan_softc *, void *);
	int	vxlc_argsize;
	int	vxlc_flags;
#define VXLAN_CTRL_FLAG_COPYIN	0x01
#define VXLAN_CTRL_FLAG_COPYOUT	0x02
#define VXLAN_CTRL_FLAG_SUSER	0x04
};

static const struct vxlan_control vxlan_control_table[] = {
	[VXLAN_CMD_GET_CONFIG] =
	    { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
	      VXLAN_CTRL_FLAG_COPYOUT
	    },

	[VXLAN_CMD_SET_VNI] =
	    { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LOCAL_ADDR] =
	    { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_REMOTE_ADDR] =
	    { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LOCAL_PORT] =
	    { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_REMOTE_PORT] =
	    { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_PORT_RANGE] =
	    { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_FTABLE_TIMEOUT] =
	    { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_FTABLE_MAX] =
	    { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_MULTICAST_IF] =
	    { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_TTL] =
	    { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LEARN] =
	    { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
	    { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_REM] =
	    { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FLUSH] =
	    { vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },
};

static const int vxlan_control_table_size = nitems(vxlan_control_table);
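
/*
 * The table above is indexed by the VXLAN_CMD_* value carried in the
 * ifd_cmd field of an ifdrv request and is dispatched from
 * vxlan_ioctl_drvspec() below, typically reached through the
 * SIOCGDRVSPEC/SIOCSDRVSPEC ioctls issued by ifconfig(8), e.g. something
 * like:
 *
 *	ifconfig vxlan0 create vxlanid 42 vxlanlocal 192.0.2.1 \
 *	    vxlanremote 198.51.100.1
 *
 * (The exact ifconfig syntax above is illustrative; see ifconfig(8).)
 */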

static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int i, d;

	for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++)
		d = ((int)a[i]) - ((int)b[i]);

	return (d);
}

static void
vxlan_ftable_init(struct vxlan_softc *sc)
{
	int i;

	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
		LIST_INIT(&sc->vxl_ftable[i]);
	sc->vxl_ftable_hash_key = arc4random();
}

static void
vxlan_ftable_fini(struct vxlan_softc *sc)
{
	int i;

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
	}
	MPASS(sc->vxl_ftable_cnt == 0);

	free(sc->vxl_ftable, M_VXLAN);
	sc->vxl_ftable = NULL;
}

static void
vxlan_ftable_flush(struct vxlan_softc *sc, int all)
{
	struct vxlan_ftable_entry *fe, *tfe;
	int i;

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
			if (all || VXLAN_FE_IS_DYNAMIC(fe))
				vxlan_ftable_entry_destroy(sc, fe);
		}
	}
}

static void
vxlan_ftable_expire(struct vxlan_softc *sc)
{
	struct vxlan_ftable_entry *fe, *tfe;
	int i;

	VXLAN_LOCK_WASSERT(sc);

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
			if (VXLAN_FE_IS_DYNAMIC(fe) &&
			    time_uptime >= fe->vxlfe_expire)
				vxlan_ftable_entry_destroy(sc, fe);
		}
	}
}
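
/*
 * vxlan_ftable_update_locked() below may be entered with only a read lock
 * held.  An rmlock cannot be upgraded in place, so whenever the entry has
 * to be modified or created the read lock is dropped, the write lock is
 * taken and the lookup is retried from the "again" label; the retry is
 * counted in ftable_lock_upgrade_failed.
 */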

static int
vxlan_ftable_update_locked(struct vxlan_softc *sc,
    const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
    struct rm_priotracker *tracker)
{
	struct vxlan_ftable_entry *fe;
	int error __unused;

	VXLAN_LOCK_ASSERT(sc);

again:
	/*
	 * A forwarding entry for this MAC address might already exist. If
	 * so, update it, otherwise create a new one. We may have to upgrade
	 * the lock if we have to change or create an entry.
	 */
	fe = vxlan_ftable_entry_lookup(sc, mac);
	if (fe != NULL) {
		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;

		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
			return (0);
		if (!VXLAN_LOCK_WOWNED(sc)) {
			VXLAN_RUNLOCK(sc, tracker);
			VXLAN_WLOCK(sc);
			sc->vxl_stats.ftable_lock_upgrade_failed++;
			goto again;
		}
		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
		return (0);
	}

	if (!VXLAN_LOCK_WOWNED(sc)) {
		VXLAN_RUNLOCK(sc, tracker);
		VXLAN_WLOCK(sc);
		sc->vxl_stats.ftable_lock_upgrade_failed++;
		goto again;
	}

	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
		sc->vxl_stats.ftable_nospace++;
		return (ENOSPC);
	}

	fe = vxlan_ftable_entry_alloc();
	if (fe == NULL)
		return (ENOMEM);

	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);

	/* The prior lookup failed, so the insert should not. */
	error = vxlan_ftable_entry_insert(sc, fe);
	MPASS(error == 0);

	return (0);
}

static int
vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
    const uint8_t *mac)
{
	struct rm_priotracker tracker;
	union vxlan_sockaddr vxlsa;
	int error;

	/*
	 * The source port may be randomly selected by the remote host, so
	 * use the port of the default destination address.
	 */
	vxlan_sockaddr_copy(&vxlsa, sa);
	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
		if (error)
			return (error);
	}

	VXLAN_RLOCK(sc, &tracker);
	error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
	VXLAN_UNLOCK(sc, &tracker);

	return (error);
}

static int
vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sbuf sb;
	struct vxlan_softc *sc;
	struct vxlan_ftable_entry *fe;
	size_t size;
	int i, error;

	/*
	 * This is mostly intended for debugging during development. It is
	 * not practical to dump an entire large table this way.
	 */

	sc = arg1;
	size = PAGE_SIZE;	/* Calculate later. */

	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
	sbuf_putc(&sb, '\n');

	VXLAN_RLOCK(sc, &tracker);
	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
			if (sbuf_error(&sb) != 0)
				break;
			vxlan_ftable_entry_dump(fe, &sb);
		}
	}
	VXLAN_RUNLOCK(sc, &tracker);

	if (sbuf_len(&sb) == 1)
		sbuf_setpos(&sb, 0);

	sbuf_finish(&sb);
	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);

	return (error);
}

static struct vxlan_ftable_entry *
vxlan_ftable_entry_alloc(void)
{
	struct vxlan_ftable_entry *fe;

	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);

	return (fe);
}

static void
vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
{

	free(fe, M_VXLAN);
}

static void
vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
{

	fe->vxlfe_flags = flags;
	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
}

static void
vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
    struct vxlan_ftable_entry *fe)
{

	sc->vxl_ftable_cnt--;
	LIST_REMOVE(fe, vxlfe_hash);
	vxlan_ftable_entry_free(fe);
}

static int
vxlan_ftable_entry_insert(struct vxlan_softc *sc,
    struct vxlan_ftable_entry *fe)
{
	struct vxlan_ftable_entry *lfe;
	uint32_t hash;
	int dir;

	VXLAN_LOCK_WASSERT(sc);
	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);

	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
	if (lfe == NULL) {
		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
		goto out;
	}

	do {
		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
		if (dir == 0)
			return (EEXIST);
		if (dir > 0) {
			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
			goto out;
		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
			goto out;
		} else
			lfe = LIST_NEXT(lfe, vxlfe_hash);
	} while (lfe != NULL);

out:
	sc->vxl_ftable_cnt++;

	return (0);
}
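
/*
 * vxlan_ftable_entry_insert() above keeps each hash bucket sorted in
 * descending MAC address order, which is what allows the lookup below to
 * stop early once it has walked past the position the address would
 * occupy.
 */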

static struct vxlan_ftable_entry *
vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
{
	struct vxlan_ftable_entry *fe;
	uint32_t hash;
	int dir;

	VXLAN_LOCK_ASSERT(sc);
	hash = VXLAN_SC_FTABLE_HASH(sc, mac);

	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
		dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
		if (dir == 0)
			return (fe);
		if (dir > 0)
			break;
	}

	return (NULL);
}

static void
vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
{
	char buf[64];
	const union vxlan_sockaddr *sa;
	const void *addr;
	int i, len, af, width;

	sa = &fe->vxlfe_raddr;
	af = sa->sa.sa_family;
	len = sbuf_len(sb);

	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
	    fe->vxlfe_flags);

	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);

	if (af == AF_INET) {
		addr = &sa->in4.sin_addr;
		width = INET_ADDRSTRLEN - 1;
	} else {
		addr = &sa->in6.sin6_addr;
		width = INET6_ADDRSTRLEN - 1;
	}
	inet_ntop(af, addr, buf, sizeof(buf));
	sbuf_printf(sb, "%*s ", width, buf);

	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);

	sbuf_putc(sb, '\n');

	/* Truncate a partial line. */
	if (sbuf_error(sb) != 0)
		sbuf_setpos(sb, len);
}

static struct vxlan_socket *
vxlan_socket_alloc(const union vxlan_sockaddr *sa)
{
	struct vxlan_socket *vso;
	int i;

	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
	rm_init(&vso->vxlso_lock, "vxlansorm");
	refcount_init(&vso->vxlso_refcnt, 0);
	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
		LIST_INIT(&vso->vxlso_vni_hash[i]);
	vso->vxlso_laddr = *sa;

	return (vso);
}

static void
vxlan_socket_destroy(struct vxlan_socket *vso)
{
	struct socket *so;
#ifdef INVARIANTS
	int i;
	struct vxlan_socket_mc_info *mc;

	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];
		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
		    ("%s: socket %p mc[%d] still has address",
		    __func__, vso, i));
	}

	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
		    ("%s: socket %p vni_hash[%d] not empty",
		    __func__, vso, i));
	}
#endif
	so = vso->vxlso_sock;
	if (so != NULL) {
		vso->vxlso_sock = NULL;
		soclose(so);
	}

	rm_destroy(&vso->vxlso_lock);
	free(vso, M_VXLAN);
}

static void
vxlan_socket_release(struct vxlan_socket *vso)
{
	int destroy;

	VXLAN_LIST_LOCK();
	destroy = VXLAN_SO_RELEASE(vso);
	if (destroy != 0)
		LIST_REMOVE(vso, vxlso_entry);
	VXLAN_LIST_UNLOCK();

	if (destroy != 0)
		vxlan_socket_destroy(vso);
}

static struct vxlan_socket *
vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
{
	struct vxlan_socket *vso;

	VXLAN_LIST_LOCK();
	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
			VXLAN_SO_ACQUIRE(vso);
			break;
		}
	}
	VXLAN_LIST_UNLOCK();

	return (vso);
}

static void
vxlan_socket_insert(struct vxlan_socket *vso)
{

	VXLAN_LIST_LOCK();
	VXLAN_SO_ACQUIRE(vso);
	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
	VXLAN_LIST_UNLOCK();
}
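
/*
 * vxlan_socket_init() creates the kernel-owned UDP socket and installs
 * vxlan_rcv_udp_packet() as its tunneling input routine via
 * udp_set_kernel_tunneling(), so encapsulated frames are handed to the
 * driver directly from the UDP layer.
 */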

static int
vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
{
	struct thread *td;
	int error;

	td = curthread;

	error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
	if (error) {
		if_printf(ifp, "cannot create socket: %d\n", error);
		return (error);
	}

	error = udp_set_kernel_tunneling(vso->vxlso_sock,
	    vxlan_rcv_udp_packet, NULL, vso);
	if (error) {
		if_printf(ifp, "cannot set tunneling function: %d\n", error);
		return (error);
	}

	if (vxlan_reuse_port != 0) {
		struct sockopt sopt;
		int val = 1;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = SO_REUSEPORT;
		sopt.sopt_val = &val;
		sopt.sopt_valsize = sizeof(val);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error) {
			if_printf(ifp,
			    "cannot set SO_REUSEPORT socket opt: %d\n", error);
			return (error);
		}
	}

	return (0);
}

static int
vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
{
	union vxlan_sockaddr laddr;
	struct thread *td;
	int error;

	td = curthread;
	laddr = vso->vxlso_laddr;

	error = sobind(vso->vxlso_sock, &laddr.sa, td);
	if (error) {
		if (error != EADDRINUSE)
			if_printf(ifp, "cannot bind socket: %d\n", error);
		return (error);
	}

	return (0);
}

static int
vxlan_socket_create(struct ifnet *ifp, int multicast,
    const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
{
	union vxlan_sockaddr laddr;
	struct vxlan_socket *vso;
	int error;

	laddr = *saddr;

	/*
	 * If this socket will be multicast, then only the local port
	 * must be specified when binding.
	 */
	if (multicast != 0) {
		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
			laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
		else
			laddr.in6.sin6_addr = in6addr_any;
#endif
	}

	vso = vxlan_socket_alloc(&laddr);
	if (vso == NULL)
		return (ENOMEM);

	error = vxlan_socket_init(vso, ifp);
	if (error)
		goto fail;

	error = vxlan_socket_bind(vso, ifp);
	if (error)
		goto fail;

	/*
	 * There is a small window between the bind completing and
	 * inserting the socket, so that a concurrent create may fail.
	 * Let's not worry about that for now.
	 */
	vxlan_socket_insert(vso);
	*vsop = vso;

	return (0);

fail:
	vxlan_socket_destroy(vso);

	return (error);
}

static void
vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;
	int i;

	VXLAN_SO_RLOCK(vso, &tracker);
	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
			vxlan_ifdetach(sc, ifp, list);
	}
	VXLAN_SO_RUNLOCK(vso, &tracker);
}

static struct vxlan_socket *
vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
{
	union vxlan_sockaddr laddr;
	struct vxlan_socket *vso;

	laddr = *vxlsa;

	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
		laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
	else
		laddr.in6.sin6_addr = in6addr_any;
#endif

	vso = vxlan_socket_lookup(&laddr);

	return (vso);
}

static int
vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx)
{

	if (!vxlan_sockaddr_in_any(local) &&
	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
		return (0);
	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
		return (0);
	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
		return (0);

	return (1);
}

static int
vxlan_socket_mc_join_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int *ifidx, union vxlan_sockaddr *source)
{
	struct sockopt sopt;
	int error;

	*source = *local;

	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		struct ip_mreq mreq;

		mreq.imr_multiaddr = group->in4.sin_addr;
		mreq.imr_interface = local->in4.sin_addr;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_ADD_MEMBERSHIP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error)
			return (error);

		/*
		 * BMV: Ideally, there would be a formal way for us to get
		 * the local interface that was selected based on the
		 * imr_interface address. We could then update *ifidx so
		 * vxlan_sockaddr_mc_info_match() would return a match for
		 * later creates that explicitly set the multicast interface.
		 *
		 * If we really need to, we can of course look in the INP's
		 * membership list:
		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
		 *         imo_head[]->imf_inm->inm_ifp
		 * similarly to imo_match_group().
		 */
		source->in4.sin_addr = local->in4.sin_addr;

	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		struct ipv6_mreq mreq;

		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
		mreq.ipv6mr_interface = *ifidx;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_JOIN_GROUP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error)
			return (error);

		/*
		 * BMV: As with IPv4, we would really like to know what
		 * interface in6p_lookup_mcast_ifp() selected.
		 */
	} else
		error = EAFNOSUPPORT;

	return (error);
}

static int
vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
    int ifidx)
{
	struct sockopt sopt;
	int error;

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_SET;

	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		struct ip_mreq mreq;

		mreq.imr_multiaddr = group->in4.sin_addr;
		mreq.imr_interface = source->in4.sin_addr;

		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_DROP_MEMBERSHIP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);

	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		struct ipv6_mreq mreq;

		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
		mreq.ipv6mr_interface = ifidx;

		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_LEAVE_GROUP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);

	} else
		error = EAFNOSUPPORT;

	return (error);
}

static int
vxlan_socket_mc_add_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx, int *idx)
{
	union vxlan_sockaddr source;
	struct vxlan_socket_mc_info *mc;
	int i, empty, error;

	/*
	 * Within a socket, the same multicast group may be used by multiple
	 * interfaces, each with a different network identifier. But a socket
	 * may only join a multicast group once, so keep track of the users
	 * here.
	 */

	VXLAN_SO_WLOCK(vso);
	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];

		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			empty++;
			continue;
		}

		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
			goto out;
	}
	VXLAN_SO_WUNLOCK(vso);

	if (empty == 0)
		return (ENOSPC);

	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
	if (error)
		return (error);

	VXLAN_SO_WLOCK(vso);
	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];

		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
			mc->vxlsomc_ifidx = ifidx;
			goto out;
		}
	}
	VXLAN_SO_WUNLOCK(vso);

	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
	MPASS(error == 0);

	return (ENOSPC);

out:
	mc->vxlsomc_users++;
	VXLAN_SO_WUNLOCK(vso);

	*idx = i;

	return (0);
}

static void
vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
{
	union vxlan_sockaddr group, source;
	struct vxlan_socket_mc_info *mc;
	int ifidx, leave;

	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));

	leave = 0;
	mc = &vso->vxlso_mc[idx];

	VXLAN_SO_WLOCK(vso);
	mc->vxlsomc_users--;
	if (mc->vxlsomc_users == 0) {
		group = mc->vxlsomc_gaddr;
		source = mc->vxlsomc_saddr;
		ifidx = mc->vxlsomc_ifidx;
		bzero(mc, sizeof(*mc));
		leave = 1;
	}
	VXLAN_SO_WUNLOCK(vso);

	if (leave != 0) {
		/*
		 * Our socket's membership in this group may have already
		 * been removed if we joined through an interface that's
		 * been detached.
		 */
		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
	}
}
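
/*
 * The VNI lookups below return a softc with its reference count already
 * bumped (VXLAN_ACQUIRE); callers are expected to drop it again with
 * vxlan_release().
 */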

static struct vxlan_softc *
vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
{
	struct vxlan_softc *sc;
	uint32_t hash;

	VXLAN_SO_LOCK_ASSERT(vso);
	hash = VXLAN_SO_VNI_HASH(vni);

	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
		if (sc->vxl_vni == vni) {
			VXLAN_ACQUIRE(sc);
			break;
		}
	}

	return (sc);
}

static struct vxlan_softc *
vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;

	VXLAN_SO_RLOCK(vso, &tracker);
	sc = vxlan_socket_lookup_softc_locked(vso, vni);
	VXLAN_SO_RUNLOCK(vso, &tracker);

	return (sc);
}

static int
vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{
	struct vxlan_softc *tsc;
	uint32_t vni, hash;

	vni = sc->vxl_vni;
	hash = VXLAN_SO_VNI_HASH(vni);

	VXLAN_SO_WLOCK(vso);
	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
	if (tsc != NULL) {
		VXLAN_SO_WUNLOCK(vso);
		vxlan_release(tsc);
		return (EEXIST);
	}

	VXLAN_ACQUIRE(sc);
	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
	VXLAN_SO_WUNLOCK(vso);

	return (0);
}

static void
vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{

	VXLAN_SO_WLOCK(vso);
	LIST_REMOVE(sc, vxl_entry);
	VXLAN_SO_WUNLOCK(vso);

	vxlan_release(sc);
}

static struct ifnet *
vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
{
	struct ifnet *ifp;

	VXLAN_LOCK_ASSERT(sc);

	if (ipv4 && sc->vxl_im4o != NULL)
		ifp = sc->vxl_im4o->imo_multicast_ifp;
	else if (!ipv4 && sc->vxl_im6o != NULL)
		ifp = sc->vxl_im6o->im6o_multicast_ifp;
	else
		ifp = NULL;

	if (ifp != NULL)
		if_ref(ifp);

	return (ifp);
}

static void
vxlan_free_multicast(struct vxlan_softc *sc)
{

	if (sc->vxl_mc_ifp != NULL) {
		if_rele(sc->vxl_mc_ifp);
		sc->vxl_mc_ifp = NULL;
		sc->vxl_mc_ifindex = 0;
	}

	if (sc->vxl_im4o != NULL) {
		free(sc->vxl_im4o, M_VXLAN);
		sc->vxl_im4o = NULL;
	}

	if (sc->vxl_im6o != NULL) {
		free(sc->vxl_im6o, M_VXLAN);
		sc->vxl_im6o = NULL;
	}
}

static int
vxlan_setup_multicast_interface(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	ifp = ifunit_ref(sc->vxl_mc_ifname);
	if (ifp == NULL) {
		if_printf(sc->vxl_ifp, "multicast interface %s does "
		    "not exist\n", sc->vxl_mc_ifname);
		return (ENOENT);
	}

	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
		if_printf(sc->vxl_ifp, "interface %s does not support "
		    "multicast\n", sc->vxl_mc_ifname);
		if_rele(ifp);
		return (ENOTSUP);
	}

	sc->vxl_mc_ifp = ifp;
	sc->vxl_mc_ifindex = ifp->if_index;

	return (0);
}

static int
vxlan_setup_multicast(struct vxlan_softc *sc)
{
	const union vxlan_sockaddr *group;
	int error;

	group = &sc->vxl_dst_addr;
	error = 0;

	if (sc->vxl_mc_ifname[0] != '\0') {
		error = vxlan_setup_multicast_interface(sc);
		if (error)
			return (error);
	}

	/*
	 * Initialize a multicast options structure that is sufficiently
	 * populated for use in the respective IP output routine. This
	 * structure is typically stored in the socket, but our sockets
	 * may be shared among multiple interfaces.
	 */
	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
		    M_ZERO | M_WAITOK);
		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
		sc->vxl_im4o->imo_multicast_vif = -1;
	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
		    M_ZERO | M_WAITOK);
		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
	}

	return (error);
}

static int
vxlan_setup_socket(struct vxlan_softc *sc)
{
	struct vxlan_socket *vso;
	struct ifnet *ifp;
	union vxlan_sockaddr *saddr, *daddr;
	int multicast, error;

	vso = NULL;
	ifp = sc->vxl_ifp;
	saddr = &sc->vxl_src_addr;
	daddr = &sc->vxl_dst_addr;

	multicast = vxlan_sockaddr_in_multicast(daddr);
	MPASS(multicast != -1);
	sc->vxl_vso_mc_index = -1;

	/*
	 * Try to create the socket. If that fails, attempt to use an
	 * existing socket.
	 */
	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
	if (error) {
		if (multicast != 0)
			vso = vxlan_socket_mc_lookup(saddr);
		else
			vso = vxlan_socket_lookup(saddr);

		if (vso == NULL) {
			if_printf(ifp, "cannot create socket (error: %d), "
			    "and no existing socket found\n", error);
			goto out;
		}
	}

	if (multicast != 0) {
		error = vxlan_setup_multicast(sc);
		if (error)
			goto out;

		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
		if (error)
			goto out;
	}

	sc->vxl_sock = vso;
	error = vxlan_socket_insert_softc(vso, sc);
	if (error) {
		sc->vxl_sock = NULL;
		if_printf(ifp, "network identifier %d already exists in "
		    "this socket\n", sc->vxl_vni);
		goto out;
	}

	return (0);

out:
	if (vso != NULL) {
		if (sc->vxl_vso_mc_index != -1) {
			vxlan_socket_mc_release_group_by_idx(vso,
			    sc->vxl_vso_mc_index);
			sc->vxl_vso_mc_index = -1;
		}
		if (multicast != 0)
			vxlan_free_multicast(sc);
		vxlan_socket_release(vso);
	}

	return (error);
}
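
/*
 * RFC 6935 permits a zero UDP checksum for tunnel protocols over IPv6 on
 * a configured port.  The helper below advertises the IPv6 VXLAN port in
 * V_zero_checksum_port; the port appears to be a single global value,
 * hence the "already set" warning when interfaces would need different
 * ports.
 */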

#ifdef INET6
static void
vxlan_setup_zero_checksum_port(struct vxlan_softc *sc)
{

	if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr))
		return;

	MPASS(sc->vxl_src_addr.in6.sin6_port != 0);
	MPASS(sc->vxl_dst_addr.in6.sin6_port != 0);

	if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) {
		if_printf(sc->vxl_ifp, "port %d in src address does not match "
		    "port %d in dst address, rfc6935_port (%d) not updated.\n",
		    ntohs(sc->vxl_src_addr.in6.sin6_port),
		    ntohs(sc->vxl_dst_addr.in6.sin6_port),
		    V_zero_checksum_port);
		return;
	}

	if (V_zero_checksum_port != 0) {
		if (V_zero_checksum_port !=
		    ntohs(sc->vxl_src_addr.in6.sin6_port)) {
			if_printf(sc->vxl_ifp, "rfc6935_port is already set to "
			    "%d, cannot set it to %d.\n", V_zero_checksum_port,
			    ntohs(sc->vxl_src_addr.in6.sin6_port));
		}
		return;
	}

	V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port);
	if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n",
	    V_zero_checksum_port);
}
#endif

static void
vxlan_setup_interface_hdrlen(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	VXLAN_LOCK_WASSERT(sc);

	ifp = sc->vxl_ifp;
	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);

	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
		ifp->if_hdrlen += sizeof(struct ip);
	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
		ifp->if_hdrlen += sizeof(struct ip6_hdr);

	if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0)
		ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
}

static int
vxlan_valid_init_config(struct vxlan_softc *sc)
{
	const char *reason;

	if (vxlan_check_vni(sc->vxl_vni) != 0) {
		reason = "invalid virtual network identifier specified";
		goto fail;
	}

	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
		reason = "source address type is not supported";
		goto fail;
	}

	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
		reason = "destination address type is not supported";
		goto fail;
	}

	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
		reason = "no valid destination address specified";
		goto fail;
	}

	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
	    sc->vxl_mc_ifname[0] != '\0') {
		reason = "can only specify interface with a group address";
		goto fail;
	}

	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
			reason = "source and destination address must both "
			    "be either IPv4 or IPv6";
			goto fail;
		}
	}

	if (sc->vxl_src_addr.in4.sin_port == 0) {
		reason = "local port not specified";
		goto fail;
	}

	if (sc->vxl_dst_addr.in4.sin_port == 0) {
		reason = "remote port not specified";
		goto fail;
	}

	return (0);

fail:
	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
	return (EINVAL);
}

static void
vxlan_init_wait(struct vxlan_softc *sc)
{

	VXLAN_LOCK_WASSERT(sc);
	while (sc->vxl_flags & VXLAN_FLAG_INIT)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
}

static void
vxlan_init_complete(struct vxlan_softc *sc)
{

	VXLAN_WLOCK(sc);
	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
	wakeup(sc);
	VXLAN_WUNLOCK(sc);
}

static void
vxlan_init(void *xsc)
{
	static const uint8_t empty_mac[ETHER_ADDR_LEN];
	struct vxlan_softc *sc;
	struct ifnet *ifp;

	sc = xsc;
	ifp = sc->vxl_ifp;

	sx_xlock(&vxlan_sx);
	VXLAN_WLOCK(sc);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		VXLAN_WUNLOCK(sc);
		sx_xunlock(&vxlan_sx);
		return;
	}
	sc->vxl_flags |= VXLAN_FLAG_INIT;
	VXLAN_WUNLOCK(sc);

	if (vxlan_valid_init_config(sc) != 0)
		goto out;

	if (vxlan_setup_socket(sc) != 0)
		goto out;

#ifdef INET6
	vxlan_setup_zero_checksum_port(sc);
#endif

	/* Initialize the default forwarding entry. */
	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);

	VXLAN_WLOCK(sc);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
	    vxlan_timer, sc);
	VXLAN_WUNLOCK(sc);

	if_link_state_change(ifp, LINK_STATE_UP);

	EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family,
	    ntohs(sc->vxl_src_addr.in4.sin_port));
out:
	vxlan_init_complete(sc);
	sx_xunlock(&vxlan_sx);
}

static void
vxlan_release(struct vxlan_softc *sc)
{

	/*
	 * The softc may be destroyed as soon as we release our reference,
	 * so we cannot serialize the wakeup with the softc lock. We use a
	 * timeout in our sleeps so a missed wakeup is unfortunate but not
	 * fatal.
	 */
	if (VXLAN_RELEASE(sc) != 0)
		wakeup(sc);
}

static void
vxlan_teardown_wait(struct vxlan_softc *sc)
{

	VXLAN_LOCK_WASSERT(sc);
	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
}

static void
vxlan_teardown_complete(struct vxlan_softc *sc)
{

	VXLAN_WLOCK(sc);
	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
	wakeup(sc);
	VXLAN_WUNLOCK(sc);
}

static void
vxlan_teardown_locked(struct vxlan_softc *sc)
{
	struct ifnet *ifp;
	struct vxlan_socket *vso;

	sx_assert(&vxlan_sx, SA_XLOCKED);
	VXLAN_LOCK_WASSERT(sc);
	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);

	ifp = sc->vxl_ifp;
	ifp->if_flags &= ~IFF_UP;
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	callout_stop(&sc->vxl_callout);
	vso = sc->vxl_sock;
	sc->vxl_sock = NULL;

	VXLAN_WUNLOCK(sc);
	if_link_state_change(ifp, LINK_STATE_DOWN);
	EVENTHANDLER_INVOKE(vxlan_stop, ifp, sc->vxl_src_addr.in4.sin_family,
	    ntohs(sc->vxl_src_addr.in4.sin_port));

	if (vso != NULL) {
		vxlan_socket_remove_softc(vso, sc);

		if (sc->vxl_vso_mc_index != -1) {
			vxlan_socket_mc_release_group_by_idx(vso,
			    sc->vxl_vso_mc_index);
			sc->vxl_vso_mc_index = -1;
		}
	}

	VXLAN_WLOCK(sc);
	while (sc->vxl_refcnt != 0)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
	VXLAN_WUNLOCK(sc);

	callout_drain(&sc->vxl_callout);

	vxlan_free_multicast(sc);
	if (vso != NULL)
		vxlan_socket_release(vso);

	vxlan_teardown_complete(sc);
}

static void
vxlan_teardown(struct vxlan_softc *sc)
{

	sx_xlock(&vxlan_sx);
	VXLAN_WLOCK(sc);
	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
		vxlan_teardown_wait(sc);
		VXLAN_WUNLOCK(sc);
		sx_xunlock(&vxlan_sx);
		return;
	}

	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
	vxlan_teardown_locked(sc);
	sx_xunlock(&vxlan_sx);
}

static void
vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{

	VXLAN_WLOCK(sc);

	if (sc->vxl_mc_ifp != ifp)
		goto out;
	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
		goto out;

	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);

out:
	VXLAN_WUNLOCK(sc);
}
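
/*
 * vxlan_timer() fires every vxlan_ftable_prune_period seconds (see
 * VXLAN_FTABLE_PRUNE) and expires dynamic forwarding entries.  Judging by
 * the VXLAN_LOCK_WASSERT() in it, the callout is expected to run with the
 * softc write lock held, presumably via an rmlock-backed callout set up
 * elsewhere in the driver.
 */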

static void
vxlan_timer(void *xsc)
{
	struct vxlan_softc *sc;

	sc = xsc;
	VXLAN_LOCK_WASSERT(sc);

	vxlan_ftable_expire(sc);
	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
}

static int
vxlan_ioctl_ifflags(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	ifp = sc->vxl_ifp;

	if (ifp->if_flags & IFF_UP) {
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			vxlan_init(sc);
	} else {
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			vxlan_teardown(sc);
	}

	return (0);
}

static int
vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
{
	struct rm_priotracker tracker;
	struct ifvxlancfg *cfg;

	cfg = arg;
	bzero(cfg, sizeof(*cfg));

	VXLAN_RLOCK(sc, &tracker);
	cfg->vxlc_vni = sc->vxl_vni;
	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
	    sizeof(union vxlan_sockaddr));
	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
	    sizeof(union vxlan_sockaddr));
	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
	cfg->vxlc_port_min = sc->vxl_min_port;
	cfg->vxlc_port_max = sc->vxl_max_port;
	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
	cfg->vxlc_ttl = sc->vxl_ttl;
	VXLAN_RUNLOCK(sc, &tracker);

#ifdef INET6
	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
		sa6_recoverscope(&cfg->vxlc_local_sa.in6);
	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
		sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
#endif

	return (0);
}

static int
vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_vni = cmd->vxlcmd_vni;
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	union vxlan_sockaddr *vxlsa;
	int error;

	cmd = arg;
	vxlsa = &cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
		return (EINVAL);
	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
		return (EINVAL);
	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(vxlsa);
		if (error)
			return (error);
	}

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
		vxlan_set_hwcaps(sc);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	union vxlan_sockaddr *vxlsa;
	int error;

	cmd = arg;
	vxlsa = &cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
		return (EINVAL);
	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(vxlsa);
		if (error)
			return (error);
	}

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
		vxlan_setup_interface_hdrlen(sc);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
*sc, void *arg) 2031 { 2032 struct ifvxlancmd *cmd; 2033 int error; 2034 2035 cmd = arg; 2036 2037 if (cmd->vxlcmd_port == 0) 2038 return (EINVAL); 2039 2040 VXLAN_WLOCK(sc); 2041 if (vxlan_can_change_config(sc)) { 2042 sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port); 2043 error = 0; 2044 } else 2045 error = EBUSY; 2046 VXLAN_WUNLOCK(sc); 2047 2048 return (error); 2049 } 2050 2051 static int 2052 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg) 2053 { 2054 struct ifvxlancmd *cmd; 2055 int error; 2056 2057 cmd = arg; 2058 2059 if (cmd->vxlcmd_port == 0) 2060 return (EINVAL); 2061 2062 VXLAN_WLOCK(sc); 2063 if (vxlan_can_change_config(sc)) { 2064 sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port); 2065 error = 0; 2066 } else 2067 error = EBUSY; 2068 VXLAN_WUNLOCK(sc); 2069 2070 return (error); 2071 } 2072 2073 static int 2074 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg) 2075 { 2076 struct ifvxlancmd *cmd; 2077 uint16_t min, max; 2078 int error; 2079 2080 cmd = arg; 2081 min = cmd->vxlcmd_port_min; 2082 max = cmd->vxlcmd_port_max; 2083 2084 if (max < min) 2085 return (EINVAL); 2086 2087 VXLAN_WLOCK(sc); 2088 if (vxlan_can_change_config(sc)) { 2089 sc->vxl_min_port = min; 2090 sc->vxl_max_port = max; 2091 error = 0; 2092 } else 2093 error = EBUSY; 2094 VXLAN_WUNLOCK(sc); 2095 2096 return (error); 2097 } 2098 2099 static int 2100 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg) 2101 { 2102 struct ifvxlancmd *cmd; 2103 int error; 2104 2105 cmd = arg; 2106 2107 VXLAN_WLOCK(sc); 2108 if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) { 2109 sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout; 2110 error = 0; 2111 } else 2112 error = EINVAL; 2113 VXLAN_WUNLOCK(sc); 2114 2115 return (error); 2116 } 2117 2118 static int 2119 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg) 2120 { 2121 struct ifvxlancmd *cmd; 2122 int error; 2123 2124 cmd = arg; 2125 2126 VXLAN_WLOCK(sc); 2127 if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) { 2128 sc->vxl_ftable_max = cmd->vxlcmd_ftable_max; 2129 error = 0; 2130 } else 2131 error = EINVAL; 2132 VXLAN_WUNLOCK(sc); 2133 2134 return (error); 2135 } 2136 2137 static int 2138 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg) 2139 { 2140 struct ifvxlancmd *cmd; 2141 int error; 2142 2143 cmd = arg; 2144 2145 VXLAN_WLOCK(sc); 2146 if (vxlan_can_change_config(sc)) { 2147 strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ); 2148 vxlan_set_hwcaps(sc); 2149 error = 0; 2150 } else 2151 error = EBUSY; 2152 VXLAN_WUNLOCK(sc); 2153 2154 return (error); 2155 } 2156 2157 static int 2158 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg) 2159 { 2160 struct ifvxlancmd *cmd; 2161 int error; 2162 2163 cmd = arg; 2164 2165 VXLAN_WLOCK(sc); 2166 if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) { 2167 sc->vxl_ttl = cmd->vxlcmd_ttl; 2168 if (sc->vxl_im4o != NULL) 2169 sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl; 2170 if (sc->vxl_im6o != NULL) 2171 sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl; 2172 error = 0; 2173 } else 2174 error = EINVAL; 2175 VXLAN_WUNLOCK(sc); 2176 2177 return (error); 2178 } 2179 2180 static int 2181 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg) 2182 { 2183 struct ifvxlancmd *cmd; 2184 2185 cmd = arg; 2186 2187 VXLAN_WLOCK(sc); 2188 if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN) 2189 sc->vxl_flags |= VXLAN_FLAG_LEARN; 2190 else 2191 sc->vxl_flags &= ~VXLAN_FLAG_LEARN; 2192 VXLAN_WUNLOCK(sc); 2193 2194 return (0); 2195 } 2196 2197 static int 2198 
vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg) 2199 { 2200 union vxlan_sockaddr vxlsa; 2201 struct ifvxlancmd *cmd; 2202 struct vxlan_ftable_entry *fe; 2203 int error; 2204 2205 cmd = arg; 2206 vxlsa = cmd->vxlcmd_sa; 2207 2208 if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa)) 2209 return (EINVAL); 2210 if (vxlan_sockaddr_in_any(&vxlsa) != 0) 2211 return (EINVAL); 2212 if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) 2213 return (EINVAL); 2214 /* BMV: We could support both IPv4 and IPv6 later. */ 2215 if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family) 2216 return (EAFNOSUPPORT); 2217 2218 if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) { 2219 error = vxlan_sockaddr_in6_embedscope(&vxlsa); 2220 if (error) 2221 return (error); 2222 } 2223 2224 fe = vxlan_ftable_entry_alloc(); 2225 if (fe == NULL) 2226 return (ENOMEM); 2227 2228 if (vxlsa.in4.sin_port == 0) 2229 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port; 2230 2231 vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa, 2232 VXLAN_FE_FLAG_STATIC); 2233 2234 VXLAN_WLOCK(sc); 2235 error = vxlan_ftable_entry_insert(sc, fe); 2236 VXLAN_WUNLOCK(sc); 2237 2238 if (error) 2239 vxlan_ftable_entry_free(fe); 2240 2241 return (error); 2242 } 2243 2244 static int 2245 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg) 2246 { 2247 struct ifvxlancmd *cmd; 2248 struct vxlan_ftable_entry *fe; 2249 int error; 2250 2251 cmd = arg; 2252 2253 VXLAN_WLOCK(sc); 2254 fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac); 2255 if (fe != NULL) { 2256 vxlan_ftable_entry_destroy(sc, fe); 2257 error = 0; 2258 } else 2259 error = ENOENT; 2260 VXLAN_WUNLOCK(sc); 2261 2262 return (error); 2263 } 2264 2265 static int 2266 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg) 2267 { 2268 struct ifvxlancmd *cmd; 2269 int all; 2270 2271 cmd = arg; 2272 all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL; 2273 2274 VXLAN_WLOCK(sc); 2275 vxlan_ftable_flush(sc, all); 2276 VXLAN_WUNLOCK(sc); 2277 2278 return (0); 2279 } 2280 2281 static int 2282 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get) 2283 { 2284 const struct vxlan_control *vc; 2285 union { 2286 struct ifvxlancfg cfg; 2287 struct ifvxlancmd cmd; 2288 } args; 2289 int out, error; 2290 2291 if (ifd->ifd_cmd >= vxlan_control_table_size) 2292 return (EINVAL); 2293 2294 bzero(&args, sizeof(args)); 2295 vc = &vxlan_control_table[ifd->ifd_cmd]; 2296 out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0; 2297 2298 if ((get != 0 && out == 0) || (get == 0 && out != 0)) 2299 return (EINVAL); 2300 2301 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) { 2302 error = priv_check(curthread, PRIV_NET_VXLAN); 2303 if (error) 2304 return (error); 2305 } 2306 2307 if (ifd->ifd_len != vc->vxlc_argsize || 2308 ifd->ifd_len > sizeof(args)) 2309 return (EINVAL); 2310 2311 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) { 2312 error = copyin(ifd->ifd_data, &args, ifd->ifd_len); 2313 if (error) 2314 return (error); 2315 } 2316 2317 error = vc->vxlc_func(sc, &args); 2318 if (error) 2319 return (error); 2320 2321 if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) { 2322 error = copyout(&args, ifd->ifd_data, ifd->ifd_len); 2323 if (error) 2324 return (error); 2325 } 2326 2327 return (0); 2328 } 2329 2330 static int 2331 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2332 { 2333 struct rm_priotracker tracker; 2334 struct vxlan_softc *sc; 2335 struct ifreq *ifr; 2336 struct ifdrv *ifd; 2337 int error; 2338 2339 sc = ifp->if_softc; 2340 ifr = (struct ifreq *) data; 2341 ifd = (struct ifdrv *) data; 2342 2343 error = 0; 
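	/*
	 * Dispatch on the ioctl command.  Driver-specific configuration
	 * (VNI, tunnel addresses, ports, forwarding-table control) is
	 * carried in a struct ifdrv via SIOCGDRVSPEC/SIOCSDRVSPEC and is
	 * routed through vxlan_ioctl_drvspec() to the handlers listed in
	 * vxlan_control_table; ifconfig(8) sets the vxlan-specific
	 * parameters this way.  Everything not handled here falls through
	 * to ether_ioctl().
	 */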
2344 2345 switch (cmd) { 2346 case SIOCADDMULTI: 2347 case SIOCDELMULTI: 2348 break; 2349 2350 case SIOCGDRVSPEC: 2351 case SIOCSDRVSPEC: 2352 error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC); 2353 break; 2354 2355 case SIOCSIFFLAGS: 2356 error = vxlan_ioctl_ifflags(sc); 2357 break; 2358 2359 case SIOCSIFMEDIA: 2360 case SIOCGIFMEDIA: 2361 error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd); 2362 break; 2363 2364 case SIOCSIFMTU: 2365 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) { 2366 error = EINVAL; 2367 } else { 2368 VXLAN_WLOCK(sc); 2369 ifp->if_mtu = ifr->ifr_mtu; 2370 sc->vxl_flags |= VXLAN_FLAG_USER_MTU; 2371 VXLAN_WUNLOCK(sc); 2372 } 2373 break; 2374 2375 case SIOCSIFCAP: 2376 VXLAN_WLOCK(sc); 2377 error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap); 2378 if (error == 0) 2379 vxlan_set_hwcaps(sc); 2380 VXLAN_WUNLOCK(sc); 2381 break; 2382 2383 case SIOCGTUNFIB: 2384 VXLAN_RLOCK(sc, &tracker); 2385 ifr->ifr_fib = sc->vxl_fibnum; 2386 VXLAN_RUNLOCK(sc, &tracker); 2387 break; 2388 2389 case SIOCSTUNFIB: 2390 if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0) 2391 break; 2392 2393 if (ifr->ifr_fib >= rt_numfibs) 2394 error = EINVAL; 2395 else { 2396 VXLAN_WLOCK(sc); 2397 sc->vxl_fibnum = ifr->ifr_fib; 2398 VXLAN_WUNLOCK(sc); 2399 } 2400 break; 2401 2402 default: 2403 error = ether_ioctl(ifp, cmd, data); 2404 break; 2405 } 2406 2407 return (error); 2408 } 2409 2410 #if defined(INET) || defined(INET6) 2411 static uint16_t 2412 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m) 2413 { 2414 int range; 2415 uint32_t hash; 2416 2417 range = sc->vxl_max_port - sc->vxl_min_port + 1; 2418 2419 if (M_HASHTYPE_ISHASH(m)) 2420 hash = m->m_pkthdr.flowid; 2421 else 2422 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, 2423 sc->vxl_port_hash_key); 2424 2425 return (sc->vxl_min_port + (hash % range)); 2426 } 2427 2428 static void 2429 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff, 2430 uint16_t srcport, uint16_t dstport) 2431 { 2432 struct vxlanudphdr *hdr; 2433 struct udphdr *udph; 2434 struct vxlan_header *vxh; 2435 int len; 2436 2437 len = m->m_pkthdr.len - ipoff; 2438 MPASS(len >= sizeof(struct vxlanudphdr)); 2439 hdr = mtodo(m, ipoff); 2440 2441 udph = &hdr->vxlh_udp; 2442 udph->uh_sport = srcport; 2443 udph->uh_dport = dstport; 2444 udph->uh_ulen = htons(len); 2445 udph->uh_sum = 0; 2446 2447 vxh = &hdr->vxlh_hdr; 2448 vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI); 2449 vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT); 2450 } 2451 #endif 2452 2453 #if defined(INET6) || defined(INET) 2454 /* 2455 * Return the CSUM_INNER_* equivalent of CSUM_* caps. 2456 */ 2457 static uint32_t 2458 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap) 2459 { 2460 uint32_t csum_flags = encap; 2461 const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP; 2462 2463 /* 2464 * csum_flags can request either v4 or v6 offload but not both. 2465 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO) 2466 * so those bits are no good to detect the IP version. Other bits are 2467 * always set with CSUM_TSO and we use those to figure out the IP 2468 * version. 
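 *
 * For example (an illustration of the mapping below, not an exhaustive
 * list): an inner IPv4 TCP segment marked CSUM_IP | CSUM_IP_TCP is
 * translated to CSUM_ENCAP_VXLAN | CSUM_INNER_IP | CSUM_INNER_IP_TCP,
 * and with TSO in use CSUM_IP_TSO additionally becomes
 * CSUM_INNER_IP_TSO.  The "encap" argument supplies the
 * CSUM_ENCAP_VXLAN bit.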
2469 */ 2470 if (csum_flags_in & v4) { 2471 if (csum_flags_in & CSUM_IP) 2472 csum_flags |= CSUM_INNER_IP; 2473 if (csum_flags_in & CSUM_IP_UDP) 2474 csum_flags |= CSUM_INNER_IP_UDP; 2475 if (csum_flags_in & CSUM_IP_TCP) 2476 csum_flags |= CSUM_INNER_IP_TCP; 2477 if (csum_flags_in & CSUM_IP_TSO) 2478 csum_flags |= CSUM_INNER_IP_TSO; 2479 } else { 2480 #ifdef INVARIANTS 2481 const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP; 2482 2483 MPASS((csum_flags_in & v6) != 0); 2484 #endif 2485 if (csum_flags_in & CSUM_IP6_UDP) 2486 csum_flags |= CSUM_INNER_IP6_UDP; 2487 if (csum_flags_in & CSUM_IP6_TCP) 2488 csum_flags |= CSUM_INNER_IP6_TCP; 2489 if (csum_flags_in & CSUM_IP6_TSO) 2490 csum_flags |= CSUM_INNER_IP6_TSO; 2491 } 2492 2493 return (csum_flags); 2494 } 2495 #endif 2496 2497 static int 2498 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2499 struct mbuf *m) 2500 { 2501 #ifdef INET 2502 struct ifnet *ifp; 2503 struct ip *ip; 2504 struct in_addr srcaddr, dstaddr; 2505 uint16_t srcport, dstport; 2506 int plen, mcast, error; 2507 struct route route, *ro; 2508 struct sockaddr_in *sin; 2509 uint32_t csum_flags; 2510 2511 NET_EPOCH_ASSERT(); 2512 2513 ifp = sc->vxl_ifp; 2514 srcaddr = sc->vxl_src_addr.in4.sin_addr; 2515 srcport = vxlan_pick_source_port(sc, m); 2516 dstaddr = fvxlsa->in4.sin_addr; 2517 dstport = fvxlsa->in4.sin_port; 2518 2519 plen = m->m_pkthdr.len; 2520 M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr), 2521 M_NOWAIT); 2522 if (m == NULL) { 2523 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2524 return (ENOBUFS); 2525 } 2526 2527 ip = mtod(m, struct ip *); 2528 ip->ip_tos = 0; 2529 ip->ip_len = htons(m->m_pkthdr.len); 2530 ip->ip_off = 0; 2531 ip->ip_ttl = sc->vxl_ttl; 2532 ip->ip_p = IPPROTO_UDP; 2533 ip->ip_sum = 0; 2534 ip->ip_src = srcaddr; 2535 ip->ip_dst = dstaddr; 2536 2537 vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport); 2538 2539 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 2540 m->m_flags &= ~(M_MCAST | M_BCAST); 2541 2542 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2543 if (m->m_pkthdr.csum_flags != 0) { 2544 /* 2545 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2546 * up the ifnet for the outbound route and verify that the 2547 * outbound ifnet can perform the requested operation on the 2548 * inner frame. 
2549 */ 2550 bzero(&route, sizeof(route)); 2551 ro = &route; 2552 sin = (struct sockaddr_in *)&ro->ro_dst; 2553 sin->sin_family = AF_INET; 2554 sin->sin_len = sizeof(*sin); 2555 sin->sin_addr = ip->ip_dst; 2556 ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 2557 0); 2558 if (ro->ro_nh == NULL) { 2559 m_freem(m); 2560 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2561 return (EHOSTUNREACH); 2562 } 2563 2564 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2565 CSUM_ENCAP_VXLAN); 2566 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2567 csum_flags) { 2568 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2569 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2570 2571 if_printf(ifp, "interface %s is missing hwcaps " 2572 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2573 "hwassist 0x%08x\n", nh_ifp->if_xname, 2574 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2575 m->m_pkthdr.csum_flags, csum_flags, 2576 (uint32_t)nh_ifp->if_hwassist); 2577 } 2578 m_freem(m); 2579 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2580 return (ENXIO); 2581 } 2582 m->m_pkthdr.csum_flags = csum_flags; 2583 if (csum_flags & 2584 (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | 2585 CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { 2586 counter_u64_add(sc->vxl_stats.txcsum, 1); 2587 if (csum_flags & CSUM_INNER_TSO) 2588 counter_u64_add(sc->vxl_stats.tso, 1); 2589 } 2590 } else 2591 ro = NULL; 2592 error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL); 2593 if (error == 0) { 2594 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 2595 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); 2596 if (mcast != 0) 2597 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 2598 } else 2599 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2600 2601 return (error); 2602 #else 2603 m_freem(m); 2604 return (ENOTSUP); 2605 #endif 2606 } 2607 2608 static int 2609 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2610 struct mbuf *m) 2611 { 2612 #ifdef INET6 2613 struct ifnet *ifp; 2614 struct ip6_hdr *ip6; 2615 const struct in6_addr *srcaddr, *dstaddr; 2616 uint16_t srcport, dstport; 2617 int plen, mcast, error; 2618 struct route_in6 route, *ro; 2619 struct sockaddr_in6 *sin6; 2620 uint32_t csum_flags; 2621 2622 NET_EPOCH_ASSERT(); 2623 2624 ifp = sc->vxl_ifp; 2625 srcaddr = &sc->vxl_src_addr.in6.sin6_addr; 2626 srcport = vxlan_pick_source_port(sc, m); 2627 dstaddr = &fvxlsa->in6.sin6_addr; 2628 dstport = fvxlsa->in6.sin6_port; 2629 2630 plen = m->m_pkthdr.len; 2631 M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr), 2632 M_NOWAIT); 2633 if (m == NULL) { 2634 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2635 return (ENOBUFS); 2636 } 2637 2638 ip6 = mtod(m, struct ip6_hdr *); 2639 ip6->ip6_flow = 0; /* BMV: Keep in forwarding entry? */ 2640 ip6->ip6_vfc = IPV6_VERSION; 2641 ip6->ip6_plen = 0; 2642 ip6->ip6_nxt = IPPROTO_UDP; 2643 ip6->ip6_hlim = sc->vxl_ttl; 2644 ip6->ip6_src = *srcaddr; 2645 ip6->ip6_dst = *dstaddr; 2646 2647 vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport); 2648 2649 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 2650 m->m_flags &= ~(M_MCAST | M_BCAST); 2651 2652 ro = NULL; 2653 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2654 if (m->m_pkthdr.csum_flags != 0) { 2655 /* 2656 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2657 * up the ifnet for the outbound route and verify that the 2658 * outbound ifnet can perform the requested operation on the 2659 * inner frame. 
2660 */ 2661 bzero(&route, sizeof(route)); 2662 ro = &route; 2663 sin6 = (struct sockaddr_in6 *)&ro->ro_dst; 2664 sin6->sin6_family = AF_INET6; 2665 sin6->sin6_len = sizeof(*sin6); 2666 sin6->sin6_addr = ip6->ip6_dst; 2667 ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0, 2668 NHR_NONE, 0); 2669 if (ro->ro_nh == NULL) { 2670 m_freem(m); 2671 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2672 return (EHOSTUNREACH); 2673 } 2674 2675 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2676 CSUM_ENCAP_VXLAN); 2677 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2678 csum_flags) { 2679 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2680 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2681 2682 if_printf(ifp, "interface %s is missing hwcaps " 2683 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2684 "hwassist 0x%08x\n", nh_ifp->if_xname, 2685 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2686 m->m_pkthdr.csum_flags, csum_flags, 2687 (uint32_t)nh_ifp->if_hwassist); 2688 } 2689 m_freem(m); 2690 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2691 return (ENXIO); 2692 } 2693 m->m_pkthdr.csum_flags = csum_flags; 2694 if (csum_flags & 2695 (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | 2696 CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { 2697 counter_u64_add(sc->vxl_stats.txcsum, 1); 2698 if (csum_flags & CSUM_INNER_TSO) 2699 counter_u64_add(sc->vxl_stats.tso, 1); 2700 } 2701 } else if (ntohs(dstport) != V_zero_checksum_port) { 2702 struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr)); 2703 2704 hdr->uh_sum = in6_cksum_pseudo(ip6, 2705 m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0); 2706 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; 2707 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 2708 } 2709 error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL); 2710 if (error == 0) { 2711 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 2712 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); 2713 if (mcast != 0) 2714 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 2715 } else 2716 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2717 2718 return (error); 2719 #else 2720 m_freem(m); 2721 return (ENOTSUP); 2722 #endif 2723 } 2724 2725 static int 2726 vxlan_transmit(struct ifnet *ifp, struct mbuf *m) 2727 { 2728 struct rm_priotracker tracker; 2729 union vxlan_sockaddr vxlsa; 2730 struct vxlan_softc *sc; 2731 struct vxlan_ftable_entry *fe; 2732 struct ifnet *mcifp; 2733 struct ether_header *eh; 2734 int ipv4, error; 2735 2736 sc = ifp->if_softc; 2737 eh = mtod(m, struct ether_header *); 2738 fe = NULL; 2739 mcifp = NULL; 2740 2741 ETHER_BPF_MTAP(ifp, m); 2742 2743 VXLAN_RLOCK(sc, &tracker); 2744 M_SETFIB(m, sc->vxl_fibnum); 2745 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2746 VXLAN_RUNLOCK(sc, &tracker); 2747 m_freem(m); 2748 return (ENETDOWN); 2749 } 2750 2751 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) 2752 fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost); 2753 if (fe == NULL) 2754 fe = &sc->vxl_default_fe; 2755 vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa); 2756 2757 ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0; 2758 if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) 2759 mcifp = vxlan_multicast_if_ref(sc, ipv4); 2760 2761 VXLAN_ACQUIRE(sc); 2762 VXLAN_RUNLOCK(sc, &tracker); 2763 2764 if (ipv4 != 0) 2765 error = vxlan_encap4(sc, &vxlsa, m); 2766 else 2767 error = vxlan_encap6(sc, &vxlsa, m); 2768 2769 vxlan_release(sc); 2770 if (mcifp != NULL) 2771 if_rele(mcifp); 2772 2773 return (error); 2774 } 2775 2776 static void 2777 vxlan_qflush(struct ifnet *ifp __unused) 2778 { 2779 } 2780 2781 
static bool 2782 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb, 2783 const struct sockaddr *srcsa, void *xvso) 2784 { 2785 struct vxlan_socket *vso; 2786 struct vxlan_header *vxh, vxlanhdr; 2787 uint32_t vni; 2788 int error __unused; 2789 2790 M_ASSERTPKTHDR(m); 2791 vso = xvso; 2792 offset += sizeof(struct udphdr); 2793 2794 if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header)) 2795 goto out; 2796 2797 if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) { 2798 m_copydata(m, offset, sizeof(struct vxlan_header), 2799 (caddr_t) &vxlanhdr); 2800 vxh = &vxlanhdr; 2801 } else 2802 vxh = mtodo(m, offset); 2803 2804 /* 2805 * Drop if there is a reserved bit set in either the flags or VNI 2806 * fields of the header. This goes against the specification, but 2807 * a bit set may indicate an unsupported new feature. This matches 2808 * the behavior of the Linux implementation. 2809 */ 2810 if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) || 2811 vxh->vxlh_vni & ~VXLAN_VNI_MASK) 2812 goto out; 2813 2814 vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT; 2815 2816 /* Adjust to the start of the inner Ethernet frame. */ 2817 m_adj_decap(m, offset + sizeof(struct vxlan_header)); 2818 2819 error = vxlan_input(vso, vni, &m, srcsa); 2820 MPASS(error != 0 || m == NULL); 2821 2822 out: 2823 if (m != NULL) 2824 m_freem(m); 2825 2826 return (true); 2827 } 2828 2829 static int 2830 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0, 2831 const struct sockaddr *sa) 2832 { 2833 struct vxlan_softc *sc; 2834 struct ifnet *ifp; 2835 struct mbuf *m; 2836 struct ether_header *eh; 2837 int error; 2838 2839 m = *m0; 2840 2841 if (m->m_pkthdr.len < ETHER_HDR_LEN) 2842 return (EINVAL); 2843 2844 sc = vxlan_socket_lookup_softc(vso, vni); 2845 if (sc == NULL) 2846 return (ENOENT); 2847 2848 ifp = sc->vxl_ifp; 2849 if (m->m_len < ETHER_HDR_LEN && 2850 (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { 2851 *m0 = NULL; 2852 error = ENOBUFS; 2853 goto out; 2854 } 2855 eh = mtod(m, struct ether_header *); 2856 2857 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2858 error = ENETDOWN; 2859 goto out; 2860 } else if (ifp == m->m_pkthdr.rcvif) { 2861 /* XXX Does not catch more complex loops. 
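		 * Only the direct case, where the encapsulated packet was
		 * received on this vxlan interface itself, is rejected here;
		 * loops through an intermediate interface (e.g. a bridge or
		 * a second vxlan) are not detected.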
*/ 2862 error = EDEADLK; 2863 goto out; 2864 } 2865 2866 if (sc->vxl_flags & VXLAN_FLAG_LEARN) 2867 vxlan_ftable_learn(sc, sa, eh->ether_shost); 2868 2869 m_clrprotoflags(m); 2870 m->m_pkthdr.rcvif = ifp; 2871 M_SETFIB(m, ifp->if_fib); 2872 if (((ifp->if_capenable & IFCAP_RXCSUM && 2873 m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) || 2874 (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2875 !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) { 2876 uint32_t csum_flags = 0; 2877 2878 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) 2879 csum_flags |= CSUM_L3_CALC; 2880 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID) 2881 csum_flags |= CSUM_L3_VALID; 2882 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC) 2883 csum_flags |= CSUM_L4_CALC; 2884 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID) 2885 csum_flags |= CSUM_L4_VALID; 2886 m->m_pkthdr.csum_flags = csum_flags; 2887 counter_u64_add(sc->vxl_stats.rxcsum, 1); 2888 } else { 2889 /* clear everything */ 2890 m->m_pkthdr.csum_flags = 0; 2891 m->m_pkthdr.csum_data = 0; 2892 } 2893 2894 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 2895 (*ifp->if_input)(ifp, m); 2896 *m0 = NULL; 2897 error = 0; 2898 2899 out: 2900 vxlan_release(sc); 2901 return (error); 2902 } 2903 2904 static int 2905 vxlan_stats_alloc(struct vxlan_softc *sc) 2906 { 2907 struct vxlan_statistics *stats = &sc->vxl_stats; 2908 2909 stats->txcsum = counter_u64_alloc(M_WAITOK); 2910 if (stats->txcsum == NULL) 2911 goto failed; 2912 2913 stats->tso = counter_u64_alloc(M_WAITOK); 2914 if (stats->tso == NULL) 2915 goto failed; 2916 2917 stats->rxcsum = counter_u64_alloc(M_WAITOK); 2918 if (stats->rxcsum == NULL) 2919 goto failed; 2920 2921 return (0); 2922 failed: 2923 vxlan_stats_free(sc); 2924 return (ENOMEM); 2925 } 2926 2927 static void 2928 vxlan_stats_free(struct vxlan_softc *sc) 2929 { 2930 struct vxlan_statistics *stats = &sc->vxl_stats; 2931 2932 if (stats->txcsum != NULL) { 2933 counter_u64_free(stats->txcsum); 2934 stats->txcsum = NULL; 2935 } 2936 if (stats->tso != NULL) { 2937 counter_u64_free(stats->tso); 2938 stats->tso = NULL; 2939 } 2940 if (stats->rxcsum != NULL) { 2941 counter_u64_free(stats->rxcsum); 2942 stats->rxcsum = NULL; 2943 } 2944 } 2945 2946 static void 2947 vxlan_set_default_config(struct vxlan_softc *sc) 2948 { 2949 2950 sc->vxl_flags |= VXLAN_FLAG_LEARN; 2951 2952 sc->vxl_vni = VXLAN_VNI_MAX; 2953 sc->vxl_ttl = IPDEFTTL; 2954 2955 if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) { 2956 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT); 2957 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT); 2958 } else { 2959 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); 2960 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); 2961 } 2962 2963 sc->vxl_min_port = V_ipport_firstauto; 2964 sc->vxl_max_port = V_ipport_lastauto; 2965 2966 sc->vxl_ftable_max = VXLAN_FTABLE_MAX; 2967 sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT; 2968 } 2969 2970 static int 2971 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp) 2972 { 2973 2974 #ifndef INET 2975 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 | 2976 VXLAN_PARAM_WITH_REMOTE_ADDR4)) 2977 return (EAFNOSUPPORT); 2978 #endif 2979 2980 #ifndef INET6 2981 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 | 2982 VXLAN_PARAM_WITH_REMOTE_ADDR6)) 2983 return (EAFNOSUPPORT); 2984 #else 2985 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { 2986 int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa); 2987 if (error) 2988 return (error); 2989 } 2990 if (vxlp->vxlp_with & 
VXLAN_PARAM_WITH_REMOTE_ADDR6) { 2991 int error = vxlan_sockaddr_in6_embedscope( 2992 &vxlp->vxlp_remote_sa); 2993 if (error) 2994 return (error); 2995 } 2996 #endif 2997 2998 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) { 2999 if (vxlan_check_vni(vxlp->vxlp_vni) == 0) 3000 sc->vxl_vni = vxlp->vxlp_vni; 3001 } 3002 3003 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) { 3004 sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in); 3005 sc->vxl_src_addr.in4.sin_family = AF_INET; 3006 sc->vxl_src_addr.in4.sin_addr = 3007 vxlp->vxlp_local_sa.in4.sin_addr; 3008 } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { 3009 sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6); 3010 sc->vxl_src_addr.in6.sin6_family = AF_INET6; 3011 sc->vxl_src_addr.in6.sin6_addr = 3012 vxlp->vxlp_local_sa.in6.sin6_addr; 3013 } 3014 3015 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) { 3016 sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in); 3017 sc->vxl_dst_addr.in4.sin_family = AF_INET; 3018 sc->vxl_dst_addr.in4.sin_addr = 3019 vxlp->vxlp_remote_sa.in4.sin_addr; 3020 } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { 3021 sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6); 3022 sc->vxl_dst_addr.in6.sin6_family = AF_INET6; 3023 sc->vxl_dst_addr.in6.sin6_addr = 3024 vxlp->vxlp_remote_sa.in6.sin6_addr; 3025 } 3026 3027 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT) 3028 sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port); 3029 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT) 3030 sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port); 3031 3032 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) { 3033 if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) { 3034 sc->vxl_min_port = vxlp->vxlp_min_port; 3035 sc->vxl_max_port = vxlp->vxlp_max_port; 3036 } 3037 } 3038 3039 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF) 3040 strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ); 3041 3042 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) { 3043 if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0) 3044 sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout; 3045 } 3046 3047 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) { 3048 if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0) 3049 sc->vxl_ftable_max = vxlp->vxlp_ftable_max; 3050 } 3051 3052 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) { 3053 if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0) 3054 sc->vxl_ttl = vxlp->vxlp_ttl; 3055 } 3056 3057 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) { 3058 if (vxlp->vxlp_learn == 0) 3059 sc->vxl_flags &= ~VXLAN_FLAG_LEARN; 3060 } 3061 3062 return (0); 3063 } 3064 3065 static int 3066 vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap) 3067 { 3068 int mask = reqcap ^ ifp->if_capenable; 3069 3070 /* Disable TSO if tx checksums are disabled. */ 3071 if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) && 3072 reqcap & IFCAP_TSO4) { 3073 reqcap &= ~IFCAP_TSO4; 3074 if_printf(ifp, "tso4 disabled due to -txcsum.\n"); 3075 } 3076 if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) && 3077 reqcap & IFCAP_TSO6) { 3078 reqcap &= ~IFCAP_TSO6; 3079 if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); 3080 } 3081 3082 /* Do not enable TSO if tx checksums are disabled. 
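	 * TSO implies that the inner checksums are computed in hardware, so
	 * a request for TSO4/TSO6 without the corresponding txcsum
	 * capability is rejected with EAGAIN below.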
*/ 3083 if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 && 3084 !(reqcap & IFCAP_TXCSUM)) { 3085 if_printf(ifp, "enable txcsum first.\n"); 3086 return (EAGAIN); 3087 } 3088 if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 && 3089 !(reqcap & IFCAP_TXCSUM_IPV6)) { 3090 if_printf(ifp, "enable txcsum6 first.\n"); 3091 return (EAGAIN); 3092 } 3093 3094 sc->vxl_reqcap = reqcap; 3095 return (0); 3096 } 3097 3098 /* 3099 * A VXLAN interface inherits the capabilities of the vxlandev or the interface 3100 * hosting the vxlanlocal address. 3101 */ 3102 static void 3103 vxlan_set_hwcaps(struct vxlan_softc *sc) 3104 { 3105 struct epoch_tracker et; 3106 struct ifnet *p; 3107 struct ifaddr *ifa; 3108 u_long hwa; 3109 int cap, ena; 3110 bool rel; 3111 struct ifnet *ifp = sc->vxl_ifp; 3112 3113 /* reset caps */ 3114 ifp->if_capabilities &= VXLAN_BASIC_IFCAPS; 3115 ifp->if_capenable &= VXLAN_BASIC_IFCAPS; 3116 ifp->if_hwassist = 0; 3117 3118 NET_EPOCH_ENTER(et); 3119 CURVNET_SET(ifp->if_vnet); 3120 3121 rel = false; 3122 p = NULL; 3123 if (sc->vxl_mc_ifname[0] != '\0') { 3124 rel = true; 3125 p = ifunit_ref(sc->vxl_mc_ifname); 3126 } else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { 3127 if (sc->vxl_src_addr.sa.sa_family == AF_INET) { 3128 struct sockaddr_in in4 = sc->vxl_src_addr.in4; 3129 3130 in4.sin_port = 0; 3131 ifa = ifa_ifwithaddr((struct sockaddr *)&in4); 3132 if (ifa != NULL) 3133 p = ifa->ifa_ifp; 3134 } else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) { 3135 struct sockaddr_in6 in6 = sc->vxl_src_addr.in6; 3136 3137 in6.sin6_port = 0; 3138 ifa = ifa_ifwithaddr((struct sockaddr *)&in6); 3139 if (ifa != NULL) 3140 p = ifa->ifa_ifp; 3141 } 3142 } 3143 if (p == NULL) 3144 goto done; 3145 3146 cap = ena = hwa = 0; 3147 3148 /* checksum offload */ 3149 if (p->if_capabilities & IFCAP_VXLAN_HWCSUM) 3150 cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 3151 if (p->if_capenable & IFCAP_VXLAN_HWCSUM) { 3152 ena |= sc->vxl_reqcap & p->if_capenable & 3153 (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 3154 if (ena & IFCAP_TXCSUM) { 3155 if (p->if_hwassist & CSUM_INNER_IP) 3156 hwa |= CSUM_IP; 3157 if (p->if_hwassist & CSUM_INNER_IP_UDP) 3158 hwa |= CSUM_IP_UDP; 3159 if (p->if_hwassist & CSUM_INNER_IP_TCP) 3160 hwa |= CSUM_IP_TCP; 3161 } 3162 if (ena & IFCAP_TXCSUM_IPV6) { 3163 if (p->if_hwassist & CSUM_INNER_IP6_UDP) 3164 hwa |= CSUM_IP6_UDP; 3165 if (p->if_hwassist & CSUM_INNER_IP6_TCP) 3166 hwa |= CSUM_IP6_TCP; 3167 } 3168 } 3169 3170 /* hardware TSO */ 3171 if (p->if_capabilities & IFCAP_VXLAN_HWTSO) { 3172 cap |= p->if_capabilities & IFCAP_TSO; 3173 if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen) 3174 ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen; 3175 else 3176 ifp->if_hw_tsomax = p->if_hw_tsomax; 3177 /* XXX: tsomaxsegcount decrement is cxgbe specific */ 3178 ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1; 3179 ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize; 3180 } 3181 if (p->if_capenable & IFCAP_VXLAN_HWTSO) { 3182 ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO; 3183 if (ena & IFCAP_TSO) { 3184 if (p->if_hwassist & CSUM_INNER_IP_TSO) 3185 hwa |= CSUM_IP_TSO; 3186 if (p->if_hwassist & CSUM_INNER_IP6_TSO) 3187 hwa |= CSUM_IP6_TSO; 3188 } 3189 } 3190 3191 ifp->if_capabilities |= cap; 3192 ifp->if_capenable |= ena; 3193 ifp->if_hwassist |= hwa; 3194 if (rel) 3195 if_rele(p); 3196 done: 3197 CURVNET_RESTORE(); 3198 NET_EPOCH_EXIT(et); 3199 } 3200 3201 static int 3202 vxlan_clone_create(struct if_clone *ifc, char *name, size_t len, 3203 struct ifc_data *ifd, struct ifnet 
**ifpp) 3204 { 3205 struct vxlan_softc *sc; 3206 struct ifnet *ifp; 3207 struct ifvxlanparam vxlp; 3208 int error; 3209 3210 sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO); 3211 sc->vxl_unit = ifd->unit; 3212 sc->vxl_fibnum = curthread->td_proc->p_fibnum; 3213 vxlan_set_default_config(sc); 3214 error = vxlan_stats_alloc(sc); 3215 if (error != 0) 3216 goto fail; 3217 3218 if (ifd->params != NULL) { 3219 error = ifc_copyin(ifd, &vxlp, sizeof(vxlp)); 3220 if (error) 3221 goto fail; 3222 3223 error = vxlan_set_user_config(sc, &vxlp); 3224 if (error) 3225 goto fail; 3226 } 3227 3228 ifp = if_alloc(IFT_ETHER); 3229 if (ifp == NULL) { 3230 error = ENOSPC; 3231 goto fail; 3232 } 3233 3234 sc->vxl_ifp = ifp; 3235 rm_init(&sc->vxl_lock, "vxlanrm"); 3236 callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0); 3237 sc->vxl_port_hash_key = arc4random(); 3238 vxlan_ftable_init(sc); 3239 3240 vxlan_sysctl_setup(sc); 3241 3242 ifp->if_softc = sc; 3243 if_initname(ifp, vxlan_name, ifd->unit); 3244 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3245 ifp->if_init = vxlan_init; 3246 ifp->if_ioctl = vxlan_ioctl; 3247 ifp->if_transmit = vxlan_transmit; 3248 ifp->if_qflush = vxlan_qflush; 3249 ifp->if_capabilities = VXLAN_BASIC_IFCAPS; 3250 ifp->if_capenable = VXLAN_BASIC_IFCAPS; 3251 sc->vxl_reqcap = -1; 3252 vxlan_set_hwcaps(sc); 3253 3254 ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status); 3255 ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL); 3256 ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO); 3257 3258 ether_gen_addr(ifp, &sc->vxl_hwaddr); 3259 ether_ifattach(ifp, sc->vxl_hwaddr.octet); 3260 3261 ifp->if_baudrate = 0; 3262 3263 VXLAN_WLOCK(sc); 3264 vxlan_setup_interface_hdrlen(sc); 3265 VXLAN_WUNLOCK(sc); 3266 *ifpp = ifp; 3267 3268 return (0); 3269 3270 fail: 3271 free(sc, M_VXLAN); 3272 return (error); 3273 } 3274 3275 static int 3276 vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 3277 { 3278 struct vxlan_softc *sc; 3279 3280 sc = ifp->if_softc; 3281 3282 vxlan_teardown(sc); 3283 3284 vxlan_ftable_flush(sc, 1); 3285 3286 ether_ifdetach(ifp); 3287 if_free(ifp); 3288 ifmedia_removeall(&sc->vxl_media); 3289 3290 vxlan_ftable_fini(sc); 3291 3292 vxlan_sysctl_destroy(sc); 3293 rm_destroy(&sc->vxl_lock); 3294 vxlan_stats_free(sc); 3295 free(sc, M_VXLAN); 3296 3297 return (0); 3298 } 3299 3300 /* BMV: Taken from if_bridge. */ 3301 static uint32_t 3302 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr) 3303 { 3304 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key; 3305 3306 b += addr[5] << 8; 3307 b += addr[4]; 3308 a += addr[3] << 24; 3309 a += addr[2] << 16; 3310 a += addr[1] << 8; 3311 a += addr[0]; 3312 3313 /* 3314 * The following hash function is adapted from "Hash Functions" by Bob Jenkins 3315 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 3316 */ 3317 #define mix(a, b, c) \ 3318 do { \ 3319 a -= b; a -= c; a ^= (c >> 13); \ 3320 b -= c; b -= a; b ^= (a << 8); \ 3321 c -= a; c -= b; c ^= (b >> 13); \ 3322 a -= b; a -= c; a ^= (c >> 12); \ 3323 b -= c; b -= a; b ^= (a << 16); \ 3324 c -= a; c -= b; c ^= (b >> 5); \ 3325 a -= b; a -= c; a ^= (c >> 3); \ 3326 b -= c; b -= a; b ^= (a << 10); \ 3327 c -= a; c -= b; c ^= (b >> 15); \ 3328 } while (0) 3329 3330 mix(a, b, c); 3331 3332 #undef mix 3333 3334 return (c); 3335 } 3336 3337 static int 3338 vxlan_media_change(struct ifnet *ifp) 3339 { 3340 3341 /* Ignore. 
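	 * A vxlan interface has no underlying PHY to program; the media is
	 * a fixed placeholder (see vxlan_media_status()), so any change
	 * request is accepted as a no-op.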
*/ 3342 return (0); 3343 } 3344 3345 static void 3346 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3347 { 3348 3349 ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID; 3350 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3351 } 3352 3353 static int 3354 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr, 3355 const struct sockaddr *sa) 3356 { 3357 3358 return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len)); 3359 } 3360 3361 static void 3362 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr, 3363 const struct sockaddr *sa) 3364 { 3365 3366 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3367 bzero(vxladdr, sizeof(*vxladdr)); 3368 3369 if (sa->sa_family == AF_INET) { 3370 vxladdr->in4 = *satoconstsin(sa); 3371 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3372 } else if (sa->sa_family == AF_INET6) { 3373 vxladdr->in6 = *satoconstsin6(sa); 3374 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3375 } 3376 } 3377 3378 static int 3379 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr, 3380 const struct sockaddr *sa) 3381 { 3382 int equal; 3383 3384 if (sa->sa_family == AF_INET) { 3385 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3386 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr; 3387 } else if (sa->sa_family == AF_INET6) { 3388 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3389 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr); 3390 } else 3391 equal = 0; 3392 3393 return (equal); 3394 } 3395 3396 static void 3397 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr, 3398 const struct sockaddr *sa) 3399 { 3400 3401 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3402 3403 if (sa->sa_family == AF_INET) { 3404 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3405 vxladdr->in4.sin_family = AF_INET; 3406 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3407 vxladdr->in4.sin_addr = *in4; 3408 } else if (sa->sa_family == AF_INET6) { 3409 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3410 vxladdr->in6.sin6_family = AF_INET6; 3411 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3412 vxladdr->in6.sin6_addr = *in6; 3413 } 3414 } 3415 3416 static int 3417 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec) 3418 { 3419 const struct sockaddr *sa; 3420 int supported; 3421 3422 sa = &vxladdr->sa; 3423 supported = 0; 3424 3425 if (sa->sa_family == AF_UNSPEC && unspec != 0) { 3426 supported = 1; 3427 } else if (sa->sa_family == AF_INET) { 3428 #ifdef INET 3429 supported = 1; 3430 #endif 3431 } else if (sa->sa_family == AF_INET6) { 3432 #ifdef INET6 3433 supported = 1; 3434 #endif 3435 } 3436 3437 return (supported); 3438 } 3439 3440 static int 3441 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr) 3442 { 3443 const struct sockaddr *sa; 3444 int any; 3445 3446 sa = &vxladdr->sa; 3447 3448 if (sa->sa_family == AF_INET) { 3449 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3450 any = in4->s_addr == INADDR_ANY; 3451 } else if (sa->sa_family == AF_INET6) { 3452 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3453 any = IN6_IS_ADDR_UNSPECIFIED(in6); 3454 } else 3455 any = -1; 3456 3457 return (any); 3458 } 3459 3460 static int 3461 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr) 3462 { 3463 const struct sockaddr *sa; 3464 int mc; 3465 3466 sa = &vxladdr->sa; 3467 3468 if (sa->sa_family == AF_INET) { 3469 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3470 mc = IN_MULTICAST(ntohl(in4->s_addr)); 3471 } else if (sa->sa_family 
== AF_INET6) { 3472 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3473 mc = IN6_IS_ADDR_MULTICAST(in6); 3474 } else 3475 mc = -1; 3476 3477 return (mc); 3478 } 3479 3480 static int 3481 vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr) 3482 { 3483 int error; 3484 3485 MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr)); 3486 #ifdef INET6 3487 error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone); 3488 #else 3489 error = EAFNOSUPPORT; 3490 #endif 3491 3492 return (error); 3493 } 3494 3495 static int 3496 vxlan_can_change_config(struct vxlan_softc *sc) 3497 { 3498 struct ifnet *ifp; 3499 3500 ifp = sc->vxl_ifp; 3501 VXLAN_LOCK_ASSERT(sc); 3502 3503 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3504 return (0); 3505 if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN)) 3506 return (0); 3507 3508 return (1); 3509 } 3510 3511 static int 3512 vxlan_check_vni(uint32_t vni) 3513 { 3514 3515 return (vni >= VXLAN_VNI_MAX); 3516 } 3517 3518 static int 3519 vxlan_check_ttl(int ttl) 3520 { 3521 3522 return (ttl > MAXTTL); 3523 } 3524 3525 static int 3526 vxlan_check_ftable_timeout(uint32_t timeout) 3527 { 3528 3529 return (timeout > VXLAN_FTABLE_MAX_TIMEOUT); 3530 } 3531 3532 static int 3533 vxlan_check_ftable_max(uint32_t max) 3534 { 3535 3536 return (max > VXLAN_FTABLE_MAX); 3537 } 3538 3539 static void 3540 vxlan_sysctl_setup(struct vxlan_softc *sc) 3541 { 3542 struct sysctl_ctx_list *ctx; 3543 struct sysctl_oid *node; 3544 struct vxlan_statistics *stats; 3545 char namebuf[8]; 3546 3547 ctx = &sc->vxl_sysctl_ctx; 3548 stats = &sc->vxl_stats; 3549 snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit); 3550 3551 sysctl_ctx_init(ctx); 3552 sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx, 3553 SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf, 3554 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3555 3556 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), 3557 OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3558 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count", 3559 CTLFLAG_RD, &sc->vxl_ftable_cnt, 0, 3560 "Number of entries in forwarding table"); 3561 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max", 3562 CTLFLAG_RD, &sc->vxl_ftable_max, 0, 3563 "Maximum number of entries allowed in forwarding table"); 3564 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout", 3565 CTLFLAG_RD, &sc->vxl_ftable_timeout, 0, 3566 "Number of seconds between prunes of the forwarding table"); 3567 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump", 3568 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP, 3569 sc, 0, vxlan_ftable_sysctl_dump, "A", 3570 "Dump the forwarding table entries"); 3571 3572 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), 3573 OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3574 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 3575 "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0, 3576 "Forwarding table reached maximum entries"); 3577 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 3578 "ftable_lock_upgrade_failed", CTLFLAG_RD, 3579 &stats->ftable_lock_upgrade_failed, 0, 3580 "Forwarding table update required lock upgrade"); 3581 3582 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum", 3583 CTLFLAG_RD, &stats->txcsum, 3584 "# of times hardware assisted with tx checksum"); 3585 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso", 3586 CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO"); 3587 SYSCTL_ADD_COUNTER_U64(ctx,
SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum", 3588 CTLFLAG_RD, &stats->rxcsum, 3589 "# of times hardware assisted with rx checksum"); 3590 } 3591 3592 static void 3593 vxlan_sysctl_destroy(struct vxlan_softc *sc) 3594 { 3595 3596 sysctl_ctx_free(&sc->vxl_sysctl_ctx); 3597 sc->vxl_sysctl_node = NULL; 3598 } 3599 3600 static int 3601 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def) 3602 { 3603 char path[64]; 3604 3605 snprintf(path, sizeof(path), "net.link.vxlan.%d.%s", 3606 sc->vxl_unit, knob); 3607 TUNABLE_INT_FETCH(path, &def); 3608 3609 return (def); 3610 } 3611 3612 static void 3613 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp) 3614 { 3615 struct vxlan_softc_head list; 3616 struct vxlan_socket *vso; 3617 struct vxlan_softc *sc, *tsc; 3618 3619 LIST_INIT(&list); 3620 3621 if (ifp->if_flags & IFF_RENAMING) 3622 return; 3623 if ((ifp->if_flags & IFF_MULTICAST) == 0) 3624 return; 3625 3626 VXLAN_LIST_LOCK(); 3627 LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) 3628 vxlan_socket_ifdetach(vso, ifp, &list); 3629 VXLAN_LIST_UNLOCK(); 3630 3631 LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) { 3632 LIST_REMOVE(sc, vxl_ifdetach_list); 3633 3634 sx_xlock(&vxlan_sx); 3635 VXLAN_WLOCK(sc); 3636 if (sc->vxl_flags & VXLAN_FLAG_INIT) 3637 vxlan_init_wait(sc); 3638 vxlan_teardown_locked(sc); 3639 sx_xunlock(&vxlan_sx); 3640 } 3641 } 3642 3643 static void 3644 vxlan_load(void) 3645 { 3646 3647 mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF); 3648 LIST_INIT(&vxlan_socket_list); 3649 vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 3650 vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY); 3651 3652 struct if_clone_addreq req = { 3653 .create_f = vxlan_clone_create, 3654 .destroy_f = vxlan_clone_destroy, 3655 .flags = IFC_F_AUTOUNIT, 3656 }; 3657 vxlan_cloner = ifc_attach_cloner(vxlan_name, &req); 3658 } 3659 3660 static void 3661 vxlan_unload(void) 3662 { 3663 3664 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 3665 vxlan_ifdetach_event_tag); 3666 ifc_detach_cloner(vxlan_cloner); 3667 mtx_destroy(&vxlan_list_mtx); 3668 MPASS(LIST_EMPTY(&vxlan_socket_list)); 3669 } 3670 3671 static int 3672 vxlan_modevent(module_t mod, int type, void *unused) 3673 { 3674 int error; 3675 3676 error = 0; 3677 3678 switch (type) { 3679 case MOD_LOAD: 3680 vxlan_load(); 3681 break; 3682 case MOD_UNLOAD: 3683 vxlan_unload(); 3684 break; 3685 default: 3686 error = ENOTSUP; 3687 break; 3688 } 3689 3690 return (error); 3691 } 3692 3693 static moduledata_t vxlan_mod = { 3694 "if_vxlan", 3695 vxlan_modevent, 3696 0 3697 }; 3698 3699 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 3700 MODULE_VERSION(if_vxlan, 1); 3701
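/*
 * Example usage (a sketch; see vxlan(4) and ifconfig(8) for the
 * authoritative syntax and the full list of parameters):
 *
 *	kldload if_vxlan
 *	ifconfig vxlan0 create vxlanid 42 vxlanlocal 192.0.2.1 \
 *	    vxlanremote 192.0.2.2
 *	ifconfig vxlan0 inet 10.1.1.1/24 up
 *
 * The create request is serviced by vxlan_clone_create() through the
 * cloner registered in vxlan_load(); the per-interface parameters are
 * delivered either in the ifvxlanparam structure at creation time or
 * later through the SIOCSDRVSPEC handlers above.
 */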