1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include "opt_bpf.h" 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/devctl.h> 39 #include <sys/jail.h> 40 #include <sys/kassert.h> 41 #include <sys/kernel.h> 42 #include <sys/limits.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/module.h> 46 #include <sys/priv.h> 47 #include <sys/proc.h> 48 #include <sys/socket.h> 49 #include <sys/sockio.h> 50 #include <sys/sysctl.h> 51 #include <sys/syslog.h> 52 #include <sys/taskqueue.h> 53 #include <sys/counter.h> 54 55 #include <net/ethernet.h> 56 #include <net/if.h> 57 #include <net/if_var.h> 58 #include <net/if_dl.h> 59 #include <net/if_llatbl.h> 60 #include <net/if_private.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/vnet.h> 64 65 #if defined(INET) || defined(INET6) 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/ip_carp.h> 69 #include <netinet/ip_carp_nl.h> 70 #include <netinet/ip.h> 71 #include <machine/in_cksum.h> 72 #endif 73 #ifdef INET 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #endif 77 78 #ifdef INET6 79 #include <netinet/icmp6.h> 80 #include <netinet/ip6.h> 81 #include <netinet6/in6_var.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/scope6_var.h> 84 #include <netinet6/nd6.h> 85 #endif 86 87 #include <netlink/netlink.h> 88 #include <netlink/netlink_ctl.h> 89 #include <netlink/netlink_generic.h> 90 #include <netlink/netlink_message_parser.h> 91 92 #include <crypto/sha1.h> 93 94 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); 95 96 struct carp_softc { 97 struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ 98 struct ifaddr **sc_ifas; /* Our ifaddrs. */ 99 carp_version_t sc_version; /* carp or VRRPv3 */ 100 uint8_t sc_addr[ETHER_ADDR_LEN]; /* Our link level address. */ 101 struct callout sc_ad_tmo; /* Advertising timeout. */ 102 #ifdef INET 103 struct callout sc_md_tmo; /* Master down timeout. */ 104 #endif 105 #ifdef INET6 106 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ 107 #endif 108 struct mtx sc_mtx; 109 110 int sc_vhid; 111 union { 112 struct { /* sc_version == CARP_VERSION_CARP */ 113 int sc_advskew; 114 int sc_advbase; 115 struct in_addr sc_carpaddr; 116 struct in6_addr sc_carpaddr6; 117 uint64_t sc_counter; 118 bool sc_init_counter; 119 #define CARP_HMAC_PAD 64 120 unsigned char sc_key[CARP_KEY_LEN]; 121 unsigned char sc_pad[CARP_HMAC_PAD]; 122 SHA1_CTX sc_sha1; 123 }; 124 struct { /* sc_version == CARP_VERSION_VRRPv3 */ 125 uint8_t sc_vrrp_prio; 126 uint16_t sc_vrrp_adv_inter; 127 uint16_t sc_vrrp_master_inter; 128 }; 129 }; 130 int sc_naddrs; 131 int sc_naddrs6; 132 int sc_ifasiz; 133 enum { INIT = 0, BACKUP, MASTER } sc_state; 134 int sc_suppress; 135 int sc_sendad_errors; 136 #define CARP_SENDAD_MAX_ERRORS 3 137 int sc_sendad_success; 138 #define CARP_SENDAD_MIN_SUCCESS 3 139 140 TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ 141 LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ 142 }; 143 144 struct carp_if { 145 #ifdef INET 146 int cif_naddrs; 147 #endif 148 #ifdef INET6 149 int cif_naddrs6; 150 #endif 151 TAILQ_HEAD(, carp_softc) cif_vrs; 152 #ifdef INET 153 struct ip_moptions cif_imo; 154 #endif 155 #ifdef INET6 156 struct ip6_moptions cif_im6o; 157 #endif 158 struct ifnet *cif_ifp; 159 struct mtx cif_mtx; 160 uint32_t cif_flags; 161 #define CIF_PROMISC 0x00000001 162 }; 163 164 /* Kernel equivalent of struct carpreq, but with more fields for new features. 165 * */ 166 struct carpkreq { 167 int carpr_count; 168 int carpr_vhid; 169 int carpr_state; 170 int carpr_advskew; 171 int carpr_advbase; 172 unsigned char carpr_key[CARP_KEY_LEN]; 173 /* Everything above this is identical to carpreq */ 174 struct in_addr carpr_addr; 175 struct in6_addr carpr_addr6; 176 carp_version_t carpr_version; 177 uint8_t carpr_vrrp_priority; 178 uint16_t carpr_vrrp_adv_inter; 179 }; 180 181 /* 182 * Brief design of carp(4). 183 * 184 * Any carp-capable ifnet may have a list of carp softcs hanging off 185 * its ifp->if_carp pointer. Each softc represents one unique virtual 186 * host id, or vhid. The softc has a back pointer to the ifnet. All 187 * softcs are joined in a global list, which has quite limited use. 188 * 189 * Any interface address that takes part in CARP negotiation has a 190 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either 191 * AF_INET or AF_INET6 address. 192 * 193 * Although, one can get the softc's backpointer to ifnet and traverse 194 * through its ifp->if_addrhead queue to find all interface addresses 195 * involved in CARP, we keep a growable array of ifaddr pointers. This 196 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that 197 * do calls into the network stack, thus avoiding LORs. 198 * 199 * Locking: 200 * 201 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), 202 * callout-driven events and ioctl()s. 203 * 204 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. 205 * To traverse the global list we use the mutex carp_mtx. 206 * 207 * Known issues with locking: 208 * 209 * - On module unload we may race (?) with packet processing thread 210 * dereferencing our function pointers. 211 */ 212 213 /* Accept incoming CARP packets. */ 214 VNET_DEFINE_STATIC(int, carp_allow) = 1; 215 #define V_carp_allow VNET(carp_allow) 216 217 /* Set DSCP in outgoing CARP packets. */ 218 VNET_DEFINE_STATIC(int, carp_dscp) = 56; 219 #define V_carp_dscp VNET(carp_dscp) 220 221 /* Preempt slower nodes. */ 222 VNET_DEFINE_STATIC(int, carp_preempt) = 0; 223 #define V_carp_preempt VNET(carp_preempt) 224 225 /* Log level. */ 226 VNET_DEFINE_STATIC(int, carp_log) = 1; 227 #define V_carp_log VNET(carp_log) 228 229 /* Global advskew demotion. */ 230 VNET_DEFINE_STATIC(int, carp_demotion) = 0; 231 #define V_carp_demotion VNET(carp_demotion) 232 233 /* Send error demotion factor. */ 234 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; 235 #define V_carp_senderr_adj VNET(carp_senderr_adj) 236 237 /* Iface down demotion factor. */ 238 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 239 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 240 241 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 242 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 243 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 244 245 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 246 "CARP"); 247 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 248 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 249 &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", 250 "Accept incoming CARP packets"); 251 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 252 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 253 0, 0, carp_dscp_sysctl, "I", 254 "DSCP value for carp packets"); 255 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 256 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 257 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 258 &VNET_NAME(carp_log), 0, "CARP log level"); 259 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 260 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 261 0, 0, carp_demote_adj_sysctl, "I", 262 "Adjust demotion factor (skew of advskew)"); 263 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 264 CTLFLAG_VNET | CTLFLAG_RW, 265 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 266 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 267 CTLFLAG_VNET | CTLFLAG_RW, 268 &VNET_NAME(carp_ifdown_adj), 0, 269 "Interface down demotion factor adjustment"); 270 271 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 272 VNET_PCPUSTAT_SYSINIT(carpstats); 273 VNET_PCPUSTAT_SYSUNINIT(carpstats); 274 275 #define CARPSTATS_ADD(name, val) \ 276 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 277 sizeof(uint64_t)], (val)) 278 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 279 280 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 281 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 282 283 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 284 NULL, MTX_DEF) 285 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 286 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 287 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 288 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 289 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 290 NULL, MTX_DEF) 291 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 292 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 293 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 294 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 295 #define CIF_FREE(cif) do { \ 296 CIF_LOCK(cif); \ 297 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 298 carp_free_if(cif); \ 299 else \ 300 CIF_UNLOCK(cif); \ 301 } while (0) 302 303 #define CARP_LOG(...) do { \ 304 if (V_carp_log > 0) \ 305 log(LOG_INFO, "carp: " __VA_ARGS__); \ 306 } while (0) 307 308 #define CARP_DEBUG(...) do { \ 309 if (V_carp_log > 1) \ 310 log(LOG_DEBUG, __VA_ARGS__); \ 311 } while (0) 312 313 #define IFNET_FOREACH_IFA(ifp, ifa) \ 314 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 315 if ((ifa)->ifa_carp != NULL) 316 317 #define CARP_FOREACH_IFA(sc, ifa) \ 318 CARP_LOCK_ASSERT(sc); \ 319 for (int _i = 0; \ 320 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 321 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 322 ++_i) 323 324 #define IFNET_FOREACH_CARP(ifp, sc) \ 325 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 326 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 327 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 328 329 #define DEMOTE_ADVSKEW(sc) \ 330 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 331 CARP_MAXSKEW : \ 332 (((sc)->sc_advskew + V_carp_demotion < 0) ? \ 333 0 : ((sc)->sc_advskew + V_carp_demotion))) 334 335 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); 336 static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); 337 static struct carp_softc 338 *carp_alloc(struct ifnet *, carp_version_t, int); 339 static void carp_destroy(struct carp_softc *); 340 static struct carp_if 341 *carp_alloc_if(struct ifnet *); 342 static void carp_free_if(struct carp_if *); 343 static void carp_set_state(struct carp_softc *, int, const char* reason); 344 static void carp_sc_state(struct carp_softc *); 345 static void carp_setrun(struct carp_softc *, sa_family_t); 346 static void carp_master_down(void *); 347 static void carp_master_down_locked(struct carp_softc *, 348 const char* reason); 349 static void carp_send_ad_locked(struct carp_softc *); 350 static void vrrp_send_ad_locked(struct carp_softc *); 351 static void carp_addroute(struct carp_softc *); 352 static void carp_ifa_addroute(struct ifaddr *); 353 static void carp_delroute(struct carp_softc *); 354 static void carp_ifa_delroute(struct ifaddr *); 355 static void carp_send_ad_all(void *, int); 356 static void carp_demote_adj(int, char *); 357 358 static LIST_HEAD(, carp_softc) carp_list = LIST_HEAD_INITIALIZER(carp_list); 359 static struct mtx carp_mtx; 360 static struct sx carp_sx; 361 static struct task carp_sendall_task = 362 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 363 364 static int 365 carp_is_supported_if(if_t ifp) 366 { 367 if (ifp == NULL) 368 return (ENXIO); 369 370 switch (ifp->if_type) { 371 case IFT_ETHER: 372 case IFT_L2VLAN: 373 case IFT_BRIDGE: 374 break; 375 default: 376 return (EOPNOTSUPP); 377 } 378 379 return (0); 380 } 381 382 static void 383 carp_hmac_prepare(struct carp_softc *sc) 384 { 385 uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; 386 uint8_t vhid = sc->sc_vhid & 0xff; 387 struct ifaddr *ifa; 388 int i, found; 389 #ifdef INET 390 struct in_addr last, cur, in; 391 #endif 392 #ifdef INET6 393 struct in6_addr last6, cur6, in6; 394 #endif 395 396 CARP_LOCK_ASSERT(sc); 397 MPASS(sc->sc_version == CARP_VERSION_CARP); 398 399 /* Compute ipad from key. */ 400 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 401 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 402 for (i = 0; i < sizeof(sc->sc_pad); i++) 403 sc->sc_pad[i] ^= 0x36; 404 405 /* Precompute first part of inner hash. */ 406 SHA1Init(&sc->sc_sha1); 407 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 408 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 409 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 410 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 411 #ifdef INET 412 cur.s_addr = 0; 413 do { 414 found = 0; 415 last = cur; 416 cur.s_addr = 0xffffffff; 417 CARP_FOREACH_IFA(sc, ifa) { 418 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 419 if (ifa->ifa_addr->sa_family == AF_INET && 420 ntohl(in.s_addr) > ntohl(last.s_addr) && 421 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 422 cur.s_addr = in.s_addr; 423 found++; 424 } 425 } 426 if (found) 427 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 428 } while (found); 429 #endif /* INET */ 430 #ifdef INET6 431 memset(&cur6, 0, sizeof(cur6)); 432 do { 433 found = 0; 434 last6 = cur6; 435 memset(&cur6, 0xff, sizeof(cur6)); 436 CARP_FOREACH_IFA(sc, ifa) { 437 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 438 if (IN6_IS_SCOPE_EMBED(&in6)) 439 in6.s6_addr16[1] = 0; 440 if (ifa->ifa_addr->sa_family == AF_INET6 && 441 memcmp(&in6, &last6, sizeof(in6)) > 0 && 442 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 443 cur6 = in6; 444 found++; 445 } 446 } 447 if (found) 448 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 449 } while (found); 450 #endif /* INET6 */ 451 452 /* convert ipad to opad */ 453 for (i = 0; i < sizeof(sc->sc_pad); i++) 454 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 455 } 456 457 static void 458 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 459 unsigned char md[20]) 460 { 461 SHA1_CTX sha1ctx; 462 463 CARP_LOCK_ASSERT(sc); 464 465 /* fetch first half of inner hash */ 466 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 467 468 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 469 SHA1Final(md, &sha1ctx); 470 471 /* outer hash */ 472 SHA1Init(&sha1ctx); 473 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 474 SHA1Update(&sha1ctx, md, 20); 475 SHA1Final(md, &sha1ctx); 476 } 477 478 static int 479 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 480 unsigned char md[20]) 481 { 482 unsigned char md2[20]; 483 484 CARP_LOCK_ASSERT(sc); 485 486 carp_hmac_generate(sc, counter, md2); 487 488 return (bcmp(md, md2, sizeof(md2))); 489 } 490 491 static int 492 vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) 493 { 494 uint16_t cksum; 495 496 /* 497 * Note that VRRPv3 checksums are different from CARP checksums. 498 * Carp just calculates the checksum over the packet. 499 * VRRPv3 includes the pseudo-header checksum as well. 500 */ 501 cksum = in_cksum_skip(m, off + len, off); 502 cksum -= phdrcksum; 503 504 return (cksum); 505 } 506 507 /* 508 * process input packet. 509 * we have rearranged checks order compared to the rfc, 510 * but it seems more efficient this way or not possible otherwise. 511 */ 512 #ifdef INET 513 static int 514 carp_input(struct mbuf **mp, int *offp, int proto) 515 { 516 struct mbuf *m = *mp; 517 struct ip *ip; 518 struct vrrpv3_header *vh; 519 int iplen; 520 int minlen; 521 int totlen; 522 523 iplen = *offp; 524 *mp = NULL; 525 526 CARPSTATS_INC(carps_ipackets); 527 528 if (!V_carp_allow) { 529 m_freem(m); 530 return (IPPROTO_DONE); 531 } 532 533 /* Ensure we have enough header to figure out the version. */ 534 if (m->m_pkthdr.len < iplen + sizeof(*vh)) { 535 CARPSTATS_INC(carps_badlen); 536 CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " 537 "on %s\n", __func__, m->m_len - sizeof(struct ip), 538 if_name(m->m_pkthdr.rcvif)); 539 m_freem(m); 540 return (IPPROTO_DONE); 541 } 542 543 if (m->m_len < iplen + sizeof(*vh)) { 544 if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { 545 CARPSTATS_INC(carps_hdrops); 546 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 547 return (IPPROTO_DONE); 548 } 549 } 550 ip = mtod(m, struct ip *); 551 totlen = ntohs(ip->ip_len); 552 vh = (struct vrrpv3_header *)((char *)ip + iplen); 553 554 switch (vh->vrrp_version) { 555 case CARP_VERSION_CARP: 556 minlen = sizeof(struct carp_header); 557 break; 558 case CARP_VERSION_VRRPv3: 559 minlen = sizeof(struct vrrpv3_header); 560 break; 561 default: 562 CARPSTATS_INC(carps_badver); 563 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 564 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 565 m_freem(m); 566 return (IPPROTO_DONE); 567 } 568 569 /* And now check the length again but with the real minimal length. */ 570 if (m->m_pkthdr.len < iplen + minlen) { 571 CARPSTATS_INC(carps_badlen); 572 CARP_DEBUG("%s: received len %zd < %d " 573 "on %s\n", __func__, m->m_len - sizeof(struct ip), 574 iplen + minlen, 575 if_name(m->m_pkthdr.rcvif)); 576 m_freem(m); 577 return (IPPROTO_DONE); 578 } 579 580 if (m->m_len < iplen + minlen) { 581 if ((m = m_pullup(m, iplen + minlen)) == NULL) { 582 CARPSTATS_INC(carps_hdrops); 583 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 584 return (IPPROTO_DONE); 585 } 586 ip = mtod(m, struct ip *); 587 vh = (struct vrrpv3_header *)((char *)ip + iplen); 588 } 589 590 switch (vh->vrrp_version) { 591 case CARP_VERSION_CARP: { 592 struct carp_header *ch; 593 594 /* verify the CARP checksum */ 595 if (in_cksum_skip(m, totlen, iplen)) { 596 CARPSTATS_INC(carps_badsum); 597 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 598 if_name(m->m_pkthdr.rcvif)); 599 m_freem(m); 600 break; 601 } 602 ch = (struct carp_header *)((char *)ip + iplen); 603 carp_input_c(m, ch, AF_INET, ip->ip_ttl); 604 break; 605 } 606 case CARP_VERSION_VRRPv3: { 607 uint16_t phdrcksum; 608 609 phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 610 htonl((u_short)(totlen - iplen) + ip->ip_p)); 611 vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, 612 phdrcksum); 613 break; 614 } 615 default: 616 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 617 } 618 619 return (IPPROTO_DONE); 620 } 621 #endif 622 623 #ifdef INET6 624 static int 625 carp6_input(struct mbuf **mp, int *offp, int proto) 626 { 627 struct mbuf *m = *mp; 628 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 629 struct vrrpv3_header *vh; 630 u_int len, minlen; 631 632 CARPSTATS_INC(carps_ipackets6); 633 634 if (!V_carp_allow) { 635 m_freem(m); 636 return (IPPROTO_DONE); 637 } 638 639 /* check if received on a valid carp interface */ 640 if (m->m_pkthdr.rcvif->if_carp == NULL) { 641 CARPSTATS_INC(carps_badif); 642 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 643 __func__, if_name(m->m_pkthdr.rcvif)); 644 m_freem(m); 645 return (IPPROTO_DONE); 646 } 647 648 if (m->m_len < *offp + sizeof(*vh)) { 649 len = m->m_len; 650 m = m_pullup(m, *offp + sizeof(*vh)); 651 if (m == NULL) { 652 CARPSTATS_INC(carps_badlen); 653 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 654 return (IPPROTO_DONE); 655 } 656 ip6 = mtod(m, struct ip6_hdr *); 657 } 658 vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); 659 660 switch (vh->vrrp_version) { 661 case CARP_VERSION_CARP: 662 minlen = sizeof(struct carp_header); 663 break; 664 case CARP_VERSION_VRRPv3: 665 minlen = sizeof(struct vrrpv3_header); 666 break; 667 default: 668 CARPSTATS_INC(carps_badver); 669 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 670 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 671 m_freem(m); 672 return (IPPROTO_DONE); 673 } 674 675 /* And now check the length again but with the real minimal length. */ 676 if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { 677 CARPSTATS_INC(carps_badlen); 678 CARP_DEBUG("%s: received len %zd < %zd " 679 "on %s\n", __func__, m->m_len - sizeof(struct ip), 680 sizeof(*ip6) + minlen, 681 if_name(m->m_pkthdr.rcvif)); 682 m_freem(m); 683 return (IPPROTO_DONE); 684 } 685 686 if (m->m_len < sizeof(*ip6) + minlen) { 687 if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { 688 CARPSTATS_INC(carps_hdrops); 689 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 690 return (IPPROTO_DONE); 691 } 692 ip6 = mtod(m, struct ip6_hdr *); 693 vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 694 } 695 696 switch (vh->vrrp_version) { 697 case CARP_VERSION_CARP: { 698 struct carp_header *ch; 699 700 /* verify the CARP checksum */ 701 if (in_cksum_skip(m, *offp + sizeof(struct carp_header), 702 *offp)) { 703 CARPSTATS_INC(carps_badsum); 704 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 705 if_name(m->m_pkthdr.rcvif)); 706 m_freem(m); 707 break; 708 } 709 ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); 710 carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); 711 break; 712 } 713 case CARP_VERSION_VRRPv3: { 714 uint16_t phdrcksum; 715 716 phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 717 ip6->ip6_nxt, 0); 718 vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, 719 ntohs(ip6->ip6_plen), phdrcksum); 720 break; 721 } 722 default: 723 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 724 } 725 return (IPPROTO_DONE); 726 } 727 #endif /* INET6 */ 728 729 /* 730 * This routine should not be necessary at all, but some switches 731 * (VMWare ESX vswitches) can echo our own packets back at us, 732 * and we must ignore them or they will cause us to drop out of 733 * MASTER mode. 734 * 735 * We cannot catch all cases of network loops. Instead, what we 736 * do here is catch any packet that arrives with a carp header 737 * with a VHID of 0, that comes from an address that is our own. 738 * These packets are by definition "from us" (even if they are from 739 * a misconfigured host that is pretending to be us). 740 * 741 * The VHID test is outside this mini-function. 742 */ 743 static int 744 carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 745 { 746 #ifdef INET 747 struct ip *ip4; 748 struct in_addr in4; 749 #endif 750 #ifdef INET6 751 struct ip6_hdr *ip6; 752 struct in6_addr in6; 753 #endif 754 755 switch (af) { 756 #ifdef INET 757 case AF_INET: 758 ip4 = mtod(m, struct ip *); 759 in4 = ifatoia(ifa)->ia_addr.sin_addr; 760 return (in4.s_addr == ip4->ip_src.s_addr); 761 #endif 762 #ifdef INET6 763 case AF_INET6: 764 ip6 = mtod(m, struct ip6_hdr *); 765 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 766 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 767 #endif 768 default: 769 break; 770 } 771 return (0); 772 } 773 774 static struct ifaddr * 775 carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) 776 { 777 struct ifnet *ifp = m->m_pkthdr.rcvif; 778 struct ifaddr *ifa, *match; 779 int error; 780 781 NET_EPOCH_ASSERT(); 782 783 /* 784 * Verify that the VHID is valid on the receiving interface. 785 * 786 * There should be just one match. If there are none 787 * the VHID is not valid and we drop the packet. If 788 * there are multiple VHID matches, take just the first 789 * one, for compatibility with previous code. While we're 790 * scanning, check for obvious loops in the network topology 791 * (these should never happen, and as noted above, we may 792 * miss real loops; this is just a double-check). 793 */ 794 error = 0; 795 match = NULL; 796 IFNET_FOREACH_IFA(ifp, ifa) { 797 if (match == NULL && ifa->ifa_carp != NULL && 798 ifa->ifa_addr->sa_family == af && 799 ifa->ifa_carp->sc_vhid == vhid) 800 match = ifa; 801 if (vhid == 0 && carp_source_is_self(m, ifa, af)) 802 error = ELOOP; 803 } 804 ifa = error ? NULL : match; 805 if (ifa != NULL) 806 ifa_ref(ifa); 807 808 if (ifa == NULL) { 809 if (error == ELOOP) { 810 CARP_DEBUG("dropping looped packet on interface %s\n", 811 if_name(ifp)); 812 CARPSTATS_INC(carps_badif); /* ??? */ 813 } else { 814 CARPSTATS_INC(carps_badvhid); 815 } 816 } 817 818 return (ifa); 819 } 820 821 static void 822 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) 823 { 824 struct ifnet *ifp = m->m_pkthdr.rcvif; 825 struct ifaddr *ifa; 826 struct carp_softc *sc; 827 uint64_t tmp_counter; 828 struct timeval sc_tv, ch_tv; 829 bool multicast = false; 830 831 NET_EPOCH_ASSERT(); 832 MPASS(ch->carp_version == CARP_VERSION_CARP); 833 834 ifa = carp_find_ifa(m, af, ch->carp_vhid); 835 if (ifa == NULL) { 836 m_freem(m); 837 return; 838 } 839 840 sc = ifa->ifa_carp; 841 CARP_LOCK(sc); 842 843 /* verify the CARP version. */ 844 if (sc->sc_version != CARP_VERSION_CARP) { 845 CARP_UNLOCK(sc); 846 847 CARPSTATS_INC(carps_badver); 848 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 849 ch->carp_version); 850 ifa_free(ifa); 851 m_freem(m); 852 return; 853 } 854 855 if (ifa->ifa_addr->sa_family == AF_INET) { 856 multicast = IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)); 857 } else { 858 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); 859 } 860 ifa_free(ifa); 861 862 /* verify that the IP TTL is 255, but only if we're not in unicast mode. */ 863 if (multicast && ttl != CARP_DFLTTL) { 864 CARPSTATS_INC(carps_badttl); 865 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 866 ttl, if_name(m->m_pkthdr.rcvif)); 867 goto out; 868 } 869 870 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 871 CARPSTATS_INC(carps_badauth); 872 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 873 sc->sc_vhid, if_name(ifp)); 874 goto out; 875 } 876 877 tmp_counter = ntohl(ch->carp_counter[0]); 878 tmp_counter = tmp_counter<<32; 879 tmp_counter += ntohl(ch->carp_counter[1]); 880 881 /* XXX Replay protection goes here */ 882 883 sc->sc_init_counter = false; 884 sc->sc_counter = tmp_counter; 885 886 sc_tv.tv_sec = sc->sc_advbase; 887 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 888 ch_tv.tv_sec = ch->carp_advbase; 889 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 890 891 switch (sc->sc_state) { 892 case INIT: 893 break; 894 case MASTER: 895 /* 896 * If we receive an advertisement from a master who's going to 897 * be more frequent than us, go into BACKUP state. 898 */ 899 if (timevalcmp(&sc_tv, &ch_tv, >) || 900 timevalcmp(&sc_tv, &ch_tv, ==)) { 901 callout_stop(&sc->sc_ad_tmo); 902 carp_set_state(sc, BACKUP, 903 "more frequent advertisement received"); 904 carp_setrun(sc, 0); 905 carp_delroute(sc); 906 } 907 break; 908 case BACKUP: 909 /* 910 * If we're pre-empting masters who advertise slower than us, 911 * and this one claims to be slower, treat him as down. 912 */ 913 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 914 carp_master_down_locked(sc, 915 "preempting a slower master"); 916 break; 917 } 918 919 /* 920 * If the master is going to advertise at such a low frequency 921 * that he's guaranteed to time out, we'd might as well just 922 * treat him as timed out now. 923 */ 924 sc_tv.tv_sec = sc->sc_advbase * 3; 925 if (timevalcmp(&sc_tv, &ch_tv, <)) { 926 carp_master_down_locked(sc, "master will time out"); 927 break; 928 } 929 930 /* 931 * Otherwise, we reset the counter and wait for the next 932 * advertisement. 933 */ 934 carp_setrun(sc, af); 935 break; 936 } 937 938 out: 939 CARP_UNLOCK(sc); 940 m_freem(m); 941 } 942 943 static void 944 vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, 945 int len, uint16_t phdrcksum) 946 { 947 struct vrrpv3_header *vh = mtodo(m, off); 948 struct ifnet *ifp = m->m_pkthdr.rcvif; 949 struct ifaddr *ifa; 950 struct carp_softc *sc; 951 952 NET_EPOCH_ASSERT(); 953 MPASS(vh->vrrp_version == CARP_VERSION_VRRPv3); 954 955 ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); 956 if (ifa == NULL) { 957 m_freem(m); 958 return; 959 } 960 961 sc = ifa->ifa_carp; 962 CARP_LOCK(sc); 963 964 ifa_free(ifa); 965 966 /* verify the CARP version. */ 967 if (sc->sc_version != CARP_VERSION_VRRPv3) { 968 CARP_UNLOCK(sc); 969 970 CARPSTATS_INC(carps_badver); 971 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 972 vh->vrrp_version); 973 m_freem(m); 974 return; 975 } 976 977 /* verify that the IP TTL is 255. */ 978 if (ttl != CARP_DFLTTL) { 979 CARPSTATS_INC(carps_badttl); 980 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 981 ttl, if_name(m->m_pkthdr.rcvif)); 982 goto out; 983 } 984 985 if (vrrp_checksum_verify(m, off, len, phdrcksum)) { 986 CARPSTATS_INC(carps_badsum); 987 CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, 988 sc->sc_vhid, if_name(ifp)); 989 goto out; 990 } 991 992 /* RFC9568, 7.1 Receiving VRRP packets. */ 993 if (sc->sc_vrrp_prio == 255) { 994 CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", 995 __func__); 996 goto out; 997 } 998 999 /* XXX TODO Check IP address payload. */ 1000 1001 sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); 1002 1003 switch (sc->sc_state) { 1004 case INIT: 1005 break; 1006 case MASTER: 1007 /* 1008 * If we receive an advertisement from a master who's going to 1009 * be more frequent than us, go into BACKUP state. 1010 * Same if the peer has a higher priority than us. 1011 */ 1012 if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || 1013 vh->vrrp_priority > sc->sc_vrrp_prio) { 1014 callout_stop(&sc->sc_ad_tmo); 1015 carp_set_state(sc, BACKUP, 1016 "more frequent advertisement received"); 1017 carp_setrun(sc, 0); 1018 carp_delroute(sc); 1019 } 1020 break; 1021 case BACKUP: 1022 /* 1023 * If we're pre-empting masters who advertise slower than us, 1024 * and this one claims to be slower, treat him as down. 1025 */ 1026 if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter 1027 || vh->vrrp_priority < sc->sc_vrrp_prio)) { 1028 carp_master_down_locked(sc, 1029 "preempting a slower master"); 1030 break; 1031 } 1032 1033 /* 1034 * Otherwise, we reset the counter and wait for the next 1035 * advertisement. 1036 */ 1037 carp_setrun(sc, af); 1038 break; 1039 } 1040 1041 out: 1042 CARP_UNLOCK(sc); 1043 m_freem(m); 1044 } 1045 1046 static int 1047 carp_tag(struct carp_softc *sc, struct mbuf *m) 1048 { 1049 struct m_tag *mtag; 1050 1051 /* Tag packet for carp_output */ 1052 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(sc->sc_vhid), 1053 M_NOWAIT)) == NULL) { 1054 m_freem(m); 1055 CARPSTATS_INC(carps_onomem); 1056 return (ENOMEM); 1057 } 1058 bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); 1059 m_tag_prepend(m, mtag); 1060 1061 return (0); 1062 } 1063 1064 static void 1065 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 1066 { 1067 1068 MPASS(sc->sc_version == CARP_VERSION_CARP); 1069 1070 if (sc->sc_init_counter) { 1071 /* this could also be seconds since unix epoch */ 1072 sc->sc_counter = arc4random(); 1073 sc->sc_counter = sc->sc_counter << 32; 1074 sc->sc_counter += arc4random(); 1075 } else 1076 sc->sc_counter++; 1077 1078 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 1079 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 1080 1081 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 1082 } 1083 1084 static inline void 1085 send_ad_locked(struct carp_softc *sc) 1086 { 1087 switch (sc->sc_version) { 1088 case CARP_VERSION_CARP: 1089 carp_send_ad_locked(sc); 1090 break; 1091 case CARP_VERSION_VRRPv3: 1092 vrrp_send_ad_locked(sc); 1093 break; 1094 } 1095 } 1096 1097 /* 1098 * To avoid LORs and possible recursions this function shouldn't 1099 * be called directly, but scheduled via taskqueue. 1100 */ 1101 static void 1102 carp_send_ad_all(void *ctx __unused, int pending __unused) 1103 { 1104 struct carp_softc *sc; 1105 struct epoch_tracker et; 1106 1107 NET_EPOCH_ENTER(et); 1108 mtx_lock(&carp_mtx); 1109 LIST_FOREACH(sc, &carp_list, sc_next) 1110 if (sc->sc_state == MASTER) { 1111 CARP_LOCK(sc); 1112 CURVNET_SET(sc->sc_carpdev->if_vnet); 1113 send_ad_locked(sc); 1114 CURVNET_RESTORE(); 1115 CARP_UNLOCK(sc); 1116 } 1117 mtx_unlock(&carp_mtx); 1118 NET_EPOCH_EXIT(et); 1119 } 1120 1121 /* Send a periodic advertisement, executed in callout context. */ 1122 static void 1123 carp_callout(void *v) 1124 { 1125 struct carp_softc *sc = v; 1126 struct epoch_tracker et; 1127 1128 NET_EPOCH_ENTER(et); 1129 CARP_LOCK_ASSERT(sc); 1130 CURVNET_SET(sc->sc_carpdev->if_vnet); 1131 send_ad_locked(sc); 1132 CURVNET_RESTORE(); 1133 CARP_UNLOCK(sc); 1134 NET_EPOCH_EXIT(et); 1135 } 1136 1137 static void 1138 carp_send_ad_error(struct carp_softc *sc, int error) 1139 { 1140 1141 /* 1142 * We track errors and successful sends with this logic: 1143 * - Any error resets success counter to 0. 1144 * - MAX_ERRORS triggers demotion. 1145 * - MIN_SUCCESS successes resets error counter to 0. 1146 * - MIN_SUCCESS reverts demotion, if it was triggered before. 1147 */ 1148 if (error) { 1149 if (sc->sc_sendad_errors < INT_MAX) 1150 sc->sc_sendad_errors++; 1151 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1152 static const char fmt[] = "send error %d on %s"; 1153 char msg[sizeof(fmt) + IFNAMSIZ]; 1154 1155 sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); 1156 carp_demote_adj(V_carp_senderr_adj, msg); 1157 } 1158 sc->sc_sendad_success = 0; 1159 } else if (sc->sc_sendad_errors > 0) { 1160 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 1161 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1162 static const char fmt[] = "send ok on %s"; 1163 char msg[sizeof(fmt) + IFNAMSIZ]; 1164 1165 sprintf(msg, fmt, if_name(sc->sc_carpdev)); 1166 carp_demote_adj(-V_carp_senderr_adj, msg); 1167 } 1168 sc->sc_sendad_errors = 0; 1169 } 1170 } 1171 } 1172 1173 /* 1174 * Pick the best ifaddr on the given ifp for sending CARP 1175 * advertisements. 1176 * 1177 * "Best" here is defined by ifa_preferred(). This function is much 1178 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 1179 * 1180 * (This could be simplified to return the actual address, except that 1181 * it has a different format in AF_INET and AF_INET6.) 1182 */ 1183 static struct ifaddr * 1184 carp_best_ifa(int af, struct ifnet *ifp) 1185 { 1186 struct ifaddr *ifa, *best; 1187 1188 NET_EPOCH_ASSERT(); 1189 1190 if (af >= AF_MAX) 1191 return (NULL); 1192 best = NULL; 1193 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1194 if (ifa->ifa_addr->sa_family == af && 1195 (best == NULL || ifa_preferred(best, ifa))) 1196 best = ifa; 1197 } 1198 if (best != NULL) 1199 ifa_ref(best); 1200 return (best); 1201 } 1202 1203 static void 1204 carp_send_ad_locked(struct carp_softc *sc) 1205 { 1206 struct carp_header ch; 1207 struct timeval tv; 1208 struct ifaddr *ifa; 1209 struct carp_header *ch_ptr; 1210 struct mbuf *m; 1211 int len, advskew; 1212 1213 NET_EPOCH_ASSERT(); 1214 CARP_LOCK_ASSERT(sc); 1215 MPASS(sc->sc_version == CARP_VERSION_CARP); 1216 1217 advskew = DEMOTE_ADVSKEW(sc); 1218 tv.tv_sec = sc->sc_advbase; 1219 tv.tv_usec = advskew * 1000000 / 256; 1220 1221 ch.carp_version = CARP_VERSION_CARP; 1222 ch.carp_type = CARP_ADVERTISEMENT; 1223 ch.carp_vhid = sc->sc_vhid; 1224 ch.carp_advbase = sc->sc_advbase; 1225 ch.carp_advskew = advskew; 1226 ch.carp_authlen = 7; /* XXX DEFINE */ 1227 ch.carp_pad1 = 0; /* must be zero */ 1228 ch.carp_cksum = 0; 1229 1230 /* XXXGL: OpenBSD picks first ifaddr with needed family. */ 1231 1232 #ifdef INET 1233 if (sc->sc_naddrs) { 1234 struct ip *ip; 1235 1236 m = m_gethdr(M_NOWAIT, MT_DATA); 1237 if (m == NULL) { 1238 CARPSTATS_INC(carps_onomem); 1239 goto resched; 1240 } 1241 len = sizeof(*ip) + sizeof(ch); 1242 m->m_pkthdr.len = len; 1243 m->m_pkthdr.rcvif = NULL; 1244 m->m_len = len; 1245 M_ALIGN(m, m->m_len); 1246 if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr))) 1247 m->m_flags |= M_MCAST; 1248 ip = mtod(m, struct ip *); 1249 ip->ip_v = IPVERSION; 1250 ip->ip_hl = sizeof(*ip) >> 2; 1251 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1252 ip->ip_len = htons(len); 1253 ip->ip_off = htons(IP_DF); 1254 ip->ip_ttl = CARP_DFLTTL; 1255 ip->ip_p = IPPROTO_CARP; 1256 ip->ip_sum = 0; 1257 ip_fillid(ip, V_ip_random_id); 1258 1259 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1260 if (ifa != NULL) { 1261 ip->ip_src.s_addr = 1262 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1263 ifa_free(ifa); 1264 } else 1265 ip->ip_src.s_addr = 0; 1266 ip->ip_dst = sc->sc_carpaddr; 1267 1268 ch_ptr = (struct carp_header *)(&ip[1]); 1269 bcopy(&ch, ch_ptr, sizeof(ch)); 1270 carp_prepare_ad(m, sc, ch_ptr); 1271 if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)) && 1272 carp_tag(sc, m) != 0) 1273 goto resched; 1274 1275 m->m_data += sizeof(*ip); 1276 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 1277 m->m_data -= sizeof(*ip); 1278 1279 CARPSTATS_INC(carps_opackets); 1280 1281 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1282 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1283 } 1284 #endif /* INET */ 1285 #ifdef INET6 1286 if (sc->sc_naddrs6) { 1287 struct ip6_hdr *ip6; 1288 1289 m = m_gethdr(M_NOWAIT, MT_DATA); 1290 if (m == NULL) { 1291 CARPSTATS_INC(carps_onomem); 1292 goto resched; 1293 } 1294 len = sizeof(*ip6) + sizeof(ch); 1295 m->m_pkthdr.len = len; 1296 m->m_pkthdr.rcvif = NULL; 1297 m->m_len = len; 1298 M_ALIGN(m, m->m_len); 1299 ip6 = mtod(m, struct ip6_hdr *); 1300 bzero(ip6, sizeof(*ip6)); 1301 ip6->ip6_vfc |= IPV6_VERSION; 1302 /* Traffic class isn't defined in ip6 struct instead 1303 * it gets offset into flowid field */ 1304 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1305 IPTOS_DSCP_OFFSET)); 1306 ip6->ip6_hlim = CARP_DFLTTL; 1307 ip6->ip6_nxt = IPPROTO_CARP; 1308 1309 /* set the source address */ 1310 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1311 if (ifa != NULL) { 1312 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1313 sizeof(struct in6_addr)); 1314 ifa_free(ifa); 1315 } else 1316 /* This should never happen with IPv6. */ 1317 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1318 1319 /* Set the multicast destination. */ 1320 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); 1321 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || 1322 IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { 1323 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1324 m_freem(m); 1325 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1326 goto resched; 1327 } 1328 } 1329 1330 ch_ptr = (struct carp_header *)(&ip6[1]); 1331 bcopy(&ch, ch_ptr, sizeof(ch)); 1332 carp_prepare_ad(m, sc, ch_ptr); 1333 if (IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6) && 1334 carp_tag(sc, m) != 0) 1335 goto resched; 1336 1337 m->m_data += sizeof(*ip6); 1338 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1339 m->m_data -= sizeof(*ip6); 1340 1341 CARPSTATS_INC(carps_opackets6); 1342 1343 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1344 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1345 } 1346 #endif /* INET6 */ 1347 1348 resched: 1349 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc); 1350 } 1351 1352 static void 1353 vrrp_send_ad_locked(struct carp_softc *sc) 1354 { 1355 struct vrrpv3_header *vh_ptr; 1356 struct ifaddr *ifa; 1357 struct mbuf *m; 1358 int len; 1359 struct vrrpv3_header vh = { 1360 .vrrp_version = CARP_VERSION_VRRPv3, 1361 .vrrp_type = VRRP_TYPE_ADVERTISEMENT, 1362 .vrrp_vrtid = sc->sc_vhid, 1363 .vrrp_priority = sc->sc_vrrp_prio, 1364 .vrrp_count_addr = 0, 1365 .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter), 1366 .vrrp_checksum = 0, 1367 }; 1368 1369 NET_EPOCH_ASSERT(); 1370 CARP_LOCK_ASSERT(sc); 1371 MPASS(sc->sc_version == CARP_VERSION_VRRPv3); 1372 1373 #ifdef INET 1374 if (sc->sc_naddrs) { 1375 struct ip *ip; 1376 1377 m = m_gethdr(M_NOWAIT, MT_DATA); 1378 if (m == NULL) { 1379 CARPSTATS_INC(carps_onomem); 1380 goto resched; 1381 } 1382 len = sizeof(*ip) + sizeof(vh); 1383 m->m_pkthdr.len = len; 1384 m->m_pkthdr.rcvif = NULL; 1385 m->m_len = len; 1386 M_ALIGN(m, m->m_len); 1387 m->m_flags |= M_MCAST; 1388 ip = mtod(m, struct ip *); 1389 ip->ip_v = IPVERSION; 1390 ip->ip_hl = sizeof(*ip) >> 2; 1391 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1392 ip->ip_off = htons(IP_DF); 1393 ip->ip_ttl = CARP_DFLTTL; 1394 ip->ip_p = IPPROTO_CARP; 1395 ip->ip_sum = 0; 1396 ip_fillid(ip, V_ip_random_id); 1397 1398 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1399 if (ifa != NULL) { 1400 ip->ip_src.s_addr = 1401 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1402 ifa_free(ifa); 1403 } else 1404 ip->ip_src.s_addr = 0; 1405 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 1406 1407 /* Include the IP addresses in the announcement. */ 1408 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1409 struct sockaddr_in *in; 1410 1411 MPASS(sc->sc_ifas[i] != NULL); 1412 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) 1413 continue; 1414 1415 in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; 1416 1417 if (m_append(m, sizeof(in->sin_addr), 1418 (caddr_t)&in->sin_addr) != 1) { 1419 m_freem(m); 1420 goto resched; 1421 } 1422 1423 vh.vrrp_count_addr++; 1424 len += sizeof(in->sin_addr); 1425 } 1426 ip->ip_len = htons(len); 1427 1428 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); 1429 bcopy(&vh, vh_ptr, sizeof(vh)); 1430 1431 vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, 1432 ip->ip_dst.s_addr, 1433 htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); 1434 vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); 1435 1436 if (carp_tag(sc, m)) 1437 goto resched; 1438 1439 CARPSTATS_INC(carps_opackets); 1440 1441 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1442 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1443 } 1444 #endif 1445 #ifdef INET6 1446 if (sc->sc_naddrs6) { 1447 struct ip6_hdr *ip6; 1448 1449 m = m_gethdr(M_NOWAIT, MT_DATA); 1450 if (m == NULL) { 1451 CARPSTATS_INC(carps_onomem); 1452 goto resched; 1453 } 1454 len = sizeof(*ip6) + sizeof(vh); 1455 m->m_pkthdr.len = len; 1456 m->m_pkthdr.rcvif = NULL; 1457 m->m_len = len; 1458 M_ALIGN(m, m->m_len); 1459 m->m_flags |= M_MCAST; 1460 ip6 = mtod(m, struct ip6_hdr *); 1461 bzero(ip6, sizeof(*ip6)); 1462 ip6->ip6_vfc |= IPV6_VERSION; 1463 /* Traffic class isn't defined in ip6 struct instead 1464 * it gets offset into flowid field */ 1465 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1466 IPTOS_DSCP_OFFSET)); 1467 ip6->ip6_hlim = CARP_DFLTTL; 1468 ip6->ip6_nxt = IPPROTO_CARP; 1469 1470 /* set the source address */ 1471 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1472 if (ifa != NULL) { 1473 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1474 sizeof(struct in6_addr)); 1475 ifa_free(ifa); 1476 } else 1477 /* This should never happen with IPv6. */ 1478 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1479 1480 /* Set the multicast destination. */ 1481 bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); 1482 ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 1483 ip6->ip6_dst.s6_addr8[15] = 0x12; 1484 1485 /* Include the IP addresses in the announcement. */ 1486 len = sizeof(vh); 1487 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1488 struct sockaddr_in6 *in6; 1489 1490 MPASS(sc->sc_ifas[i] != NULL); 1491 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) 1492 continue; 1493 1494 in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; 1495 1496 if (m_append(m, sizeof(in6->sin6_addr), 1497 (char *)&in6->sin6_addr) != 1) { 1498 m_freem(m); 1499 goto resched; 1500 } 1501 1502 vh.vrrp_count_addr++; 1503 len += sizeof(in6->sin6_addr); 1504 } 1505 ip6->ip6_plen = htonl(len); 1506 1507 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 1508 bcopy(&vh, vh_ptr, sizeof(vh)); 1509 1510 vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); 1511 vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); 1512 1513 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1514 m_freem(m); 1515 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1516 goto resched; 1517 } 1518 1519 if (carp_tag(sc, m)) 1520 goto resched; 1521 CARPSTATS_INC(carps_opackets6); 1522 1523 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1524 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1525 } 1526 #endif 1527 1528 resched: 1529 callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, 1530 carp_callout, sc); 1531 } 1532 1533 static void 1534 carp_addroute(struct carp_softc *sc) 1535 { 1536 struct ifaddr *ifa; 1537 1538 CARP_FOREACH_IFA(sc, ifa) 1539 carp_ifa_addroute(ifa); 1540 } 1541 1542 static void 1543 carp_ifa_addroute(struct ifaddr *ifa) 1544 { 1545 1546 switch (ifa->ifa_addr->sa_family) { 1547 #ifdef INET 1548 case AF_INET: 1549 in_addprefix(ifatoia(ifa)); 1550 ifa_add_loopback_route(ifa, 1551 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1552 break; 1553 #endif 1554 #ifdef INET6 1555 case AF_INET6: 1556 ifa_add_loopback_route(ifa, 1557 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1558 nd6_add_ifa_lle(ifatoia6(ifa)); 1559 break; 1560 #endif 1561 } 1562 } 1563 1564 static void 1565 carp_delroute(struct carp_softc *sc) 1566 { 1567 struct ifaddr *ifa; 1568 1569 CARP_FOREACH_IFA(sc, ifa) 1570 carp_ifa_delroute(ifa); 1571 } 1572 1573 static void 1574 carp_ifa_delroute(struct ifaddr *ifa) 1575 { 1576 1577 switch (ifa->ifa_addr->sa_family) { 1578 #ifdef INET 1579 case AF_INET: 1580 ifa_del_loopback_route(ifa, 1581 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1582 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1583 break; 1584 #endif 1585 #ifdef INET6 1586 case AF_INET6: 1587 ifa_del_loopback_route(ifa, 1588 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1589 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1590 break; 1591 #endif 1592 } 1593 } 1594 1595 int 1596 carp_master(struct ifaddr *ifa) 1597 { 1598 struct carp_softc *sc = ifa->ifa_carp; 1599 1600 return (sc->sc_state == MASTER); 1601 } 1602 1603 #ifdef INET 1604 /* 1605 * Broadcast a gratuitous ARP request containing 1606 * the virtual router MAC address for each IP address 1607 * associated with the virtual router. 1608 */ 1609 static void 1610 carp_send_arp(struct carp_softc *sc) 1611 { 1612 struct ifaddr *ifa; 1613 struct in_addr addr; 1614 1615 NET_EPOCH_ASSERT(); 1616 1617 CARP_FOREACH_IFA(sc, ifa) { 1618 if (ifa->ifa_addr->sa_family != AF_INET) 1619 continue; 1620 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1621 arp_announce_ifaddr(sc->sc_carpdev, addr, sc->sc_addr); 1622 } 1623 } 1624 1625 int 1626 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1627 { 1628 struct carp_softc *sc = ifa->ifa_carp; 1629 1630 if (sc->sc_state == MASTER) { 1631 *enaddr = sc->sc_addr; 1632 return (1); 1633 } 1634 1635 return (0); 1636 } 1637 #endif 1638 1639 #ifdef INET6 1640 static void 1641 carp_send_na(struct carp_softc *sc) 1642 { 1643 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1644 struct ifaddr *ifa; 1645 struct in6_addr *in6; 1646 1647 CARP_FOREACH_IFA(sc, ifa) { 1648 if (ifa->ifa_addr->sa_family != AF_INET6) 1649 continue; 1650 1651 in6 = IFA_IN6(ifa); 1652 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1653 ND_NA_FLAG_OVERRIDE, 1, NULL); 1654 DELAY(1000); /* XXX */ 1655 } 1656 } 1657 1658 /* 1659 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1660 * matches and is not a carp address. Returns NULL otherwise. 1661 */ 1662 struct ifaddr * 1663 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1664 { 1665 struct ifaddr *ifa; 1666 1667 NET_EPOCH_ASSERT(); 1668 1669 ifa = NULL; 1670 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1671 if (ifa->ifa_addr->sa_family != AF_INET6) 1672 continue; 1673 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1674 continue; 1675 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1676 ifa = NULL; 1677 else 1678 ifa_ref(ifa); 1679 break; 1680 } 1681 1682 return (ifa); 1683 } 1684 1685 char * 1686 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1687 { 1688 struct ifaddr *ifa; 1689 char *mac = NULL; 1690 1691 NET_EPOCH_ASSERT(); 1692 1693 IFNET_FOREACH_IFA(ifp, ifa) 1694 if (ifa->ifa_addr->sa_family == AF_INET6 && 1695 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1696 struct carp_softc *sc = ifa->ifa_carp; 1697 struct m_tag *mtag; 1698 1699 mtag = m_tag_get(PACKET_TAG_CARP, 1700 sizeof(sc->sc_vhid) + sizeof(sc->sc_addr), 1701 M_NOWAIT); 1702 if (mtag == NULL) { 1703 CARPSTATS_INC(carps_onomem); 1704 break; 1705 } 1706 /* carp_output expects sc_vhid first. */ 1707 bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); 1708 /* 1709 * Save sc_addr into mtag data after sc_vhid to avoid 1710 * possible access to destroyed softc. 1711 */ 1712 mac = (char *)(mtag + 1) + sizeof(sc->sc_vhid); 1713 bcopy(sc->sc_addr, mac, sizeof(sc->sc_addr)); 1714 1715 m_tag_prepend(m, mtag); 1716 break; 1717 } 1718 1719 return (mac); 1720 } 1721 #endif /* INET6 */ 1722 1723 int 1724 carp_forus(struct ifnet *ifp, u_char *dhost) 1725 { 1726 struct carp_softc *sc; 1727 uint8_t *ena = dhost; 1728 1729 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1730 return (0); 1731 1732 CIF_LOCK(ifp->if_carp); 1733 IFNET_FOREACH_CARP(ifp, sc) { 1734 /* 1735 * CARP_LOCK() is not here, since would protect nothing, but 1736 * cause deadlock with if_bridge, calling this under its lock. 1737 */ 1738 if (sc->sc_state == MASTER && !bcmp(dhost, sc->sc_addr, 1739 ETHER_ADDR_LEN)) { 1740 CIF_UNLOCK(ifp->if_carp); 1741 return (1); 1742 } 1743 } 1744 CIF_UNLOCK(ifp->if_carp); 1745 1746 return (0); 1747 } 1748 1749 /* Master down timeout event, executed in callout context. */ 1750 static void 1751 carp_master_down(void *v) 1752 { 1753 struct carp_softc *sc = v; 1754 struct epoch_tracker et; 1755 1756 NET_EPOCH_ENTER(et); 1757 CARP_LOCK_ASSERT(sc); 1758 1759 CURVNET_SET(sc->sc_carpdev->if_vnet); 1760 if (sc->sc_state == BACKUP) { 1761 carp_master_down_locked(sc, "master timed out"); 1762 } 1763 CURVNET_RESTORE(); 1764 1765 CARP_UNLOCK(sc); 1766 NET_EPOCH_EXIT(et); 1767 } 1768 1769 static void 1770 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1771 { 1772 1773 NET_EPOCH_ASSERT(); 1774 CARP_LOCK_ASSERT(sc); 1775 1776 switch (sc->sc_state) { 1777 case BACKUP: 1778 carp_set_state(sc, MASTER, reason); 1779 send_ad_locked(sc); 1780 #ifdef INET 1781 carp_send_arp(sc); 1782 #endif 1783 #ifdef INET6 1784 carp_send_na(sc); 1785 #endif 1786 carp_setrun(sc, 0); 1787 carp_addroute(sc); 1788 break; 1789 case INIT: 1790 case MASTER: 1791 #ifdef INVARIANTS 1792 panic("carp: VHID %u@%s: master_down event in %s state\n", 1793 sc->sc_vhid, 1794 if_name(sc->sc_carpdev), 1795 sc->sc_state ? "MASTER" : "INIT"); 1796 #endif 1797 break; 1798 } 1799 } 1800 1801 /* 1802 * When in backup state, af indicates whether to reset the master down timer 1803 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1804 */ 1805 static void 1806 carp_setrun(struct carp_softc *sc, sa_family_t af) 1807 { 1808 struct timeval tv; 1809 int timeout; 1810 1811 CARP_LOCK_ASSERT(sc); 1812 1813 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1814 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1815 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1816 !V_carp_allow) 1817 return; 1818 1819 switch (sc->sc_state) { 1820 case INIT: 1821 carp_set_state(sc, BACKUP, "initialization complete"); 1822 carp_setrun(sc, 0); 1823 break; 1824 case BACKUP: 1825 callout_stop(&sc->sc_ad_tmo); 1826 1827 switch (sc->sc_version) { 1828 case CARP_VERSION_CARP: 1829 tv.tv_sec = 3 * sc->sc_advbase; 1830 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1831 timeout = tvtohz(&tv); 1832 break; 1833 case CARP_VERSION_VRRPv3: 1834 /* skew time */ 1835 timeout = (256 - sc->sc_vrrp_prio) * 1836 sc->sc_vrrp_master_inter / 256; 1837 timeout += (3 * sc->sc_vrrp_master_inter); 1838 timeout *= hz; 1839 timeout /= 100; /* master interval is in centiseconds */ 1840 break; 1841 } 1842 switch (af) { 1843 #ifdef INET 1844 case AF_INET: 1845 callout_reset(&sc->sc_md_tmo, timeout, 1846 carp_master_down, sc); 1847 break; 1848 #endif 1849 #ifdef INET6 1850 case AF_INET6: 1851 callout_reset(&sc->sc_md6_tmo, timeout, 1852 carp_master_down, sc); 1853 break; 1854 #endif 1855 default: 1856 #ifdef INET 1857 if (sc->sc_naddrs) 1858 callout_reset(&sc->sc_md_tmo, timeout, 1859 carp_master_down, sc); 1860 #endif 1861 #ifdef INET6 1862 if (sc->sc_naddrs6) 1863 callout_reset(&sc->sc_md6_tmo, timeout, 1864 carp_master_down, sc); 1865 #endif 1866 break; 1867 } 1868 break; 1869 case MASTER: 1870 switch (sc->sc_version) { 1871 case CARP_VERSION_CARP: 1872 tv.tv_sec = sc->sc_advbase; 1873 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1874 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1875 carp_callout, sc); 1876 break; 1877 case CARP_VERSION_VRRPv3: 1878 callout_reset(&sc->sc_ad_tmo, 1879 sc->sc_vrrp_adv_inter * hz / 100, 1880 carp_callout, sc); 1881 break; 1882 } 1883 break; 1884 } 1885 } 1886 1887 /* 1888 * Setup multicast structures. 1889 */ 1890 static int 1891 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1892 { 1893 struct ifnet *ifp = cif->cif_ifp; 1894 int error = 0; 1895 1896 switch (sa) { 1897 #ifdef INET 1898 case AF_INET: 1899 { 1900 struct ip_moptions *imo = &cif->cif_imo; 1901 struct in_mfilter *imf; 1902 struct in_addr addr; 1903 1904 if (ip_mfilter_first(&imo->imo_head) != NULL) 1905 return (0); 1906 1907 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1908 ip_mfilter_init(&imo->imo_head); 1909 imo->imo_multicast_vif = -1; 1910 1911 addr.s_addr = htonl(INADDR_CARP_GROUP); 1912 if ((error = in_joingroup(ifp, &addr, NULL, 1913 &imf->imf_inm)) != 0) { 1914 ip_mfilter_free(imf); 1915 break; 1916 } 1917 1918 ip_mfilter_insert(&imo->imo_head, imf); 1919 imo->imo_multicast_ifp = ifp; 1920 imo->imo_multicast_ttl = CARP_DFLTTL; 1921 imo->imo_multicast_loop = 0; 1922 break; 1923 } 1924 #endif 1925 #ifdef INET6 1926 case AF_INET6: 1927 { 1928 struct ip6_moptions *im6o = &cif->cif_im6o; 1929 struct in6_mfilter *im6f[2]; 1930 struct in6_addr in6; 1931 1932 if (ip6_mfilter_first(&im6o->im6o_head)) 1933 return (0); 1934 1935 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1936 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1937 1938 ip6_mfilter_init(&im6o->im6o_head); 1939 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1940 im6o->im6o_multicast_ifp = ifp; 1941 1942 /* Join IPv6 CARP multicast group. */ 1943 bzero(&in6, sizeof(in6)); 1944 in6.s6_addr16[0] = htons(0xff02); 1945 in6.s6_addr8[15] = 0x12; 1946 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1947 ip6_mfilter_free(im6f[0]); 1948 ip6_mfilter_free(im6f[1]); 1949 break; 1950 } 1951 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1952 ip6_mfilter_free(im6f[0]); 1953 ip6_mfilter_free(im6f[1]); 1954 break; 1955 } 1956 1957 /* Join solicited multicast address. */ 1958 bzero(&in6, sizeof(in6)); 1959 in6.s6_addr16[0] = htons(0xff02); 1960 in6.s6_addr32[1] = 0; 1961 in6.s6_addr32[2] = htonl(1); 1962 in6.s6_addr32[3] = 0; 1963 in6.s6_addr8[12] = 0xff; 1964 1965 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1966 ip6_mfilter_free(im6f[0]); 1967 ip6_mfilter_free(im6f[1]); 1968 break; 1969 } 1970 1971 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1972 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1973 ip6_mfilter_free(im6f[0]); 1974 ip6_mfilter_free(im6f[1]); 1975 break; 1976 } 1977 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1978 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1979 break; 1980 } 1981 #endif 1982 } 1983 1984 return (error); 1985 } 1986 1987 /* 1988 * Free multicast structures. 1989 */ 1990 static void 1991 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1992 { 1993 #ifdef INET 1994 struct ip_moptions *imo = &cif->cif_imo; 1995 struct in_mfilter *imf; 1996 #endif 1997 #ifdef INET6 1998 struct ip6_moptions *im6o = &cif->cif_im6o; 1999 struct in6_mfilter *im6f; 2000 #endif 2001 sx_assert(&carp_sx, SA_XLOCKED); 2002 2003 switch (sa) { 2004 #ifdef INET 2005 case AF_INET: 2006 if (cif->cif_naddrs != 0) 2007 break; 2008 2009 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2010 ip_mfilter_remove(&imo->imo_head, imf); 2011 in_leavegroup(imf->imf_inm, NULL); 2012 ip_mfilter_free(imf); 2013 } 2014 break; 2015 #endif 2016 #ifdef INET6 2017 case AF_INET6: 2018 if (cif->cif_naddrs6 != 0) 2019 break; 2020 2021 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 2022 ip6_mfilter_remove(&im6o->im6o_head, im6f); 2023 in6_leavegroup(im6f->im6f_in6m, NULL); 2024 ip6_mfilter_free(im6f); 2025 } 2026 break; 2027 #endif 2028 } 2029 } 2030 2031 int 2032 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 2033 { 2034 struct m_tag *mtag; 2035 int vhid; 2036 2037 if (!sa) 2038 return (0); 2039 2040 switch (sa->sa_family) { 2041 #ifdef INET 2042 case AF_INET: 2043 break; 2044 #endif 2045 #ifdef INET6 2046 case AF_INET6: 2047 break; 2048 #endif 2049 default: 2050 return (0); 2051 } 2052 2053 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2054 if (mtag == NULL) 2055 return (0); 2056 2057 bcopy(mtag + 1, &vhid, sizeof(vhid)); 2058 2059 /* Set the source MAC address to the Virtual Router MAC Address. */ 2060 switch (ifp->if_type) { 2061 case IFT_ETHER: 2062 case IFT_BRIDGE: 2063 case IFT_L2VLAN: { 2064 struct ether_header *eh; 2065 2066 eh = mtod(m, struct ether_header *); 2067 eh->ether_shost[0] = 0; 2068 eh->ether_shost[1] = 0; 2069 eh->ether_shost[2] = 0x5e; 2070 eh->ether_shost[3] = 0; 2071 eh->ether_shost[4] = 1; 2072 eh->ether_shost[5] = vhid; 2073 } 2074 break; 2075 default: 2076 printf("%s: carp is not supported for the %d interface type\n", 2077 if_name(ifp), ifp->if_type); 2078 return (EOPNOTSUPP); 2079 } 2080 2081 return (0); 2082 } 2083 2084 static struct carp_softc* 2085 carp_alloc(struct ifnet *ifp, carp_version_t version, int vhid) 2086 { 2087 struct carp_softc *sc; 2088 struct carp_if *cif; 2089 2090 sx_assert(&carp_sx, SA_XLOCKED); 2091 2092 if ((cif = ifp->if_carp) == NULL) 2093 cif = carp_alloc_if(ifp); 2094 2095 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK); 2096 *sc = (struct carp_softc ){ 2097 .sc_vhid = vhid, 2098 .sc_version = version, 2099 .sc_state = INIT, 2100 .sc_carpdev = ifp, 2101 .sc_ifasiz = sizeof(struct ifaddr *), 2102 .sc_addr = { 0, 0, 0x5e, 0, 1, vhid }, 2103 }; 2104 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 2105 2106 switch (version) { 2107 case CARP_VERSION_CARP: 2108 sc->sc_advbase = CARP_DFLTINTV; 2109 sc->sc_init_counter = true; 2110 sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); 2111 sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 2112 sc->sc_carpaddr6.s6_addr8[15] = 0x12; 2113 break; 2114 case CARP_VERSION_VRRPv3: 2115 sc->sc_vrrp_adv_inter = 100; 2116 sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; 2117 sc->sc_vrrp_prio = 100; 2118 break; 2119 } 2120 2121 CARP_LOCK_INIT(sc); 2122 #ifdef INET 2123 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2124 #endif 2125 #ifdef INET6 2126 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2127 #endif 2128 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2129 2130 CIF_LOCK(cif); 2131 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 2132 CIF_UNLOCK(cif); 2133 2134 mtx_lock(&carp_mtx); 2135 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 2136 mtx_unlock(&carp_mtx); 2137 2138 return (sc); 2139 } 2140 2141 static void 2142 carp_grow_ifas(struct carp_softc *sc) 2143 { 2144 struct ifaddr **new; 2145 2146 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 2147 CARP_LOCK(sc); 2148 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 2149 free(sc->sc_ifas, M_CARP); 2150 sc->sc_ifas = new; 2151 sc->sc_ifasiz *= 2; 2152 CARP_UNLOCK(sc); 2153 } 2154 2155 static void 2156 carp_destroy(struct carp_softc *sc) 2157 { 2158 struct ifnet *ifp = sc->sc_carpdev; 2159 struct carp_if *cif = ifp->if_carp; 2160 2161 sx_assert(&carp_sx, SA_XLOCKED); 2162 2163 if (sc->sc_suppress) 2164 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 2165 CARP_UNLOCK(sc); 2166 2167 CIF_LOCK(cif); 2168 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 2169 CIF_UNLOCK(cif); 2170 2171 mtx_lock(&carp_mtx); 2172 LIST_REMOVE(sc, sc_next); 2173 mtx_unlock(&carp_mtx); 2174 2175 callout_drain(&sc->sc_ad_tmo); 2176 #ifdef INET 2177 callout_drain(&sc->sc_md_tmo); 2178 #endif 2179 #ifdef INET6 2180 callout_drain(&sc->sc_md6_tmo); 2181 #endif 2182 CARP_LOCK_DESTROY(sc); 2183 2184 free(sc->sc_ifas, M_CARP); 2185 free(sc, M_CARP); 2186 } 2187 2188 static struct carp_if* 2189 carp_alloc_if(struct ifnet *ifp) 2190 { 2191 struct carp_if *cif; 2192 int error; 2193 2194 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 2195 2196 if ((error = ifpromisc(ifp, 1)) != 0) 2197 printf("%s: ifpromisc(%s) failed: %d\n", 2198 __func__, if_name(ifp), error); 2199 else 2200 cif->cif_flags |= CIF_PROMISC; 2201 2202 CIF_LOCK_INIT(cif); 2203 cif->cif_ifp = ifp; 2204 TAILQ_INIT(&cif->cif_vrs); 2205 2206 IF_ADDR_WLOCK(ifp); 2207 ifp->if_carp = cif; 2208 if_ref(ifp); 2209 IF_ADDR_WUNLOCK(ifp); 2210 2211 return (cif); 2212 } 2213 2214 static void 2215 carp_free_if(struct carp_if *cif) 2216 { 2217 struct ifnet *ifp = cif->cif_ifp; 2218 2219 CIF_LOCK_ASSERT(cif); 2220 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 2221 __func__)); 2222 2223 IF_ADDR_WLOCK(ifp); 2224 ifp->if_carp = NULL; 2225 IF_ADDR_WUNLOCK(ifp); 2226 2227 CIF_LOCK_DESTROY(cif); 2228 2229 if (cif->cif_flags & CIF_PROMISC) 2230 ifpromisc(ifp, 0); 2231 if_rele(ifp); 2232 2233 free(cif, M_CARP); 2234 } 2235 2236 static bool 2237 carp_carprcp(void *arg, struct carp_softc *sc, int priv) 2238 { 2239 struct carpreq *carpr = arg; 2240 2241 CARP_LOCK(sc); 2242 carpr->carpr_state = sc->sc_state; 2243 carpr->carpr_vhid = sc->sc_vhid; 2244 switch (sc->sc_version) { 2245 case CARP_VERSION_CARP: 2246 carpr->carpr_advbase = sc->sc_advbase; 2247 carpr->carpr_advskew = sc->sc_advskew; 2248 if (priv) 2249 bcopy(sc->sc_key, carpr->carpr_key, 2250 sizeof(carpr->carpr_key)); 2251 else 2252 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 2253 break; 2254 case CARP_VERSION_VRRPv3: 2255 break; 2256 } 2257 CARP_UNLOCK(sc); 2258 2259 return (true); 2260 } 2261 2262 static int 2263 carp_ioctl_set(if_t ifp, struct carpkreq *carpr) 2264 { 2265 struct epoch_tracker et; 2266 struct carp_softc *sc = NULL; 2267 int error = 0; 2268 2269 if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID) 2270 return (EINVAL); 2271 2272 switch (carpr->carpr_version) { 2273 case CARP_VERSION_CARP: 2274 if (carpr->carpr_advbase != 0 && (carpr->carpr_advbase > 255 || 2275 carpr->carpr_advbase < CARP_DFLTINTV)) 2276 return (EINVAL); 2277 if (carpr->carpr_advskew < 0 || carpr->carpr_advskew >= 255) 2278 return (EINVAL); 2279 break; 2280 case CARP_VERSION_VRRPv3: 2281 /* XXXGL: shouldn't we check anything? */ 2282 break; 2283 default: 2284 return (EINVAL); 2285 } 2286 2287 if (ifp->if_carp) { 2288 IFNET_FOREACH_CARP(ifp, sc) 2289 if (sc->sc_vhid == carpr->carpr_vhid) 2290 break; 2291 } 2292 2293 if (sc == NULL) 2294 sc = carp_alloc(ifp, carpr->carpr_version, carpr->carpr_vhid); 2295 else if (sc->sc_version != carpr->carpr_version) 2296 return (EINVAL); 2297 2298 CARP_LOCK(sc); 2299 switch (sc->sc_version) { 2300 case CARP_VERSION_CARP: 2301 if (carpr->carpr_advbase != 0) 2302 sc->sc_advbase = carpr->carpr_advbase; 2303 sc->sc_advskew = carpr->carpr_advskew; 2304 if (carpr->carpr_addr.s_addr != INADDR_ANY) 2305 sc->sc_carpaddr = carpr->carpr_addr; 2306 if (!IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { 2307 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, 2308 sizeof(sc->sc_carpaddr6)); 2309 } 2310 if (carpr->carpr_key[0] != '\0') { 2311 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2312 carp_hmac_prepare(sc); 2313 } 2314 break; 2315 case CARP_VERSION_VRRPv3: 2316 if (carpr->carpr_vrrp_priority != 0) 2317 sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; 2318 if (carpr->carpr_vrrp_adv_inter) 2319 sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; 2320 break; 2321 } 2322 2323 if (sc->sc_state != INIT && 2324 carpr->carpr_state != sc->sc_state) { 2325 switch (carpr->carpr_state) { 2326 case BACKUP: 2327 callout_stop(&sc->sc_ad_tmo); 2328 carp_set_state(sc, BACKUP, 2329 "user requested via ifconfig"); 2330 carp_setrun(sc, 0); 2331 carp_delroute(sc); 2332 break; 2333 case MASTER: 2334 NET_EPOCH_ENTER(et); 2335 carp_master_down_locked(sc, 2336 "user requested via ifconfig"); 2337 NET_EPOCH_EXIT(et); 2338 break; 2339 default: 2340 break; 2341 } 2342 } 2343 CARP_UNLOCK(sc); 2344 2345 return (error); 2346 } 2347 2348 static int 2349 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr, 2350 bool (*outfn)(void *, struct carp_softc *, int), void *arg) 2351 { 2352 int priveleged; 2353 struct carp_softc *sc; 2354 2355 if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID) 2356 return (EINVAL); 2357 if (carpr->carpr_count < 1) 2358 return (EMSGSIZE); 2359 if (ifp->if_carp == NULL) 2360 return (ENOENT); 2361 2362 priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0); 2363 if (carpr->carpr_vhid != 0) { 2364 IFNET_FOREACH_CARP(ifp, sc) 2365 if (sc->sc_vhid == carpr->carpr_vhid) 2366 break; 2367 if (sc == NULL) 2368 return (ENOENT); 2369 2370 if (! outfn(arg, sc, priveleged)) 2371 return (ENOMEM); 2372 carpr->carpr_count = 1; 2373 } else { 2374 int count; 2375 2376 count = 0; 2377 IFNET_FOREACH_CARP(ifp, sc) 2378 count++; 2379 2380 if (count > carpr->carpr_count) 2381 return (EMSGSIZE); 2382 2383 IFNET_FOREACH_CARP(ifp, sc) { 2384 if (! outfn(arg, sc, priveleged)) 2385 return (ENOMEM); 2386 carpr->carpr_count = count; 2387 } 2388 } 2389 2390 return (0); 2391 } 2392 2393 int 2394 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 2395 { 2396 struct carpreq carpr; 2397 struct carpkreq carprk = { 2398 .carpr_version = CARP_VERSION_CARP, 2399 }; 2400 struct ifnet *ifp; 2401 int error = 0; 2402 2403 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 2404 return (error); 2405 2406 ifp = ifunit_ref(ifr->ifr_name); 2407 if ((error = carp_is_supported_if(ifp)) != 0) 2408 goto out; 2409 2410 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2411 error = EADDRNOTAVAIL; 2412 goto out; 2413 } 2414 2415 sx_xlock(&carp_sx); 2416 switch (cmd) { 2417 case SIOCSVH: 2418 if ((error = priv_check(td, PRIV_NETINET_CARP))) 2419 break; 2420 2421 memcpy(&carprk, &carpr, sizeof(carpr)); 2422 error = carp_ioctl_set(ifp, &carprk); 2423 break; 2424 2425 case SIOCGVH: 2426 error = carp_ioctl_get(ifp, td->td_ucred, &carpr, 2427 carp_carprcp, &carpr); 2428 if (error == 0) { 2429 error = copyout(&carpr, 2430 (char *)ifr_data_get_ptr(ifr), 2431 carpr.carpr_count * sizeof(carpr)); 2432 } 2433 break; 2434 default: 2435 error = EINVAL; 2436 } 2437 sx_xunlock(&carp_sx); 2438 2439 out: 2440 if (ifp != NULL) 2441 if_rele(ifp); 2442 2443 return (error); 2444 } 2445 2446 static int 2447 carp_get_vhid(struct ifaddr *ifa) 2448 { 2449 2450 if (ifa == NULL || ifa->ifa_carp == NULL) 2451 return (0); 2452 2453 return (ifa->ifa_carp->sc_vhid); 2454 } 2455 2456 int 2457 carp_attach(struct ifaddr *ifa, int vhid) 2458 { 2459 struct ifnet *ifp = ifa->ifa_ifp; 2460 struct carp_if *cif = ifp->if_carp; 2461 struct carp_softc *sc; 2462 int index, error; 2463 2464 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 2465 2466 switch (ifa->ifa_addr->sa_family) { 2467 #ifdef INET 2468 case AF_INET: 2469 #endif 2470 #ifdef INET6 2471 case AF_INET6: 2472 #endif 2473 break; 2474 default: 2475 return (EPROTOTYPE); 2476 } 2477 2478 sx_xlock(&carp_sx); 2479 if (ifp->if_carp == NULL) { 2480 sx_xunlock(&carp_sx); 2481 return (ENOPROTOOPT); 2482 } 2483 2484 IFNET_FOREACH_CARP(ifp, sc) 2485 if (sc->sc_vhid == vhid) 2486 break; 2487 if (sc == NULL) { 2488 sx_xunlock(&carp_sx); 2489 return (ENOENT); 2490 } 2491 2492 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 2493 if (error) { 2494 CIF_FREE(cif); 2495 sx_xunlock(&carp_sx); 2496 return (error); 2497 } 2498 2499 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 2500 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 2501 carp_grow_ifas(sc); 2502 2503 switch (ifa->ifa_addr->sa_family) { 2504 #ifdef INET 2505 case AF_INET: 2506 cif->cif_naddrs++; 2507 sc->sc_naddrs++; 2508 break; 2509 #endif 2510 #ifdef INET6 2511 case AF_INET6: 2512 cif->cif_naddrs6++; 2513 sc->sc_naddrs6++; 2514 break; 2515 #endif 2516 } 2517 2518 ifa_ref(ifa); 2519 2520 CARP_LOCK(sc); 2521 sc->sc_ifas[index - 1] = ifa; 2522 ifa->ifa_carp = sc; 2523 if (sc->sc_version == CARP_VERSION_CARP) 2524 carp_hmac_prepare(sc); 2525 carp_sc_state(sc); 2526 CARP_UNLOCK(sc); 2527 2528 sx_xunlock(&carp_sx); 2529 2530 return (0); 2531 } 2532 2533 void 2534 carp_detach(struct ifaddr *ifa, bool keep_cif) 2535 { 2536 struct ifnet *ifp = ifa->ifa_ifp; 2537 struct carp_if *cif = ifp->if_carp; 2538 struct carp_softc *sc = ifa->ifa_carp; 2539 int i, index; 2540 2541 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 2542 2543 sx_xlock(&carp_sx); 2544 2545 CARP_LOCK(sc); 2546 /* Shift array. */ 2547 index = sc->sc_naddrs + sc->sc_naddrs6; 2548 for (i = 0; i < index; i++) 2549 if (sc->sc_ifas[i] == ifa) 2550 break; 2551 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 2552 for (; i < index - 1; i++) 2553 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 2554 sc->sc_ifas[index - 1] = NULL; 2555 2556 switch (ifa->ifa_addr->sa_family) { 2557 #ifdef INET 2558 case AF_INET: 2559 cif->cif_naddrs--; 2560 sc->sc_naddrs--; 2561 break; 2562 #endif 2563 #ifdef INET6 2564 case AF_INET6: 2565 cif->cif_naddrs6--; 2566 sc->sc_naddrs6--; 2567 break; 2568 #endif 2569 } 2570 2571 carp_ifa_delroute(ifa); 2572 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2573 2574 ifa->ifa_carp = NULL; 2575 ifa_free(ifa); 2576 2577 if (sc->sc_version == CARP_VERSION_CARP) 2578 carp_hmac_prepare(sc); 2579 carp_sc_state(sc); 2580 2581 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2582 carp_destroy(sc); 2583 else 2584 CARP_UNLOCK(sc); 2585 2586 if (!keep_cif) 2587 CIF_FREE(cif); 2588 2589 sx_xunlock(&carp_sx); 2590 } 2591 2592 static void 2593 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2594 { 2595 2596 CARP_LOCK_ASSERT(sc); 2597 2598 if (sc->sc_state != state) { 2599 const char *carp_states[] = { CARP_STATES }; 2600 char subsys[IFNAMSIZ+5]; 2601 2602 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2603 if_name(sc->sc_carpdev)); 2604 2605 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2606 carp_states[sc->sc_state], carp_states[state], reason); 2607 2608 sc->sc_state = state; 2609 2610 devctl_notify("CARP", subsys, carp_states[state], NULL); 2611 } 2612 } 2613 2614 static void 2615 carp_linkstate(struct ifnet *ifp) 2616 { 2617 struct carp_softc *sc; 2618 2619 CIF_LOCK(ifp->if_carp); 2620 IFNET_FOREACH_CARP(ifp, sc) { 2621 CARP_LOCK(sc); 2622 carp_sc_state(sc); 2623 CARP_UNLOCK(sc); 2624 } 2625 CIF_UNLOCK(ifp->if_carp); 2626 } 2627 2628 static void 2629 carp_sc_state(struct carp_softc *sc) 2630 { 2631 2632 CARP_LOCK_ASSERT(sc); 2633 2634 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2635 !(sc->sc_carpdev->if_flags & IFF_UP) || 2636 !V_carp_allow) { 2637 callout_stop(&sc->sc_ad_tmo); 2638 #ifdef INET 2639 callout_stop(&sc->sc_md_tmo); 2640 #endif 2641 #ifdef INET6 2642 callout_stop(&sc->sc_md6_tmo); 2643 #endif 2644 carp_set_state(sc, INIT, "hardware interface down"); 2645 carp_setrun(sc, 0); 2646 carp_delroute(sc); 2647 if (!sc->sc_suppress) 2648 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2649 sc->sc_suppress = 1; 2650 } else { 2651 carp_set_state(sc, INIT, "hardware interface up"); 2652 carp_setrun(sc, 0); 2653 if (sc->sc_suppress) 2654 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2655 sc->sc_suppress = 0; 2656 } 2657 } 2658 2659 static void 2660 carp_demote_adj(int adj, char *reason) 2661 { 2662 atomic_add_int(&V_carp_demotion, adj); 2663 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2664 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2665 } 2666 2667 static int 2668 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2669 { 2670 int new, error; 2671 struct carp_softc *sc; 2672 2673 new = V_carp_allow; 2674 error = sysctl_handle_int(oidp, &new, 0, req); 2675 if (error || !req->newptr) 2676 return (error); 2677 2678 if (V_carp_allow != new) { 2679 V_carp_allow = new; 2680 2681 mtx_lock(&carp_mtx); 2682 LIST_FOREACH(sc, &carp_list, sc_next) { 2683 CARP_LOCK(sc); 2684 if (curvnet == sc->sc_carpdev->if_vnet) 2685 carp_sc_state(sc); 2686 CARP_UNLOCK(sc); 2687 } 2688 mtx_unlock(&carp_mtx); 2689 } 2690 2691 return (0); 2692 } 2693 2694 static int 2695 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2696 { 2697 int new, error; 2698 2699 new = V_carp_dscp; 2700 error = sysctl_handle_int(oidp, &new, 0, req); 2701 if (error || !req->newptr) 2702 return (error); 2703 2704 if (new < 0 || new > 63) 2705 return (EINVAL); 2706 2707 V_carp_dscp = new; 2708 2709 return (0); 2710 } 2711 2712 static int 2713 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2714 { 2715 int new, error; 2716 2717 new = V_carp_demotion; 2718 error = sysctl_handle_int(oidp, &new, 0, req); 2719 if (error || !req->newptr) 2720 return (error); 2721 2722 carp_demote_adj(new, "sysctl"); 2723 2724 return (0); 2725 } 2726 2727 static int 2728 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 2729 { 2730 if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN)) 2731 return (EINVAL); 2732 2733 memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla)); 2734 return (0); 2735 } 2736 2737 struct carp_nl_send_args { 2738 struct nlmsghdr *hdr; 2739 struct nl_pstate *npt; 2740 }; 2741 2742 static bool 2743 carp_nl_send(void *arg, struct carp_softc *sc, int priv) 2744 { 2745 struct carp_nl_send_args *nlsa = arg; 2746 struct nlmsghdr *hdr = nlsa->hdr; 2747 struct nl_pstate *npt = nlsa->npt; 2748 struct nl_writer *nw = npt->nw; 2749 struct genlmsghdr *ghdr_new; 2750 2751 if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { 2752 nlmsg_abort(nw); 2753 return (false); 2754 } 2755 2756 ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); 2757 if (ghdr_new == NULL) { 2758 nlmsg_abort(nw); 2759 return (false); 2760 } 2761 2762 ghdr_new->cmd = CARP_NL_CMD_GET; 2763 ghdr_new->version = 0; 2764 ghdr_new->reserved = 0; 2765 2766 CARP_LOCK(sc); 2767 2768 nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); 2769 nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); 2770 nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); 2771 switch (sc->sc_version) { 2772 case CARP_VERSION_CARP: 2773 nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); 2774 nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); 2775 nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); 2776 nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); 2777 if (priv) 2778 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), 2779 sc->sc_key); 2780 break; 2781 case CARP_VERSION_VRRPv3: 2782 nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); 2783 nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, 2784 sc->sc_vrrp_adv_inter); 2785 break; 2786 } 2787 2788 CARP_UNLOCK(sc); 2789 2790 if (! nlmsg_end(nw)) { 2791 nlmsg_abort(nw); 2792 return (false); 2793 } 2794 2795 return (true); 2796 } 2797 2798 struct nl_carp_parsed { 2799 unsigned int ifindex; 2800 char *ifname; 2801 uint32_t state; 2802 uint32_t vhid; 2803 int32_t advbase; 2804 int32_t advskew; 2805 char key[CARP_KEY_LEN]; 2806 struct in_addr addr; 2807 struct in6_addr addr6; 2808 carp_version_t version; 2809 uint8_t vrrp_prio; 2810 uint16_t vrrp_adv_inter; 2811 }; 2812 2813 #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) 2814 static const struct nlattr_parser nla_p_set[] = { 2815 { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, 2816 { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, 2817 { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 }, 2818 { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 }, 2819 { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key }, 2820 { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 }, 2821 { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, 2822 { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, 2823 { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, 2824 { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, 2825 { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, 2826 { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, 2827 }; 2828 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_empty, nla_p_set); 2829 #undef _OUT 2830 2831 2832 static int 2833 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt) 2834 { 2835 struct nl_carp_parsed attrs = { }; 2836 struct carp_nl_send_args args; 2837 struct carpreq carpr = { }; 2838 struct epoch_tracker et; 2839 if_t ifp = NULL; 2840 int error; 2841 2842 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2843 if (error != 0) 2844 return (error); 2845 2846 NET_EPOCH_ENTER(et); 2847 if (attrs.ifname != NULL) 2848 ifp = ifunit_ref(attrs.ifname); 2849 else if (attrs.ifindex != 0) 2850 ifp = ifnet_byindex_ref(attrs.ifindex); 2851 NET_EPOCH_EXIT(et); 2852 2853 if ((error = carp_is_supported_if(ifp)) != 0) 2854 goto out; 2855 2856 hdr->nlmsg_flags |= NLM_F_MULTI; 2857 args.hdr = hdr; 2858 args.npt = npt; 2859 2860 carpr.carpr_vhid = attrs.vhid; 2861 carpr.carpr_count = CARP_MAXVHID; 2862 2863 sx_xlock(&carp_sx); 2864 error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr, 2865 carp_nl_send, &args); 2866 sx_xunlock(&carp_sx); 2867 2868 if (! nlmsg_end_dump(npt->nw, error, hdr)) 2869 error = ENOMEM; 2870 2871 out: 2872 if (ifp != NULL) 2873 if_rele(ifp); 2874 2875 return (error); 2876 } 2877 2878 static int 2879 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) 2880 { 2881 struct nl_carp_parsed attrs = { }; 2882 struct carpkreq carpr; 2883 struct epoch_tracker et; 2884 if_t ifp = NULL; 2885 int error; 2886 2887 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2888 if (error != 0) 2889 return (error); 2890 2891 if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID) 2892 return (EINVAL); 2893 if (attrs.state > CARP_MAXSTATE) 2894 return (EINVAL); 2895 if (attrs.version == 0) /* compat with pre-VRRPv3 */ 2896 attrs.version = CARP_VERSION_CARP; 2897 switch (attrs.version) { 2898 case CARP_VERSION_CARP: 2899 if (attrs.advbase < 0 || attrs.advskew < 0) 2900 return (EINVAL); 2901 if (attrs.advbase > 255) 2902 return (EINVAL); 2903 if (attrs.advskew >= 255) 2904 return (EINVAL); 2905 break; 2906 case CARP_VERSION_VRRPv3: 2907 if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) 2908 return (EINVAL); 2909 break; 2910 default: 2911 return (EINVAL); 2912 } 2913 2914 NET_EPOCH_ENTER(et); 2915 if (attrs.ifname != NULL) 2916 ifp = ifunit_ref(attrs.ifname); 2917 else if (attrs.ifindex != 0) 2918 ifp = ifnet_byindex_ref(attrs.ifindex); 2919 NET_EPOCH_EXIT(et); 2920 2921 if ((error = carp_is_supported_if(ifp)) != 0) 2922 goto out; 2923 2924 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2925 error = EADDRNOTAVAIL; 2926 goto out; 2927 } 2928 2929 carpr.carpr_count = 1; 2930 carpr.carpr_vhid = attrs.vhid; 2931 carpr.carpr_state = attrs.state; 2932 carpr.carpr_version = attrs.version; 2933 switch (attrs.version) { 2934 case CARP_VERSION_CARP: 2935 carpr.carpr_advbase = attrs.advbase; 2936 carpr.carpr_advskew = attrs.advskew; 2937 carpr.carpr_addr = attrs.addr; 2938 carpr.carpr_addr6 = attrs.addr6; 2939 memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); 2940 break; 2941 case CARP_VERSION_VRRPv3: 2942 carpr.carpr_vrrp_priority = attrs.vrrp_prio; 2943 carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; 2944 break; 2945 } 2946 2947 sx_xlock(&carp_sx); 2948 error = carp_ioctl_set(ifp, &carpr); 2949 sx_xunlock(&carp_sx); 2950 2951 out: 2952 if (ifp != NULL) 2953 if_rele(ifp); 2954 2955 return (error); 2956 } 2957 2958 static const struct nlhdr_parser *all_parsers[] = { 2959 &carp_parser 2960 }; 2961 2962 static const struct genl_cmd carp_cmds[] = { 2963 { 2964 .cmd_num = CARP_NL_CMD_GET, 2965 .cmd_name = "SIOCGVH", 2966 .cmd_cb = carp_nl_get, 2967 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | 2968 GENL_CMD_CAP_HASPOL, 2969 }, 2970 { 2971 .cmd_num = CARP_NL_CMD_SET, 2972 .cmd_name = "SIOCSVH", 2973 .cmd_cb = carp_nl_set, 2974 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, 2975 .cmd_priv = PRIV_NETINET_CARP, 2976 }, 2977 }; 2978 2979 static uint16_t carp_family_id; 2980 static void 2981 carp_nl_register(void) 2982 { 2983 bool ret __diagused; 2984 2985 NL_VERIFY_PARSERS(all_parsers); 2986 carp_family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, 2987 CARP_NL_CMD_MAX); 2988 MPASS(carp_family_id != 0); 2989 2990 ret = genl_register_cmds(carp_family_id, carp_cmds, nitems(carp_cmds)); 2991 MPASS(ret); 2992 } 2993 2994 static void 2995 carp_nl_unregister(void) 2996 { 2997 genl_unregister_family(carp_family_id); 2998 } 2999 3000 static void 3001 carp_mod_cleanup(void) 3002 { 3003 3004 carp_nl_unregister(); 3005 3006 #ifdef INET 3007 (void)ipproto_unregister(IPPROTO_CARP); 3008 carp_iamatch_p = NULL; 3009 #endif 3010 #ifdef INET6 3011 (void)ip6proto_unregister(IPPROTO_CARP); 3012 carp_iamatch6_p = NULL; 3013 carp_macmatch6_p = NULL; 3014 #endif 3015 carp_ioctl_p = NULL; 3016 carp_attach_p = NULL; 3017 carp_detach_p = NULL; 3018 carp_get_vhid_p = NULL; 3019 carp_linkstate_p = NULL; 3020 carp_forus_p = NULL; 3021 carp_output_p = NULL; 3022 carp_demote_adj_p = NULL; 3023 carp_master_p = NULL; 3024 mtx_unlock(&carp_mtx); 3025 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 3026 mtx_destroy(&carp_mtx); 3027 sx_destroy(&carp_sx); 3028 } 3029 3030 static void 3031 ipcarp_sysinit(void) 3032 { 3033 3034 /* Load allow as tunable so to postpone carp start after module load */ 3035 TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow); 3036 } 3037 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL); 3038 3039 static int 3040 carp_mod_load(void) 3041 { 3042 int err; 3043 3044 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 3045 sx_init(&carp_sx, "carp_sx"); 3046 carp_get_vhid_p = carp_get_vhid; 3047 carp_forus_p = carp_forus; 3048 carp_output_p = carp_output; 3049 carp_linkstate_p = carp_linkstate; 3050 carp_ioctl_p = carp_ioctl; 3051 carp_attach_p = carp_attach; 3052 carp_detach_p = carp_detach; 3053 carp_demote_adj_p = carp_demote_adj; 3054 carp_master_p = carp_master; 3055 #ifdef INET6 3056 carp_iamatch6_p = carp_iamatch6; 3057 carp_macmatch6_p = carp_macmatch6; 3058 err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); 3059 if (err) { 3060 printf("carp: error %d registering with INET6\n", err); 3061 carp_mod_cleanup(); 3062 return (err); 3063 } 3064 #endif 3065 #ifdef INET 3066 carp_iamatch_p = carp_iamatch; 3067 err = ipproto_register(IPPROTO_CARP, carp_input, NULL); 3068 if (err) { 3069 printf("carp: error %d registering with INET\n", err); 3070 carp_mod_cleanup(); 3071 return (err); 3072 } 3073 #endif 3074 3075 carp_nl_register(); 3076 3077 return (0); 3078 } 3079 3080 static int 3081 carp_modevent(module_t mod, int type, void *data) 3082 { 3083 switch (type) { 3084 case MOD_LOAD: 3085 return carp_mod_load(); 3086 /* NOTREACHED */ 3087 case MOD_UNLOAD: 3088 mtx_lock(&carp_mtx); 3089 if (LIST_EMPTY(&carp_list)) 3090 carp_mod_cleanup(); 3091 else { 3092 mtx_unlock(&carp_mtx); 3093 return (EBUSY); 3094 } 3095 break; 3096 3097 default: 3098 return (EINVAL); 3099 } 3100 3101 return (0); 3102 } 3103 3104 static moduledata_t carp_mod = { 3105 "carp", 3106 carp_modevent, 3107 0 3108 }; 3109 3110 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 3111