1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/devctl.h>
#include <sys/jail.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/counter.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_carp_nl.h>
#include <netinet/ip.h>
#include <machine/in_cksum.h>
#endif
#ifdef INET
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_generic.h>
#include <netlink/netlink_message_parser.h>

#include <crypto/sha1.h>

static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");

/*
 * Per-vhid state.  One softc exists for each virtual host id configured
 * on a parent interface; see the design notes further down.
 */
struct carp_softc {
	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
	carp_version_t		sc_version;	/* carp or VRRPv3 */
	uint8_t			sc_addr[ETHER_ADDR_LEN];	/* Our link level address. */
	struct callout		sc_ad_tmo;	/* Advertising timeout. */
#ifdef INET
	struct callout		sc_md_tmo;	/* Master down timeout. */
#endif
#ifdef INET6
	struct callout		sc_md6_tmo;	/* XXX: Master down timeout. */
#endif
	struct mtx		sc_mtx;		/* Softc lock, see CARP_LOCK(). */

	int			sc_vhid;	/* Virtual host id (VRID for VRRPv3). */
	union {
		struct { /* sc_version == CARP_VERSION_CARP */
			int		sc_advskew;
			int		sc_advbase;
			struct in_addr	sc_carpaddr;
			struct in6_addr	sc_carpaddr6;
			/* Counter of the last advertisement sent or accepted. */
			uint64_t	sc_counter;
			/* If set, the next advertisement picks a random counter. */
			bool		sc_init_counter;
#define	CARP_HMAC_PAD	64
			/* HMAC key. */
			unsigned char	sc_key[CARP_KEY_LEN];
			/* Key pad: holds the opad once carp_hmac_prepare() is done. */
			unsigned char	sc_pad[CARP_HMAC_PAD];
			/* Precomputed first part of the inner hash. */
			SHA1_CTX	sc_sha1;
		};
		struct { /* sc_version == CARP_VERSION_VRRPv3 */
			uint8_t		sc_vrrp_prio;
			uint16_t	sc_vrrp_adv_inter;
			/* Advertisement interval last announced by a peer. */
			uint16_t	sc_vrrp_master_inter;
		};
	};
	/*
	 * sc_naddrs + sc_naddrs6 is the number of entries in use in sc_ifas
	 * (see CARP_FOREACH_IFA()); presumably the IPv4 and IPv6 counts
	 * respectively -- confirm against the attach/detach code.
	 */
	int			sc_naddrs;
	int			sc_naddrs6;
	int			sc_ifasiz;
	enum { INIT = 0, BACKUP, MASTER }	sc_state;
	int			sc_suppress;
	/* Send error/success bookkeeping, see carp_send_ad_error(). */
	int			sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	int			sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS	3

	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
};

/*
 * Per-interface CARP state; cif_vrs is the list of softcs walked by
 * IFNET_FOREACH_CARP() through ifp->if_carp.
 */
struct carp_if {
#ifdef INET
	int	cif_naddrs;
#endif
#ifdef INET6
	int	cif_naddrs6;
#endif
	TAILQ_HEAD(, carp_softc) cif_vrs;
#ifdef INET
	struct ip_moptions	 cif_imo;
#endif
#ifdef INET6
	struct ip6_moptions	 cif_im6o;
#endif
	struct ifnet	*cif_ifp;
	struct mtx	cif_mtx;	/* See CIF_LOCK(). */
	uint32_t	cif_flags;
#define	CIF_PROMISC	0x00000001
};

/*
 * Kernel equivalent of struct carpreq, but with more fields for new
 * features.
 */
struct carpkreq {
	int		carpr_count;
	int		carpr_vhid;
	int		carpr_state;
	int		carpr_advskew;
	int		carpr_advbase;
	unsigned char	carpr_key[CARP_KEY_LEN];
	/* Everything above this is identical to carpreq */
	struct in_addr	carpr_addr;
	struct in6_addr	carpr_addr6;
	carp_version_t	carpr_version;
	uint8_t		carpr_vrrp_priority;
	uint16_t	carpr_vrrp_adv_inter;
};

/*
 * Brief design of carp(4).
182 * 183 * Any carp-capable ifnet may have a list of carp softcs hanging off 184 * its ifp->if_carp pointer. Each softc represents one unique virtual 185 * host id, or vhid. The softc has a back pointer to the ifnet. All 186 * softcs are joined in a global list, which has quite limited use. 187 * 188 * Any interface address that takes part in CARP negotiation has a 189 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either 190 * AF_INET or AF_INET6 address. 191 * 192 * Although, one can get the softc's backpointer to ifnet and traverse 193 * through its ifp->if_addrhead queue to find all interface addresses 194 * involved in CARP, we keep a growable array of ifaddr pointers. This 195 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that 196 * do calls into the network stack, thus avoiding LORs. 197 * 198 * Locking: 199 * 200 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), 201 * callout-driven events and ioctl()s. 202 * 203 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. 204 * To traverse the global list we use the mutex carp_mtx. 205 * 206 * Known issues with locking: 207 * 208 * - On module unload we may race (?) with packet processing thread 209 * dereferencing our function pointers. 210 */ 211 212 /* Accept incoming CARP packets. */ 213 VNET_DEFINE_STATIC(int, carp_allow) = 1; 214 #define V_carp_allow VNET(carp_allow) 215 216 /* Set DSCP in outgoing CARP packets. */ 217 VNET_DEFINE_STATIC(int, carp_dscp) = 56; 218 #define V_carp_dscp VNET(carp_dscp) 219 220 /* Preempt slower nodes. */ 221 VNET_DEFINE_STATIC(int, carp_preempt) = 0; 222 #define V_carp_preempt VNET(carp_preempt) 223 224 /* Log level. */ 225 VNET_DEFINE_STATIC(int, carp_log) = 1; 226 #define V_carp_log VNET(carp_log) 227 228 /* Global advskew demotion. */ 229 VNET_DEFINE_STATIC(int, carp_demotion) = 0; 230 #define V_carp_demotion VNET(carp_demotion) 231 232 /* Send error demotion factor. 
*/ 233 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; 234 #define V_carp_senderr_adj VNET(carp_senderr_adj) 235 236 /* Iface down demotion factor. */ 237 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 238 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 239 240 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 241 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 242 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 243 244 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 245 "CARP"); 246 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 247 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 248 &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", 249 "Accept incoming CARP packets"); 250 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 251 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 252 0, 0, carp_dscp_sysctl, "I", 253 "DSCP value for carp packets"); 254 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 255 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 256 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 257 &VNET_NAME(carp_log), 0, "CARP log level"); 258 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 259 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 260 0, 0, carp_demote_adj_sysctl, "I", 261 "Adjust demotion factor (skew of advskew)"); 262 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 263 CTLFLAG_VNET | CTLFLAG_RW, 264 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 265 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 266 CTLFLAG_VNET | CTLFLAG_RW, 267 &VNET_NAME(carp_ifdown_adj), 0, 268 "Interface down demotion factor adjustment"); 269 270 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 271 VNET_PCPUSTAT_SYSINIT(carpstats); 272 VNET_PCPUSTAT_SYSUNINIT(carpstats); 273 274 #define CARPSTATS_ADD(name, val) \ 275 
counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 276 sizeof(uint64_t)], (val)) 277 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 278 279 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 280 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 281 282 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 283 NULL, MTX_DEF) 284 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 285 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 286 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 287 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 288 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 289 NULL, MTX_DEF) 290 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 291 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 292 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 293 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 294 #define CIF_FREE(cif) do { \ 295 CIF_LOCK(cif); \ 296 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 297 carp_free_if(cif); \ 298 else \ 299 CIF_UNLOCK(cif); \ 300 } while (0) 301 302 #define CARP_LOG(...) do { \ 303 if (V_carp_log > 0) \ 304 log(LOG_INFO, "carp: " __VA_ARGS__); \ 305 } while (0) 306 307 #define CARP_DEBUG(...) 
do { \ 308 if (V_carp_log > 1) \ 309 log(LOG_DEBUG, __VA_ARGS__); \ 310 } while (0) 311 312 #define IFNET_FOREACH_IFA(ifp, ifa) \ 313 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 314 if ((ifa)->ifa_carp != NULL) 315 316 #define CARP_FOREACH_IFA(sc, ifa) \ 317 CARP_LOCK_ASSERT(sc); \ 318 for (int _i = 0; \ 319 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 320 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 321 ++_i) 322 323 #define IFNET_FOREACH_CARP(ifp, sc) \ 324 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 325 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 326 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 327 328 #define DEMOTE_ADVSKEW(sc) \ 329 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 330 CARP_MAXSKEW : \ 331 (((sc)->sc_advskew + V_carp_demotion < 0) ? \ 332 0 : ((sc)->sc_advskew + V_carp_demotion))) 333 334 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); 335 static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); 336 static struct carp_softc 337 *carp_alloc(struct ifnet *, carp_version_t, int); 338 static void carp_destroy(struct carp_softc *); 339 static struct carp_if 340 *carp_alloc_if(struct ifnet *); 341 static void carp_free_if(struct carp_if *); 342 static void carp_set_state(struct carp_softc *, int, const char* reason); 343 static void carp_sc_state(struct carp_softc *); 344 static void carp_setrun(struct carp_softc *, sa_family_t); 345 static void carp_master_down(void *); 346 static void carp_master_down_locked(struct carp_softc *, 347 const char* reason); 348 static void carp_send_ad_locked(struct carp_softc *); 349 static void vrrp_send_ad_locked(struct carp_softc *); 350 static void carp_addroute(struct carp_softc *); 351 static void carp_ifa_addroute(struct ifaddr *); 352 static void carp_delroute(struct carp_softc *); 353 static void carp_ifa_delroute(struct ifaddr *); 354 static void carp_send_ad_all(void *, int); 355 static void carp_demote_adj(int, char *); 
356 357 static LIST_HEAD(, carp_softc) carp_list = LIST_HEAD_INITIALIZER(carp_list); 358 static struct mtx carp_mtx; 359 static struct sx carp_sx; 360 static struct task carp_sendall_task = 361 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 362 363 static int 364 carp_is_supported_if(if_t ifp) 365 { 366 if (ifp == NULL) 367 return (ENXIO); 368 369 switch (ifp->if_type) { 370 case IFT_ETHER: 371 case IFT_L2VLAN: 372 case IFT_BRIDGE: 373 break; 374 default: 375 return (EOPNOTSUPP); 376 } 377 378 return (0); 379 } 380 381 static void 382 carp_hmac_prepare(struct carp_softc *sc) 383 { 384 uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; 385 uint8_t vhid = sc->sc_vhid & 0xff; 386 struct ifaddr *ifa; 387 int i, found; 388 #ifdef INET 389 struct in_addr last, cur, in; 390 #endif 391 #ifdef INET6 392 struct in6_addr last6, cur6, in6; 393 #endif 394 395 CARP_LOCK_ASSERT(sc); 396 MPASS(sc->sc_version == CARP_VERSION_CARP); 397 398 /* Compute ipad from key. */ 399 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 400 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 401 for (i = 0; i < sizeof(sc->sc_pad); i++) 402 sc->sc_pad[i] ^= 0x36; 403 404 /* Precompute first part of inner hash. 
*/ 405 SHA1Init(&sc->sc_sha1); 406 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 407 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 408 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 409 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 410 #ifdef INET 411 cur.s_addr = 0; 412 do { 413 found = 0; 414 last = cur; 415 cur.s_addr = 0xffffffff; 416 CARP_FOREACH_IFA(sc, ifa) { 417 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 418 if (ifa->ifa_addr->sa_family == AF_INET && 419 ntohl(in.s_addr) > ntohl(last.s_addr) && 420 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 421 cur.s_addr = in.s_addr; 422 found++; 423 } 424 } 425 if (found) 426 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 427 } while (found); 428 #endif /* INET */ 429 #ifdef INET6 430 memset(&cur6, 0, sizeof(cur6)); 431 do { 432 found = 0; 433 last6 = cur6; 434 memset(&cur6, 0xff, sizeof(cur6)); 435 CARP_FOREACH_IFA(sc, ifa) { 436 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 437 if (IN6_IS_SCOPE_EMBED(&in6)) 438 in6.s6_addr16[1] = 0; 439 if (ifa->ifa_addr->sa_family == AF_INET6 && 440 memcmp(&in6, &last6, sizeof(in6)) > 0 && 441 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 442 cur6 = in6; 443 found++; 444 } 445 } 446 if (found) 447 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 448 } while (found); 449 #endif /* INET6 */ 450 451 /* convert ipad to opad */ 452 for (i = 0; i < sizeof(sc->sc_pad); i++) 453 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 454 } 455 456 static void 457 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 458 unsigned char md[20]) 459 { 460 SHA1_CTX sha1ctx; 461 462 CARP_LOCK_ASSERT(sc); 463 464 /* fetch first half of inner hash */ 465 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 466 467 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 468 SHA1Final(md, &sha1ctx); 469 470 /* outer hash */ 471 SHA1Init(&sha1ctx); 472 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 473 SHA1Update(&sha1ctx, md, 20); 474 SHA1Final(md, &sha1ctx); 475 
} 476 477 static int 478 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 479 unsigned char md[20]) 480 { 481 unsigned char md2[20]; 482 483 CARP_LOCK_ASSERT(sc); 484 485 carp_hmac_generate(sc, counter, md2); 486 487 return (bcmp(md, md2, sizeof(md2))); 488 } 489 490 static int 491 vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) 492 { 493 uint16_t cksum; 494 495 /* 496 * Note that VRRPv3 checksums are different from CARP checksums. 497 * Carp just calculates the checksum over the packet. 498 * VRRPv3 includes the pseudo-header checksum as well. 499 */ 500 cksum = in_cksum_skip(m, off + len, off); 501 cksum -= phdrcksum; 502 503 return (cksum); 504 } 505 506 /* 507 * process input packet. 508 * we have rearranged checks order compared to the rfc, 509 * but it seems more efficient this way or not possible otherwise. 510 */ 511 #ifdef INET 512 static int 513 carp_input(struct mbuf **mp, int *offp, int proto) 514 { 515 struct mbuf *m = *mp; 516 struct ip *ip; 517 struct vrrpv3_header *vh; 518 int iplen; 519 int minlen; 520 int totlen; 521 522 iplen = *offp; 523 *mp = NULL; 524 525 CARPSTATS_INC(carps_ipackets); 526 527 if (!V_carp_allow) { 528 m_freem(m); 529 return (IPPROTO_DONE); 530 } 531 532 /* Ensure we have enough header to figure out the version. 
*/ 533 if (m->m_pkthdr.len < iplen + sizeof(*vh)) { 534 CARPSTATS_INC(carps_badlen); 535 CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " 536 "on %s\n", __func__, m->m_len - sizeof(struct ip), 537 if_name(m->m_pkthdr.rcvif)); 538 m_freem(m); 539 return (IPPROTO_DONE); 540 } 541 542 if (m->m_len < iplen + sizeof(*vh)) { 543 if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { 544 CARPSTATS_INC(carps_hdrops); 545 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 546 return (IPPROTO_DONE); 547 } 548 } 549 ip = mtod(m, struct ip *); 550 totlen = ntohs(ip->ip_len); 551 vh = (struct vrrpv3_header *)((char *)ip + iplen); 552 553 switch (vh->vrrp_version) { 554 case CARP_VERSION_CARP: 555 minlen = sizeof(struct carp_header); 556 break; 557 case CARP_VERSION_VRRPv3: 558 minlen = sizeof(struct vrrpv3_header); 559 break; 560 default: 561 CARPSTATS_INC(carps_badver); 562 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 563 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 564 m_freem(m); 565 return (IPPROTO_DONE); 566 } 567 568 /* And now check the length again but with the real minimal length. 
*/ 569 if (m->m_pkthdr.len < iplen + minlen) { 570 CARPSTATS_INC(carps_badlen); 571 CARP_DEBUG("%s: received len %zd < %d " 572 "on %s\n", __func__, m->m_len - sizeof(struct ip), 573 iplen + minlen, 574 if_name(m->m_pkthdr.rcvif)); 575 m_freem(m); 576 return (IPPROTO_DONE); 577 } 578 579 if (m->m_len < iplen + minlen) { 580 if ((m = m_pullup(m, iplen + minlen)) == NULL) { 581 CARPSTATS_INC(carps_hdrops); 582 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 583 return (IPPROTO_DONE); 584 } 585 ip = mtod(m, struct ip *); 586 vh = (struct vrrpv3_header *)((char *)ip + iplen); 587 } 588 589 switch (vh->vrrp_version) { 590 case CARP_VERSION_CARP: { 591 struct carp_header *ch; 592 593 /* verify the CARP checksum */ 594 if (in_cksum_skip(m, totlen, iplen)) { 595 CARPSTATS_INC(carps_badsum); 596 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 597 if_name(m->m_pkthdr.rcvif)); 598 m_freem(m); 599 break; 600 } 601 ch = (struct carp_header *)((char *)ip + iplen); 602 carp_input_c(m, ch, AF_INET, ip->ip_ttl); 603 break; 604 } 605 case CARP_VERSION_VRRPv3: { 606 uint16_t phdrcksum; 607 608 phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 609 htonl((u_short)(totlen - iplen) + ip->ip_p)); 610 vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, 611 phdrcksum); 612 break; 613 } 614 default: 615 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 616 } 617 618 return (IPPROTO_DONE); 619 } 620 #endif 621 622 #ifdef INET6 623 static int 624 carp6_input(struct mbuf **mp, int *offp, int proto) 625 { 626 struct mbuf *m = *mp; 627 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 628 struct vrrpv3_header *vh; 629 u_int len, minlen; 630 631 CARPSTATS_INC(carps_ipackets6); 632 633 if (!V_carp_allow) { 634 m_freem(m); 635 return (IPPROTO_DONE); 636 } 637 638 /* check if received on a valid carp interface */ 639 if (m->m_pkthdr.rcvif->if_carp == NULL) { 640 CARPSTATS_INC(carps_badif); 641 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 
642 __func__, if_name(m->m_pkthdr.rcvif)); 643 m_freem(m); 644 return (IPPROTO_DONE); 645 } 646 647 if (m->m_len < *offp + sizeof(*vh)) { 648 len = m->m_len; 649 m = m_pullup(m, *offp + sizeof(*vh)); 650 if (m == NULL) { 651 CARPSTATS_INC(carps_badlen); 652 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 653 return (IPPROTO_DONE); 654 } 655 ip6 = mtod(m, struct ip6_hdr *); 656 } 657 vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); 658 659 switch (vh->vrrp_version) { 660 case CARP_VERSION_CARP: 661 minlen = sizeof(struct carp_header); 662 break; 663 case CARP_VERSION_VRRPv3: 664 minlen = sizeof(struct vrrpv3_header); 665 break; 666 default: 667 CARPSTATS_INC(carps_badver); 668 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 669 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 670 m_freem(m); 671 return (IPPROTO_DONE); 672 } 673 674 /* And now check the length again but with the real minimal length. */ 675 if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { 676 CARPSTATS_INC(carps_badlen); 677 CARP_DEBUG("%s: received len %zd < %zd " 678 "on %s\n", __func__, m->m_len - sizeof(struct ip), 679 sizeof(*ip6) + minlen, 680 if_name(m->m_pkthdr.rcvif)); 681 m_freem(m); 682 return (IPPROTO_DONE); 683 } 684 685 if (m->m_len < sizeof(*ip6) + minlen) { 686 if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { 687 CARPSTATS_INC(carps_hdrops); 688 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 689 return (IPPROTO_DONE); 690 } 691 ip6 = mtod(m, struct ip6_hdr *); 692 vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 693 } 694 695 switch (vh->vrrp_version) { 696 case CARP_VERSION_CARP: { 697 struct carp_header *ch; 698 699 /* verify the CARP checksum */ 700 if (in_cksum_skip(m, *offp + sizeof(struct carp_header), 701 *offp)) { 702 CARPSTATS_INC(carps_badsum); 703 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 704 if_name(m->m_pkthdr.rcvif)); 705 m_freem(m); 706 break; 707 } 708 ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); 
709 carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); 710 break; 711 } 712 case CARP_VERSION_VRRPv3: { 713 uint16_t phdrcksum; 714 715 phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 716 ip6->ip6_nxt, 0); 717 vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, 718 ntohs(ip6->ip6_plen), phdrcksum); 719 break; 720 } 721 default: 722 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 723 } 724 return (IPPROTO_DONE); 725 } 726 #endif /* INET6 */ 727 728 /* 729 * This routine should not be necessary at all, but some switches 730 * (VMWare ESX vswitches) can echo our own packets back at us, 731 * and we must ignore them or they will cause us to drop out of 732 * MASTER mode. 733 * 734 * We cannot catch all cases of network loops. Instead, what we 735 * do here is catch any packet that arrives with a carp header 736 * with a VHID of 0, that comes from an address that is our own. 737 * These packets are by definition "from us" (even if they are from 738 * a misconfigured host that is pretending to be us). 739 * 740 * The VHID test is outside this mini-function. 
741 */ 742 static int 743 carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 744 { 745 #ifdef INET 746 struct ip *ip4; 747 struct in_addr in4; 748 #endif 749 #ifdef INET6 750 struct ip6_hdr *ip6; 751 struct in6_addr in6; 752 #endif 753 754 switch (af) { 755 #ifdef INET 756 case AF_INET: 757 ip4 = mtod(m, struct ip *); 758 in4 = ifatoia(ifa)->ia_addr.sin_addr; 759 return (in4.s_addr == ip4->ip_src.s_addr); 760 #endif 761 #ifdef INET6 762 case AF_INET6: 763 ip6 = mtod(m, struct ip6_hdr *); 764 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 765 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 766 #endif 767 default: 768 break; 769 } 770 return (0); 771 } 772 773 static struct ifaddr * 774 carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) 775 { 776 struct ifnet *ifp = m->m_pkthdr.rcvif; 777 struct ifaddr *ifa, *match; 778 int error; 779 780 NET_EPOCH_ASSERT(); 781 782 /* 783 * Verify that the VHID is valid on the receiving interface. 784 * 785 * There should be just one match. If there are none 786 * the VHID is not valid and we drop the packet. If 787 * there are multiple VHID matches, take just the first 788 * one, for compatibility with previous code. While we're 789 * scanning, check for obvious loops in the network topology 790 * (these should never happen, and as noted above, we may 791 * miss real loops; this is just a double-check). 792 */ 793 error = 0; 794 match = NULL; 795 IFNET_FOREACH_IFA(ifp, ifa) { 796 if (match == NULL && ifa->ifa_carp != NULL && 797 ifa->ifa_addr->sa_family == af && 798 ifa->ifa_carp->sc_vhid == vhid) 799 match = ifa; 800 if (vhid == 0 && carp_source_is_self(m, ifa, af)) 801 error = ELOOP; 802 } 803 ifa = error ? NULL : match; 804 if (ifa != NULL) 805 ifa_ref(ifa); 806 807 if (ifa == NULL) { 808 if (error == ELOOP) { 809 CARP_DEBUG("dropping looped packet on interface %s\n", 810 if_name(ifp)); 811 CARPSTATS_INC(carps_badif); /* ??? 
*/ 812 } else { 813 CARPSTATS_INC(carps_badvhid); 814 } 815 } 816 817 return (ifa); 818 } 819 820 static void 821 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) 822 { 823 struct ifnet *ifp = m->m_pkthdr.rcvif; 824 struct ifaddr *ifa; 825 struct carp_softc *sc; 826 uint64_t tmp_counter; 827 struct timeval sc_tv, ch_tv; 828 bool multicast = false; 829 830 NET_EPOCH_ASSERT(); 831 MPASS(ch->carp_version == CARP_VERSION_CARP); 832 833 ifa = carp_find_ifa(m, af, ch->carp_vhid); 834 if (ifa == NULL) { 835 m_freem(m); 836 return; 837 } 838 839 sc = ifa->ifa_carp; 840 CARP_LOCK(sc); 841 842 /* verify the CARP version. */ 843 if (sc->sc_version != CARP_VERSION_CARP) { 844 CARP_UNLOCK(sc); 845 846 CARPSTATS_INC(carps_badver); 847 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 848 ch->carp_version); 849 ifa_free(ifa); 850 m_freem(m); 851 return; 852 } 853 854 if (ifa->ifa_addr->sa_family == AF_INET) { 855 multicast = IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)); 856 } else { 857 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); 858 } 859 ifa_free(ifa); 860 861 /* verify that the IP TTL is 255, but only if we're not in unicast mode. 
*/ 862 if (multicast && ttl != CARP_DFLTTL) { 863 CARPSTATS_INC(carps_badttl); 864 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 865 ttl, if_name(m->m_pkthdr.rcvif)); 866 goto out; 867 } 868 869 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 870 CARPSTATS_INC(carps_badauth); 871 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 872 sc->sc_vhid, if_name(ifp)); 873 goto out; 874 } 875 876 tmp_counter = ntohl(ch->carp_counter[0]); 877 tmp_counter = tmp_counter<<32; 878 tmp_counter += ntohl(ch->carp_counter[1]); 879 880 /* XXX Replay protection goes here */ 881 882 sc->sc_init_counter = false; 883 sc->sc_counter = tmp_counter; 884 885 sc_tv.tv_sec = sc->sc_advbase; 886 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 887 ch_tv.tv_sec = ch->carp_advbase; 888 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 889 890 switch (sc->sc_state) { 891 case INIT: 892 break; 893 case MASTER: 894 /* 895 * If we receive an advertisement from a master who's going to 896 * be more frequent than us, go into BACKUP state. 897 */ 898 if (timevalcmp(&sc_tv, &ch_tv, >) || 899 timevalcmp(&sc_tv, &ch_tv, ==)) { 900 callout_stop(&sc->sc_ad_tmo); 901 carp_set_state(sc, BACKUP, 902 "more frequent advertisement received"); 903 carp_setrun(sc, 0); 904 carp_delroute(sc); 905 } 906 break; 907 case BACKUP: 908 /* 909 * If we're pre-empting masters who advertise slower than us, 910 * and this one claims to be slower, treat him as down. 911 */ 912 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 913 carp_master_down_locked(sc, 914 "preempting a slower master"); 915 break; 916 } 917 918 /* 919 * If the master is going to advertise at such a low frequency 920 * that he's guaranteed to time out, we'd might as well just 921 * treat him as timed out now. 
922 */ 923 sc_tv.tv_sec = sc->sc_advbase * 3; 924 if (timevalcmp(&sc_tv, &ch_tv, <)) { 925 carp_master_down_locked(sc, "master will time out"); 926 break; 927 } 928 929 /* 930 * Otherwise, we reset the counter and wait for the next 931 * advertisement. 932 */ 933 carp_setrun(sc, af); 934 break; 935 } 936 937 out: 938 CARP_UNLOCK(sc); 939 m_freem(m); 940 } 941 942 static void 943 vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, 944 int len, uint16_t phdrcksum) 945 { 946 struct vrrpv3_header *vh = mtodo(m, off); 947 struct ifnet *ifp = m->m_pkthdr.rcvif; 948 struct ifaddr *ifa; 949 struct carp_softc *sc; 950 951 NET_EPOCH_ASSERT(); 952 MPASS(vh->vrrp_version == CARP_VERSION_VRRPv3); 953 954 ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); 955 if (ifa == NULL) { 956 m_freem(m); 957 return; 958 } 959 960 sc = ifa->ifa_carp; 961 CARP_LOCK(sc); 962 963 ifa_free(ifa); 964 965 /* verify the CARP version. */ 966 if (sc->sc_version != CARP_VERSION_VRRPv3) { 967 CARP_UNLOCK(sc); 968 969 CARPSTATS_INC(carps_badver); 970 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 971 vh->vrrp_version); 972 m_freem(m); 973 return; 974 } 975 976 /* verify that the IP TTL is 255. */ 977 if (ttl != CARP_DFLTTL) { 978 CARPSTATS_INC(carps_badttl); 979 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 980 ttl, if_name(m->m_pkthdr.rcvif)); 981 goto out; 982 } 983 984 if (vrrp_checksum_verify(m, off, len, phdrcksum)) { 985 CARPSTATS_INC(carps_badsum); 986 CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, 987 sc->sc_vhid, if_name(ifp)); 988 goto out; 989 } 990 991 /* RFC9568, 7.1 Receiving VRRP packets. */ 992 if (sc->sc_vrrp_prio == 255) { 993 CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", 994 __func__); 995 goto out; 996 } 997 998 /* XXX TODO Check IP address payload. 
*/ 999 1000 sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); 1001 1002 switch (sc->sc_state) { 1003 case INIT: 1004 break; 1005 case MASTER: 1006 /* 1007 * If we receive an advertisement from a master who's going to 1008 * be more frequent than us, go into BACKUP state. 1009 * Same if the peer has a higher priority than us. 1010 */ 1011 if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || 1012 vh->vrrp_priority > sc->sc_vrrp_prio) { 1013 callout_stop(&sc->sc_ad_tmo); 1014 carp_set_state(sc, BACKUP, 1015 "more frequent advertisement received"); 1016 carp_setrun(sc, 0); 1017 carp_delroute(sc); 1018 } 1019 break; 1020 case BACKUP: 1021 /* 1022 * If we're pre-empting masters who advertise slower than us, 1023 * and this one claims to be slower, treat him as down. 1024 */ 1025 if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter 1026 || vh->vrrp_priority < sc->sc_vrrp_prio)) { 1027 carp_master_down_locked(sc, 1028 "preempting a slower master"); 1029 break; 1030 } 1031 1032 /* 1033 * Otherwise, we reset the counter and wait for the next 1034 * advertisement. 
1035 */ 1036 carp_setrun(sc, af); 1037 break; 1038 } 1039 1040 out: 1041 CARP_UNLOCK(sc); 1042 m_freem(m); 1043 } 1044 1045 static int 1046 carp_tag(struct carp_softc *sc, struct mbuf *m) 1047 { 1048 struct m_tag *mtag; 1049 1050 /* Tag packet for carp_output */ 1051 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(sc->sc_vhid), 1052 M_NOWAIT)) == NULL) { 1053 m_freem(m); 1054 CARPSTATS_INC(carps_onomem); 1055 return (ENOMEM); 1056 } 1057 bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); 1058 m_tag_prepend(m, mtag); 1059 1060 return (0); 1061 } 1062 1063 static void 1064 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 1065 { 1066 1067 MPASS(sc->sc_version == CARP_VERSION_CARP); 1068 1069 if (sc->sc_init_counter) { 1070 /* this could also be seconds since unix epoch */ 1071 sc->sc_counter = arc4random(); 1072 sc->sc_counter = sc->sc_counter << 32; 1073 sc->sc_counter += arc4random(); 1074 } else 1075 sc->sc_counter++; 1076 1077 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 1078 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 1079 1080 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 1081 } 1082 1083 static inline void 1084 send_ad_locked(struct carp_softc *sc) 1085 { 1086 switch (sc->sc_version) { 1087 case CARP_VERSION_CARP: 1088 carp_send_ad_locked(sc); 1089 break; 1090 case CARP_VERSION_VRRPv3: 1091 vrrp_send_ad_locked(sc); 1092 break; 1093 } 1094 } 1095 1096 /* 1097 * To avoid LORs and possible recursions this function shouldn't 1098 * be called directly, but scheduled via taskqueue. 
1099 */ 1100 static void 1101 carp_send_ad_all(void *ctx __unused, int pending __unused) 1102 { 1103 struct carp_softc *sc; 1104 struct epoch_tracker et; 1105 1106 NET_EPOCH_ENTER(et); 1107 mtx_lock(&carp_mtx); 1108 LIST_FOREACH(sc, &carp_list, sc_next) 1109 if (sc->sc_state == MASTER) { 1110 CARP_LOCK(sc); 1111 CURVNET_SET(sc->sc_carpdev->if_vnet); 1112 send_ad_locked(sc); 1113 CURVNET_RESTORE(); 1114 CARP_UNLOCK(sc); 1115 } 1116 mtx_unlock(&carp_mtx); 1117 NET_EPOCH_EXIT(et); 1118 } 1119 1120 /* Send a periodic advertisement, executed in callout context. */ 1121 static void 1122 carp_callout(void *v) 1123 { 1124 struct carp_softc *sc = v; 1125 struct epoch_tracker et; 1126 1127 NET_EPOCH_ENTER(et); 1128 CARP_LOCK_ASSERT(sc); 1129 CURVNET_SET(sc->sc_carpdev->if_vnet); 1130 send_ad_locked(sc); 1131 CURVNET_RESTORE(); 1132 CARP_UNLOCK(sc); 1133 NET_EPOCH_EXIT(et); 1134 } 1135 1136 static void 1137 carp_send_ad_error(struct carp_softc *sc, int error) 1138 { 1139 1140 /* 1141 * We track errors and successful sends with this logic: 1142 * - Any error resets success counter to 0. 1143 * - MAX_ERRORS triggers demotion. 1144 * - MIN_SUCCESS successes resets error counter to 0. 1145 * - MIN_SUCCESS reverts demotion, if it was triggered before. 
1146 */ 1147 if (error) { 1148 if (sc->sc_sendad_errors < INT_MAX) 1149 sc->sc_sendad_errors++; 1150 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1151 static const char fmt[] = "send error %d on %s"; 1152 char msg[sizeof(fmt) + IFNAMSIZ]; 1153 1154 sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); 1155 carp_demote_adj(V_carp_senderr_adj, msg); 1156 } 1157 sc->sc_sendad_success = 0; 1158 } else if (sc->sc_sendad_errors > 0) { 1159 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 1160 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1161 static const char fmt[] = "send ok on %s"; 1162 char msg[sizeof(fmt) + IFNAMSIZ]; 1163 1164 sprintf(msg, fmt, if_name(sc->sc_carpdev)); 1165 carp_demote_adj(-V_carp_senderr_adj, msg); 1166 } 1167 sc->sc_sendad_errors = 0; 1168 } 1169 } 1170 } 1171 1172 /* 1173 * Pick the best ifaddr on the given ifp for sending CARP 1174 * advertisements. 1175 * 1176 * "Best" here is defined by ifa_preferred(). This function is much 1177 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 1178 * 1179 * (This could be simplified to return the actual address, except that 1180 * it has a different format in AF_INET and AF_INET6.) 
1181 */ 1182 static struct ifaddr * 1183 carp_best_ifa(int af, struct ifnet *ifp) 1184 { 1185 struct ifaddr *ifa, *best; 1186 1187 NET_EPOCH_ASSERT(); 1188 1189 if (af >= AF_MAX) 1190 return (NULL); 1191 best = NULL; 1192 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1193 if (ifa->ifa_addr->sa_family == af && 1194 (best == NULL || ifa_preferred(best, ifa))) 1195 best = ifa; 1196 } 1197 if (best != NULL) 1198 ifa_ref(best); 1199 return (best); 1200 } 1201 1202 static void 1203 carp_send_ad_locked(struct carp_softc *sc) 1204 { 1205 struct carp_header ch; 1206 struct timeval tv; 1207 struct ifaddr *ifa; 1208 struct carp_header *ch_ptr; 1209 struct mbuf *m; 1210 int len, advskew; 1211 1212 NET_EPOCH_ASSERT(); 1213 CARP_LOCK_ASSERT(sc); 1214 MPASS(sc->sc_version == CARP_VERSION_CARP); 1215 1216 advskew = DEMOTE_ADVSKEW(sc); 1217 tv.tv_sec = sc->sc_advbase; 1218 tv.tv_usec = advskew * 1000000 / 256; 1219 1220 ch.carp_version = CARP_VERSION_CARP; 1221 ch.carp_type = CARP_ADVERTISEMENT; 1222 ch.carp_vhid = sc->sc_vhid; 1223 ch.carp_advbase = sc->sc_advbase; 1224 ch.carp_advskew = advskew; 1225 ch.carp_authlen = 7; /* XXX DEFINE */ 1226 ch.carp_pad1 = 0; /* must be zero */ 1227 ch.carp_cksum = 0; 1228 1229 /* XXXGL: OpenBSD picks first ifaddr with needed family. 
*/ 1230 1231 #ifdef INET 1232 if (sc->sc_naddrs) { 1233 struct ip *ip; 1234 1235 m = m_gethdr(M_NOWAIT, MT_DATA); 1236 if (m == NULL) { 1237 CARPSTATS_INC(carps_onomem); 1238 goto resched; 1239 } 1240 len = sizeof(*ip) + sizeof(ch); 1241 m->m_pkthdr.len = len; 1242 m->m_pkthdr.rcvif = NULL; 1243 m->m_len = len; 1244 M_ALIGN(m, m->m_len); 1245 if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr))) 1246 m->m_flags |= M_MCAST; 1247 ip = mtod(m, struct ip *); 1248 ip->ip_v = IPVERSION; 1249 ip->ip_hl = sizeof(*ip) >> 2; 1250 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1251 ip->ip_len = htons(len); 1252 ip->ip_off = htons(IP_DF); 1253 ip->ip_ttl = CARP_DFLTTL; 1254 ip->ip_p = IPPROTO_CARP; 1255 ip->ip_sum = 0; 1256 ip_fillid(ip, V_ip_random_id); 1257 1258 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1259 if (ifa != NULL) { 1260 ip->ip_src.s_addr = 1261 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1262 ifa_free(ifa); 1263 } else 1264 ip->ip_src.s_addr = 0; 1265 ip->ip_dst = sc->sc_carpaddr; 1266 1267 ch_ptr = (struct carp_header *)(&ip[1]); 1268 bcopy(&ch, ch_ptr, sizeof(ch)); 1269 carp_prepare_ad(m, sc, ch_ptr); 1270 if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)) && 1271 carp_tag(sc, m) != 0) 1272 goto resched; 1273 1274 m->m_data += sizeof(*ip); 1275 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 1276 m->m_data -= sizeof(*ip); 1277 1278 CARPSTATS_INC(carps_opackets); 1279 1280 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1281 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1282 } 1283 #endif /* INET */ 1284 #ifdef INET6 1285 if (sc->sc_naddrs6) { 1286 struct ip6_hdr *ip6; 1287 1288 m = m_gethdr(M_NOWAIT, MT_DATA); 1289 if (m == NULL) { 1290 CARPSTATS_INC(carps_onomem); 1291 goto resched; 1292 } 1293 len = sizeof(*ip6) + sizeof(ch); 1294 m->m_pkthdr.len = len; 1295 m->m_pkthdr.rcvif = NULL; 1296 m->m_len = len; 1297 M_ALIGN(m, m->m_len); 1298 ip6 = mtod(m, struct ip6_hdr *); 1299 bzero(ip6, sizeof(*ip6)); 1300 ip6->ip6_vfc |= IPV6_VERSION; 1301 /* Traffic 
class isn't defined in ip6 struct instead 1302 * it gets offset into flowid field */ 1303 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1304 IPTOS_DSCP_OFFSET)); 1305 ip6->ip6_hlim = CARP_DFLTTL; 1306 ip6->ip6_nxt = IPPROTO_CARP; 1307 1308 /* set the source address */ 1309 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1310 if (ifa != NULL) { 1311 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1312 sizeof(struct in6_addr)); 1313 ifa_free(ifa); 1314 } else 1315 /* This should never happen with IPv6. */ 1316 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1317 1318 /* Set the multicast destination. */ 1319 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); 1320 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || 1321 IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { 1322 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1323 m_freem(m); 1324 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1325 goto resched; 1326 } 1327 } 1328 1329 ch_ptr = (struct carp_header *)(&ip6[1]); 1330 bcopy(&ch, ch_ptr, sizeof(ch)); 1331 carp_prepare_ad(m, sc, ch_ptr); 1332 if (IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6) && 1333 carp_tag(sc, m) != 0) 1334 goto resched; 1335 1336 m->m_data += sizeof(*ip6); 1337 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1338 m->m_data -= sizeof(*ip6); 1339 1340 CARPSTATS_INC(carps_opackets6); 1341 1342 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1343 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1344 } 1345 #endif /* INET6 */ 1346 1347 resched: 1348 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc); 1349 } 1350 1351 static void 1352 vrrp_send_ad_locked(struct carp_softc *sc) 1353 { 1354 struct vrrpv3_header *vh_ptr; 1355 struct ifaddr *ifa; 1356 struct mbuf *m; 1357 int len; 1358 struct vrrpv3_header vh = { 1359 .vrrp_version = CARP_VERSION_VRRPv3, 1360 .vrrp_type = VRRP_TYPE_ADVERTISEMENT, 1361 .vrrp_vrtid = sc->sc_vhid, 1362 .vrrp_priority = sc->sc_vrrp_prio, 1363 .vrrp_count_addr = 0, 1364 .vrrp_max_adver_int = 
htons(sc->sc_vrrp_adv_inter), 1365 .vrrp_checksum = 0, 1366 }; 1367 1368 NET_EPOCH_ASSERT(); 1369 CARP_LOCK_ASSERT(sc); 1370 MPASS(sc->sc_version == CARP_VERSION_VRRPv3); 1371 1372 #ifdef INET 1373 if (sc->sc_naddrs) { 1374 struct ip *ip; 1375 1376 m = m_gethdr(M_NOWAIT, MT_DATA); 1377 if (m == NULL) { 1378 CARPSTATS_INC(carps_onomem); 1379 goto resched; 1380 } 1381 len = sizeof(*ip) + sizeof(vh); 1382 m->m_pkthdr.len = len; 1383 m->m_pkthdr.rcvif = NULL; 1384 m->m_len = len; 1385 M_ALIGN(m, m->m_len); 1386 m->m_flags |= M_MCAST; 1387 ip = mtod(m, struct ip *); 1388 ip->ip_v = IPVERSION; 1389 ip->ip_hl = sizeof(*ip) >> 2; 1390 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1391 ip->ip_off = htons(IP_DF); 1392 ip->ip_ttl = CARP_DFLTTL; 1393 ip->ip_p = IPPROTO_CARP; 1394 ip->ip_sum = 0; 1395 ip_fillid(ip, V_ip_random_id); 1396 1397 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1398 if (ifa != NULL) { 1399 ip->ip_src.s_addr = 1400 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1401 ifa_free(ifa); 1402 } else 1403 ip->ip_src.s_addr = 0; 1404 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 1405 1406 /* Include the IP addresses in the announcement. 
*/ 1407 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1408 struct sockaddr_in *in; 1409 1410 MPASS(sc->sc_ifas[i] != NULL); 1411 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) 1412 continue; 1413 1414 in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; 1415 1416 if (m_append(m, sizeof(in->sin_addr), 1417 (caddr_t)&in->sin_addr) != 1) { 1418 m_freem(m); 1419 goto resched; 1420 } 1421 1422 vh.vrrp_count_addr++; 1423 len += sizeof(in->sin_addr); 1424 } 1425 ip->ip_len = htons(len); 1426 1427 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); 1428 bcopy(&vh, vh_ptr, sizeof(vh)); 1429 1430 vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, 1431 ip->ip_dst.s_addr, 1432 htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); 1433 vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); 1434 1435 if (carp_tag(sc, m)) 1436 goto resched; 1437 1438 CARPSTATS_INC(carps_opackets); 1439 1440 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1441 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1442 } 1443 #endif 1444 #ifdef INET6 1445 if (sc->sc_naddrs6) { 1446 struct ip6_hdr *ip6; 1447 1448 m = m_gethdr(M_NOWAIT, MT_DATA); 1449 if (m == NULL) { 1450 CARPSTATS_INC(carps_onomem); 1451 goto resched; 1452 } 1453 len = sizeof(*ip6) + sizeof(vh); 1454 m->m_pkthdr.len = len; 1455 m->m_pkthdr.rcvif = NULL; 1456 m->m_len = len; 1457 M_ALIGN(m, m->m_len); 1458 m->m_flags |= M_MCAST; 1459 ip6 = mtod(m, struct ip6_hdr *); 1460 bzero(ip6, sizeof(*ip6)); 1461 ip6->ip6_vfc |= IPV6_VERSION; 1462 /* Traffic class isn't defined in ip6 struct instead 1463 * it gets offset into flowid field */ 1464 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1465 IPTOS_DSCP_OFFSET)); 1466 ip6->ip6_hlim = CARP_DFLTTL; 1467 ip6->ip6_nxt = IPPROTO_CARP; 1468 1469 /* set the source address */ 1470 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1471 if (ifa != NULL) { 1472 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1473 sizeof(struct in6_addr)); 1474 ifa_free(ifa); 1475 } else 
1476 /* This should never happen with IPv6. */ 1477 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1478 1479 /* Set the multicast destination. */ 1480 bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); 1481 ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 1482 ip6->ip6_dst.s6_addr8[15] = 0x12; 1483 1484 /* Include the IP addresses in the announcement. */ 1485 len = sizeof(vh); 1486 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1487 struct sockaddr_in6 *in6; 1488 1489 MPASS(sc->sc_ifas[i] != NULL); 1490 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) 1491 continue; 1492 1493 in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; 1494 1495 if (m_append(m, sizeof(in6->sin6_addr), 1496 (char *)&in6->sin6_addr) != 1) { 1497 m_freem(m); 1498 goto resched; 1499 } 1500 1501 vh.vrrp_count_addr++; 1502 len += sizeof(in6->sin6_addr); 1503 } 1504 ip6->ip6_plen = htonl(len); 1505 1506 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 1507 bcopy(&vh, vh_ptr, sizeof(vh)); 1508 1509 vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); 1510 vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); 1511 1512 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1513 m_freem(m); 1514 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1515 goto resched; 1516 } 1517 1518 if (carp_tag(sc, m)) 1519 goto resched; 1520 CARPSTATS_INC(carps_opackets6); 1521 1522 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1523 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1524 } 1525 #endif 1526 1527 resched: 1528 callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, 1529 carp_callout, sc); 1530 } 1531 1532 static void 1533 carp_addroute(struct carp_softc *sc) 1534 { 1535 struct ifaddr *ifa; 1536 1537 CARP_FOREACH_IFA(sc, ifa) 1538 carp_ifa_addroute(ifa); 1539 } 1540 1541 static void 1542 carp_ifa_addroute(struct ifaddr *ifa) 1543 { 1544 1545 switch (ifa->ifa_addr->sa_family) { 1546 #ifdef INET 1547 case AF_INET: 1548 
in_addprefix(ifatoia(ifa)); 1549 ifa_add_loopback_route(ifa, 1550 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1551 break; 1552 #endif 1553 #ifdef INET6 1554 case AF_INET6: 1555 ifa_add_loopback_route(ifa, 1556 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1557 nd6_add_ifa_lle(ifatoia6(ifa)); 1558 break; 1559 #endif 1560 } 1561 } 1562 1563 static void 1564 carp_delroute(struct carp_softc *sc) 1565 { 1566 struct ifaddr *ifa; 1567 1568 CARP_FOREACH_IFA(sc, ifa) 1569 carp_ifa_delroute(ifa); 1570 } 1571 1572 static void 1573 carp_ifa_delroute(struct ifaddr *ifa) 1574 { 1575 1576 switch (ifa->ifa_addr->sa_family) { 1577 #ifdef INET 1578 case AF_INET: 1579 ifa_del_loopback_route(ifa, 1580 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1581 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1582 break; 1583 #endif 1584 #ifdef INET6 1585 case AF_INET6: 1586 ifa_del_loopback_route(ifa, 1587 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1588 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1589 break; 1590 #endif 1591 } 1592 } 1593 1594 int 1595 carp_master(struct ifaddr *ifa) 1596 { 1597 struct carp_softc *sc = ifa->ifa_carp; 1598 1599 return (sc->sc_state == MASTER); 1600 } 1601 1602 #ifdef INET 1603 /* 1604 * Broadcast a gratuitous ARP request containing 1605 * the virtual router MAC address for each IP address 1606 * associated with the virtual router. 
1607 */ 1608 static void 1609 carp_send_arp(struct carp_softc *sc) 1610 { 1611 struct ifaddr *ifa; 1612 struct in_addr addr; 1613 1614 NET_EPOCH_ASSERT(); 1615 1616 CARP_FOREACH_IFA(sc, ifa) { 1617 if (ifa->ifa_addr->sa_family != AF_INET) 1618 continue; 1619 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1620 arp_announce_ifaddr(sc->sc_carpdev, addr, sc->sc_addr); 1621 } 1622 } 1623 1624 int 1625 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1626 { 1627 struct carp_softc *sc = ifa->ifa_carp; 1628 1629 if (sc->sc_state == MASTER) { 1630 *enaddr = sc->sc_addr; 1631 return (1); 1632 } 1633 1634 return (0); 1635 } 1636 #endif 1637 1638 #ifdef INET6 1639 static void 1640 carp_send_na(struct carp_softc *sc) 1641 { 1642 struct ifaddr *ifa; 1643 int flags; 1644 1645 /* 1646 * Sending Unsolicited Neighbor Advertisements 1647 * 1648 * If the node is a router, we MUST set the Router flag to one. 1649 * We set Override flag to one and send link-layer address option, 1650 * thus neighboring nodes will install the new link-layer address. 1651 */ 1652 flags = ND_NA_FLAG_OVERRIDE; 1653 if (V_ip6_forwarding) 1654 flags |= ND_NA_FLAG_ROUTER; 1655 CARP_FOREACH_IFA(sc, ifa) { 1656 if (ifa->ifa_addr->sa_family != AF_INET6) 1657 continue; 1658 /* 1659 * We use unspecified address as destination here to avoid 1660 * scope initialization for each call. 1661 * nd6_na_output() will use all nodes multicast address if 1662 * destinaion address is unspecified. 1663 */ 1664 nd6_na_output(sc->sc_carpdev, &in6addr_any, IFA_IN6(ifa), 1665 flags, ND6_NA_OPT_LLA | ND6_NA_CARP_MASTER, NULL); 1666 DELAY(1000); /* RetransTimer */ 1667 } 1668 } 1669 1670 /* 1671 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1672 * matches and is not a carp address. Returns NULL otherwise. 
1673 */ 1674 struct ifaddr * 1675 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1676 { 1677 struct ifaddr *ifa; 1678 1679 NET_EPOCH_ASSERT(); 1680 1681 ifa = NULL; 1682 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1683 if (ifa->ifa_addr->sa_family != AF_INET6) 1684 continue; 1685 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1686 continue; 1687 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1688 ifa = NULL; 1689 else 1690 ifa_ref(ifa); 1691 break; 1692 } 1693 1694 return (ifa); 1695 } 1696 1697 char * 1698 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1699 { 1700 struct ifaddr *ifa; 1701 char *mac = NULL; 1702 1703 NET_EPOCH_ASSERT(); 1704 1705 IFNET_FOREACH_IFA(ifp, ifa) 1706 if (ifa->ifa_addr->sa_family == AF_INET6 && 1707 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1708 struct carp_softc *sc = ifa->ifa_carp; 1709 struct m_tag *mtag; 1710 1711 mtag = m_tag_get(PACKET_TAG_CARP, 1712 sizeof(sc->sc_vhid) + sizeof(sc->sc_addr), 1713 M_NOWAIT); 1714 if (mtag == NULL) { 1715 CARPSTATS_INC(carps_onomem); 1716 break; 1717 } 1718 /* carp_output expects sc_vhid first. */ 1719 bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); 1720 /* 1721 * Save sc_addr into mtag data after sc_vhid to avoid 1722 * possible access to destroyed softc. 1723 */ 1724 mac = (char *)(mtag + 1) + sizeof(sc->sc_vhid); 1725 bcopy(sc->sc_addr, mac, sizeof(sc->sc_addr)); 1726 1727 m_tag_prepend(m, mtag); 1728 break; 1729 } 1730 1731 return (mac); 1732 } 1733 #endif /* INET6 */ 1734 1735 int 1736 carp_forus(struct ifnet *ifp, u_char *dhost) 1737 { 1738 struct carp_softc *sc; 1739 uint8_t *ena = dhost; 1740 1741 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1742 return (0); 1743 1744 CIF_LOCK(ifp->if_carp); 1745 IFNET_FOREACH_CARP(ifp, sc) { 1746 /* 1747 * CARP_LOCK() is not here, since would protect nothing, but 1748 * cause deadlock with if_bridge, calling this under its lock. 
1749 */ 1750 if (sc->sc_state == MASTER && !bcmp(dhost, sc->sc_addr, 1751 ETHER_ADDR_LEN)) { 1752 CIF_UNLOCK(ifp->if_carp); 1753 return (1); 1754 } 1755 } 1756 CIF_UNLOCK(ifp->if_carp); 1757 1758 return (0); 1759 } 1760 1761 /* Master down timeout event, executed in callout context. */ 1762 static void 1763 carp_master_down(void *v) 1764 { 1765 struct carp_softc *sc = v; 1766 struct epoch_tracker et; 1767 1768 NET_EPOCH_ENTER(et); 1769 CARP_LOCK_ASSERT(sc); 1770 1771 CURVNET_SET(sc->sc_carpdev->if_vnet); 1772 if (sc->sc_state == BACKUP) { 1773 carp_master_down_locked(sc, "master timed out"); 1774 } 1775 CURVNET_RESTORE(); 1776 1777 CARP_UNLOCK(sc); 1778 NET_EPOCH_EXIT(et); 1779 } 1780 1781 static void 1782 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1783 { 1784 1785 NET_EPOCH_ASSERT(); 1786 CARP_LOCK_ASSERT(sc); 1787 1788 switch (sc->sc_state) { 1789 case BACKUP: 1790 carp_set_state(sc, MASTER, reason); 1791 send_ad_locked(sc); 1792 #ifdef INET 1793 carp_send_arp(sc); 1794 #endif 1795 #ifdef INET6 1796 carp_send_na(sc); 1797 #endif 1798 carp_setrun(sc, 0); 1799 carp_addroute(sc); 1800 break; 1801 case INIT: 1802 case MASTER: 1803 #ifdef INVARIANTS 1804 panic("carp: VHID %u@%s: master_down event in %s state\n", 1805 sc->sc_vhid, 1806 if_name(sc->sc_carpdev), 1807 sc->sc_state ? "MASTER" : "INIT"); 1808 #endif 1809 break; 1810 } 1811 } 1812 1813 /* 1814 * When in backup state, af indicates whether to reset the master down timer 1815 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1816 */ 1817 static void 1818 carp_setrun(struct carp_softc *sc, sa_family_t af) 1819 { 1820 struct timeval tv; 1821 int timeout; 1822 1823 CARP_LOCK_ASSERT(sc); 1824 1825 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1826 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1827 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1828 !V_carp_allow) 1829 return; 1830 1831 switch (sc->sc_state) { 1832 case INIT: 1833 carp_set_state(sc, BACKUP, "initialization complete"); 1834 carp_setrun(sc, 0); 1835 break; 1836 case BACKUP: 1837 callout_stop(&sc->sc_ad_tmo); 1838 1839 switch (sc->sc_version) { 1840 case CARP_VERSION_CARP: 1841 tv.tv_sec = 3 * sc->sc_advbase; 1842 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1843 timeout = tvtohz(&tv); 1844 break; 1845 case CARP_VERSION_VRRPv3: 1846 /* skew time */ 1847 timeout = (256 - sc->sc_vrrp_prio) * 1848 sc->sc_vrrp_master_inter / 256; 1849 timeout += (3 * sc->sc_vrrp_master_inter); 1850 timeout *= hz; 1851 timeout /= 100; /* master interval is in centiseconds */ 1852 break; 1853 } 1854 switch (af) { 1855 #ifdef INET 1856 case AF_INET: 1857 callout_reset(&sc->sc_md_tmo, timeout, 1858 carp_master_down, sc); 1859 break; 1860 #endif 1861 #ifdef INET6 1862 case AF_INET6: 1863 callout_reset(&sc->sc_md6_tmo, timeout, 1864 carp_master_down, sc); 1865 break; 1866 #endif 1867 default: 1868 #ifdef INET 1869 if (sc->sc_naddrs) 1870 callout_reset(&sc->sc_md_tmo, timeout, 1871 carp_master_down, sc); 1872 #endif 1873 #ifdef INET6 1874 if (sc->sc_naddrs6) 1875 callout_reset(&sc->sc_md6_tmo, timeout, 1876 carp_master_down, sc); 1877 #endif 1878 break; 1879 } 1880 break; 1881 case MASTER: 1882 switch (sc->sc_version) { 1883 case CARP_VERSION_CARP: 1884 tv.tv_sec = sc->sc_advbase; 1885 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1886 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1887 carp_callout, sc); 1888 break; 1889 case CARP_VERSION_VRRPv3: 1890 callout_reset(&sc->sc_ad_tmo, 1891 sc->sc_vrrp_adv_inter * hz / 100, 1892 carp_callout, sc); 1893 
break; 1894 } 1895 break; 1896 } 1897 } 1898 1899 /* 1900 * Setup multicast structures. 1901 */ 1902 static int 1903 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1904 { 1905 struct ifnet *ifp = cif->cif_ifp; 1906 int error = 0; 1907 1908 switch (sa) { 1909 #ifdef INET 1910 case AF_INET: 1911 { 1912 struct ip_moptions *imo = &cif->cif_imo; 1913 struct in_mfilter *imf; 1914 struct in_addr addr; 1915 1916 if (ip_mfilter_first(&imo->imo_head) != NULL) 1917 return (0); 1918 1919 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1920 ip_mfilter_init(&imo->imo_head); 1921 imo->imo_multicast_vif = -1; 1922 1923 addr.s_addr = htonl(INADDR_CARP_GROUP); 1924 if ((error = in_joingroup(ifp, &addr, NULL, 1925 &imf->imf_inm)) != 0) { 1926 ip_mfilter_free(imf); 1927 break; 1928 } 1929 1930 ip_mfilter_insert(&imo->imo_head, imf); 1931 imo->imo_multicast_ifp = ifp; 1932 imo->imo_multicast_ttl = CARP_DFLTTL; 1933 imo->imo_multicast_loop = 0; 1934 break; 1935 } 1936 #endif 1937 #ifdef INET6 1938 case AF_INET6: 1939 { 1940 struct ip6_moptions *im6o = &cif->cif_im6o; 1941 struct in6_mfilter *im6f[2]; 1942 struct in6_addr in6; 1943 1944 if (ip6_mfilter_first(&im6o->im6o_head)) 1945 return (0); 1946 1947 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1948 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1949 1950 ip6_mfilter_init(&im6o->im6o_head); 1951 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1952 im6o->im6o_multicast_ifp = ifp; 1953 1954 /* Join IPv6 CARP multicast group. */ 1955 bzero(&in6, sizeof(in6)); 1956 in6.s6_addr16[0] = htons(0xff02); 1957 in6.s6_addr8[15] = 0x12; 1958 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1959 ip6_mfilter_free(im6f[0]); 1960 ip6_mfilter_free(im6f[1]); 1961 break; 1962 } 1963 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1964 ip6_mfilter_free(im6f[0]); 1965 ip6_mfilter_free(im6f[1]); 1966 break; 1967 } 1968 1969 /* Join solicited multicast address. 
*/ 1970 bzero(&in6, sizeof(in6)); 1971 in6.s6_addr16[0] = htons(0xff02); 1972 in6.s6_addr32[1] = 0; 1973 in6.s6_addr32[2] = htonl(1); 1974 in6.s6_addr32[3] = 0; 1975 in6.s6_addr8[12] = 0xff; 1976 1977 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1978 ip6_mfilter_free(im6f[0]); 1979 ip6_mfilter_free(im6f[1]); 1980 break; 1981 } 1982 1983 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1984 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1985 ip6_mfilter_free(im6f[0]); 1986 ip6_mfilter_free(im6f[1]); 1987 break; 1988 } 1989 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1990 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1991 break; 1992 } 1993 #endif 1994 } 1995 1996 return (error); 1997 } 1998 1999 /* 2000 * Free multicast structures. 2001 */ 2002 static void 2003 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 2004 { 2005 #ifdef INET 2006 struct ip_moptions *imo = &cif->cif_imo; 2007 struct in_mfilter *imf; 2008 #endif 2009 #ifdef INET6 2010 struct ip6_moptions *im6o = &cif->cif_im6o; 2011 struct in6_mfilter *im6f; 2012 #endif 2013 sx_assert(&carp_sx, SA_XLOCKED); 2014 2015 switch (sa) { 2016 #ifdef INET 2017 case AF_INET: 2018 if (cif->cif_naddrs != 0) 2019 break; 2020 2021 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2022 ip_mfilter_remove(&imo->imo_head, imf); 2023 in_leavegroup(imf->imf_inm, NULL); 2024 ip_mfilter_free(imf); 2025 } 2026 break; 2027 #endif 2028 #ifdef INET6 2029 case AF_INET6: 2030 if (cif->cif_naddrs6 != 0) 2031 break; 2032 2033 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 2034 ip6_mfilter_remove(&im6o->im6o_head, im6f); 2035 in6_leavegroup(im6f->im6f_in6m, NULL); 2036 ip6_mfilter_free(im6f); 2037 } 2038 break; 2039 #endif 2040 } 2041 } 2042 2043 int 2044 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 2045 { 2046 struct m_tag *mtag; 2047 int vhid; 2048 2049 if (!sa) 2050 return (0); 2051 2052 switch (sa->sa_family) { 2053 #ifdef INET 2054 case 
AF_INET: 2055 break; 2056 #endif 2057 #ifdef INET6 2058 case AF_INET6: 2059 break; 2060 #endif 2061 default: 2062 return (0); 2063 } 2064 2065 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2066 if (mtag == NULL) 2067 return (0); 2068 2069 bcopy(mtag + 1, &vhid, sizeof(vhid)); 2070 2071 /* Set the source MAC address to the Virtual Router MAC Address. */ 2072 switch (ifp->if_type) { 2073 case IFT_ETHER: 2074 case IFT_BRIDGE: 2075 case IFT_L2VLAN: { 2076 struct ether_header *eh; 2077 2078 eh = mtod(m, struct ether_header *); 2079 eh->ether_shost[0] = 0; 2080 eh->ether_shost[1] = 0; 2081 eh->ether_shost[2] = 0x5e; 2082 eh->ether_shost[3] = 0; 2083 eh->ether_shost[4] = 1; 2084 eh->ether_shost[5] = vhid; 2085 } 2086 break; 2087 default: 2088 printf("%s: carp is not supported for the %d interface type\n", 2089 if_name(ifp), ifp->if_type); 2090 return (EOPNOTSUPP); 2091 } 2092 2093 return (0); 2094 } 2095 2096 static struct carp_softc* 2097 carp_alloc(struct ifnet *ifp, carp_version_t version, int vhid) 2098 { 2099 struct carp_softc *sc; 2100 struct carp_if *cif; 2101 2102 sx_assert(&carp_sx, SA_XLOCKED); 2103 2104 if ((cif = ifp->if_carp) == NULL) 2105 cif = carp_alloc_if(ifp); 2106 2107 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK); 2108 *sc = (struct carp_softc ){ 2109 .sc_vhid = vhid, 2110 .sc_version = version, 2111 .sc_state = INIT, 2112 .sc_carpdev = ifp, 2113 .sc_ifasiz = sizeof(struct ifaddr *), 2114 .sc_addr = { 0, 0, 0x5e, 0, 1, vhid }, 2115 }; 2116 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 2117 2118 switch (version) { 2119 case CARP_VERSION_CARP: 2120 sc->sc_advbase = CARP_DFLTINTV; 2121 sc->sc_init_counter = true; 2122 sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); 2123 sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 2124 sc->sc_carpaddr6.s6_addr8[15] = 0x12; 2125 break; 2126 case CARP_VERSION_VRRPv3: 2127 sc->sc_vrrp_adv_inter = 100; 2128 sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; 2129 sc->sc_vrrp_prio = 100; 2130 break; 2131 } 
2132 2133 CARP_LOCK_INIT(sc); 2134 #ifdef INET 2135 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2136 #endif 2137 #ifdef INET6 2138 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2139 #endif 2140 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2141 2142 CIF_LOCK(cif); 2143 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 2144 CIF_UNLOCK(cif); 2145 2146 mtx_lock(&carp_mtx); 2147 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 2148 mtx_unlock(&carp_mtx); 2149 2150 return (sc); 2151 } 2152 2153 static void 2154 carp_grow_ifas(struct carp_softc *sc) 2155 { 2156 struct ifaddr **new; 2157 2158 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 2159 CARP_LOCK(sc); 2160 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 2161 free(sc->sc_ifas, M_CARP); 2162 sc->sc_ifas = new; 2163 sc->sc_ifasiz *= 2; 2164 CARP_UNLOCK(sc); 2165 } 2166 2167 static void 2168 carp_destroy(struct carp_softc *sc) 2169 { 2170 struct ifnet *ifp = sc->sc_carpdev; 2171 struct carp_if *cif = ifp->if_carp; 2172 2173 sx_assert(&carp_sx, SA_XLOCKED); 2174 2175 if (sc->sc_suppress) 2176 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 2177 CARP_UNLOCK(sc); 2178 2179 CIF_LOCK(cif); 2180 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 2181 CIF_UNLOCK(cif); 2182 2183 mtx_lock(&carp_mtx); 2184 LIST_REMOVE(sc, sc_next); 2185 mtx_unlock(&carp_mtx); 2186 2187 callout_drain(&sc->sc_ad_tmo); 2188 #ifdef INET 2189 callout_drain(&sc->sc_md_tmo); 2190 #endif 2191 #ifdef INET6 2192 callout_drain(&sc->sc_md6_tmo); 2193 #endif 2194 CARP_LOCK_DESTROY(sc); 2195 2196 free(sc->sc_ifas, M_CARP); 2197 free(sc, M_CARP); 2198 } 2199 2200 static struct carp_if* 2201 carp_alloc_if(struct ifnet *ifp) 2202 { 2203 struct carp_if *cif; 2204 int error; 2205 2206 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 2207 2208 if ((error = ifpromisc(ifp, 1)) != 0) 2209 printf("%s: ifpromisc(%s) failed: %d\n", 2210 __func__, if_name(ifp), error); 2211 else 2212 cif->cif_flags 
|= CIF_PROMISC; 2213 2214 CIF_LOCK_INIT(cif); 2215 cif->cif_ifp = ifp; 2216 TAILQ_INIT(&cif->cif_vrs); 2217 2218 IF_ADDR_WLOCK(ifp); 2219 ifp->if_carp = cif; 2220 if_ref(ifp); 2221 IF_ADDR_WUNLOCK(ifp); 2222 2223 return (cif); 2224 } 2225 2226 static void 2227 carp_free_if(struct carp_if *cif) 2228 { 2229 struct ifnet *ifp = cif->cif_ifp; 2230 2231 CIF_LOCK_ASSERT(cif); 2232 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 2233 __func__)); 2234 2235 IF_ADDR_WLOCK(ifp); 2236 ifp->if_carp = NULL; 2237 IF_ADDR_WUNLOCK(ifp); 2238 2239 CIF_LOCK_DESTROY(cif); 2240 2241 if (cif->cif_flags & CIF_PROMISC) 2242 ifpromisc(ifp, 0); 2243 if_rele(ifp); 2244 2245 free(cif, M_CARP); 2246 } 2247 2248 static bool 2249 carp_carprcp(void *arg, struct carp_softc *sc, int priv) 2250 { 2251 struct carpreq *carpr = arg; 2252 2253 CARP_LOCK(sc); 2254 carpr->carpr_state = sc->sc_state; 2255 carpr->carpr_vhid = sc->sc_vhid; 2256 switch (sc->sc_version) { 2257 case CARP_VERSION_CARP: 2258 carpr->carpr_advbase = sc->sc_advbase; 2259 carpr->carpr_advskew = sc->sc_advskew; 2260 if (priv) 2261 bcopy(sc->sc_key, carpr->carpr_key, 2262 sizeof(carpr->carpr_key)); 2263 else 2264 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 2265 break; 2266 case CARP_VERSION_VRRPv3: 2267 break; 2268 } 2269 CARP_UNLOCK(sc); 2270 2271 return (true); 2272 } 2273 2274 static int 2275 carp_ioctl_set(if_t ifp, struct carpkreq *carpr) 2276 { 2277 struct epoch_tracker et; 2278 struct carp_softc *sc = NULL; 2279 int error = 0; 2280 2281 if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID) 2282 return (EINVAL); 2283 2284 switch (carpr->carpr_version) { 2285 case CARP_VERSION_CARP: 2286 if (carpr->carpr_advbase != 0 && (carpr->carpr_advbase > 255 || 2287 carpr->carpr_advbase < CARP_DFLTINTV)) 2288 return (EINVAL); 2289 if (carpr->carpr_advskew < 0 || carpr->carpr_advskew >= 255) 2290 return (EINVAL); 2291 break; 2292 case CARP_VERSION_VRRPv3: 2293 /* XXXGL: shouldn't we check anything? 
*/ 2294 break; 2295 default: 2296 return (EINVAL); 2297 } 2298 2299 if (ifp->if_carp) { 2300 IFNET_FOREACH_CARP(ifp, sc) 2301 if (sc->sc_vhid == carpr->carpr_vhid) 2302 break; 2303 } 2304 2305 if (sc == NULL) 2306 sc = carp_alloc(ifp, carpr->carpr_version, carpr->carpr_vhid); 2307 else if (sc->sc_version != carpr->carpr_version) 2308 return (EINVAL); 2309 2310 CARP_LOCK(sc); 2311 switch (sc->sc_version) { 2312 case CARP_VERSION_CARP: 2313 if (carpr->carpr_advbase != 0) 2314 sc->sc_advbase = carpr->carpr_advbase; 2315 sc->sc_advskew = carpr->carpr_advskew; 2316 if (carpr->carpr_addr.s_addr != INADDR_ANY) 2317 sc->sc_carpaddr = carpr->carpr_addr; 2318 if (!IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { 2319 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, 2320 sizeof(sc->sc_carpaddr6)); 2321 } 2322 if (carpr->carpr_key[0] != '\0') { 2323 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2324 carp_hmac_prepare(sc); 2325 } 2326 break; 2327 case CARP_VERSION_VRRPv3: 2328 if (carpr->carpr_vrrp_priority != 0) 2329 sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; 2330 if (carpr->carpr_vrrp_adv_inter) 2331 sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; 2332 break; 2333 } 2334 2335 if (sc->sc_state != INIT && 2336 carpr->carpr_state != sc->sc_state) { 2337 switch (carpr->carpr_state) { 2338 case BACKUP: 2339 callout_stop(&sc->sc_ad_tmo); 2340 carp_set_state(sc, BACKUP, 2341 "user requested via ifconfig"); 2342 carp_setrun(sc, 0); 2343 carp_delroute(sc); 2344 break; 2345 case MASTER: 2346 NET_EPOCH_ENTER(et); 2347 carp_master_down_locked(sc, 2348 "user requested via ifconfig"); 2349 NET_EPOCH_EXIT(et); 2350 break; 2351 default: 2352 break; 2353 } 2354 } 2355 CARP_UNLOCK(sc); 2356 2357 return (error); 2358 } 2359 2360 static int 2361 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr, 2362 bool (*outfn)(void *, struct carp_softc *, int), void *arg) 2363 { 2364 int priveleged; 2365 struct carp_softc *sc; 2366 2367 if (carpr->carpr_vhid < 0 || 
carpr->carpr_vhid > CARP_MAXVHID) 2368 return (EINVAL); 2369 if (carpr->carpr_count < 1) 2370 return (EMSGSIZE); 2371 if (ifp->if_carp == NULL) 2372 return (ENOENT); 2373 2374 priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0); 2375 if (carpr->carpr_vhid != 0) { 2376 IFNET_FOREACH_CARP(ifp, sc) 2377 if (sc->sc_vhid == carpr->carpr_vhid) 2378 break; 2379 if (sc == NULL) 2380 return (ENOENT); 2381 2382 if (! outfn(arg, sc, priveleged)) 2383 return (ENOMEM); 2384 carpr->carpr_count = 1; 2385 } else { 2386 int count; 2387 2388 count = 0; 2389 IFNET_FOREACH_CARP(ifp, sc) 2390 count++; 2391 2392 if (count > carpr->carpr_count) 2393 return (EMSGSIZE); 2394 2395 IFNET_FOREACH_CARP(ifp, sc) { 2396 if (! outfn(arg, sc, priveleged)) 2397 return (ENOMEM); 2398 carpr->carpr_count = count; 2399 } 2400 } 2401 2402 return (0); 2403 } 2404 2405 int 2406 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 2407 { 2408 struct carpreq carpr; 2409 struct carpkreq carprk = { 2410 .carpr_version = CARP_VERSION_CARP, 2411 }; 2412 struct ifnet *ifp; 2413 int error = 0; 2414 2415 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 2416 return (error); 2417 2418 ifp = ifunit_ref(ifr->ifr_name); 2419 if ((error = carp_is_supported_if(ifp)) != 0) 2420 goto out; 2421 2422 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2423 error = EADDRNOTAVAIL; 2424 goto out; 2425 } 2426 2427 sx_xlock(&carp_sx); 2428 switch (cmd) { 2429 case SIOCSVH: 2430 if ((error = priv_check(td, PRIV_NETINET_CARP))) 2431 break; 2432 2433 memcpy(&carprk, &carpr, sizeof(carpr)); 2434 error = carp_ioctl_set(ifp, &carprk); 2435 break; 2436 2437 case SIOCGVH: 2438 error = carp_ioctl_get(ifp, td->td_ucred, &carpr, 2439 carp_carprcp, &carpr); 2440 if (error == 0) { 2441 error = copyout(&carpr, 2442 (char *)ifr_data_get_ptr(ifr), 2443 carpr.carpr_count * sizeof(carpr)); 2444 } 2445 break; 2446 default: 2447 error = EINVAL; 2448 } 2449 sx_xunlock(&carp_sx); 2450 2451 out: 2452 if (ifp != NULL) 2453 
if_rele(ifp); 2454 2455 return (error); 2456 } 2457 2458 static int 2459 carp_get_vhid(struct ifaddr *ifa) 2460 { 2461 2462 if (ifa == NULL || ifa->ifa_carp == NULL) 2463 return (0); 2464 2465 return (ifa->ifa_carp->sc_vhid); 2466 } 2467 2468 int 2469 carp_attach(struct ifaddr *ifa, int vhid) 2470 { 2471 struct ifnet *ifp = ifa->ifa_ifp; 2472 struct carp_if *cif = ifp->if_carp; 2473 struct carp_softc *sc; 2474 int index, error; 2475 2476 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 2477 2478 switch (ifa->ifa_addr->sa_family) { 2479 #ifdef INET 2480 case AF_INET: 2481 #endif 2482 #ifdef INET6 2483 case AF_INET6: 2484 #endif 2485 break; 2486 default: 2487 return (EPROTOTYPE); 2488 } 2489 2490 sx_xlock(&carp_sx); 2491 if (ifp->if_carp == NULL) { 2492 sx_xunlock(&carp_sx); 2493 return (ENOPROTOOPT); 2494 } 2495 2496 IFNET_FOREACH_CARP(ifp, sc) 2497 if (sc->sc_vhid == vhid) 2498 break; 2499 if (sc == NULL) { 2500 sx_xunlock(&carp_sx); 2501 return (ENOENT); 2502 } 2503 2504 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 2505 if (error) { 2506 CIF_FREE(cif); 2507 sx_xunlock(&carp_sx); 2508 return (error); 2509 } 2510 2511 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 2512 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 2513 carp_grow_ifas(sc); 2514 2515 switch (ifa->ifa_addr->sa_family) { 2516 #ifdef INET 2517 case AF_INET: 2518 cif->cif_naddrs++; 2519 sc->sc_naddrs++; 2520 break; 2521 #endif 2522 #ifdef INET6 2523 case AF_INET6: 2524 cif->cif_naddrs6++; 2525 sc->sc_naddrs6++; 2526 break; 2527 #endif 2528 } 2529 2530 ifa_ref(ifa); 2531 2532 CARP_LOCK(sc); 2533 sc->sc_ifas[index - 1] = ifa; 2534 ifa->ifa_carp = sc; 2535 if (sc->sc_version == CARP_VERSION_CARP) 2536 carp_hmac_prepare(sc); 2537 carp_sc_state(sc); 2538 CARP_UNLOCK(sc); 2539 2540 sx_xunlock(&carp_sx); 2541 2542 return (0); 2543 } 2544 2545 void 2546 carp_detach(struct ifaddr *ifa, bool keep_cif) 2547 { 2548 struct ifnet *ifp = ifa->ifa_ifp; 2549 struct carp_if *cif = 
ifp->if_carp; 2550 struct carp_softc *sc = ifa->ifa_carp; 2551 int i, index; 2552 2553 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 2554 2555 sx_xlock(&carp_sx); 2556 2557 CARP_LOCK(sc); 2558 /* Shift array. */ 2559 index = sc->sc_naddrs + sc->sc_naddrs6; 2560 for (i = 0; i < index; i++) 2561 if (sc->sc_ifas[i] == ifa) 2562 break; 2563 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 2564 for (; i < index - 1; i++) 2565 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 2566 sc->sc_ifas[index - 1] = NULL; 2567 2568 switch (ifa->ifa_addr->sa_family) { 2569 #ifdef INET 2570 case AF_INET: 2571 cif->cif_naddrs--; 2572 sc->sc_naddrs--; 2573 break; 2574 #endif 2575 #ifdef INET6 2576 case AF_INET6: 2577 cif->cif_naddrs6--; 2578 sc->sc_naddrs6--; 2579 break; 2580 #endif 2581 } 2582 2583 carp_ifa_delroute(ifa); 2584 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2585 2586 ifa->ifa_carp = NULL; 2587 ifa_free(ifa); 2588 2589 if (sc->sc_version == CARP_VERSION_CARP) 2590 carp_hmac_prepare(sc); 2591 carp_sc_state(sc); 2592 2593 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2594 carp_destroy(sc); 2595 else 2596 CARP_UNLOCK(sc); 2597 2598 if (!keep_cif) 2599 CIF_FREE(cif); 2600 2601 sx_xunlock(&carp_sx); 2602 } 2603 2604 static void 2605 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2606 { 2607 2608 CARP_LOCK_ASSERT(sc); 2609 2610 if (sc->sc_state != state) { 2611 const char *carp_states[] = { CARP_STATES }; 2612 char subsys[IFNAMSIZ+5]; 2613 2614 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2615 if_name(sc->sc_carpdev)); 2616 2617 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2618 carp_states[sc->sc_state], carp_states[state], reason); 2619 2620 sc->sc_state = state; 2621 2622 devctl_notify("CARP", subsys, carp_states[state], NULL); 2623 } 2624 } 2625 2626 static void 2627 carp_linkstate(struct ifnet *ifp) 2628 { 2629 struct carp_softc *sc; 2630 2631 CIF_LOCK(ifp->if_carp); 2632 IFNET_FOREACH_CARP(ifp, sc) { 2633 
CARP_LOCK(sc); 2634 carp_sc_state(sc); 2635 CARP_UNLOCK(sc); 2636 } 2637 CIF_UNLOCK(ifp->if_carp); 2638 } 2639 2640 static void 2641 carp_sc_state(struct carp_softc *sc) 2642 { 2643 2644 CARP_LOCK_ASSERT(sc); 2645 2646 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2647 !(sc->sc_carpdev->if_flags & IFF_UP) || 2648 !V_carp_allow) { 2649 callout_stop(&sc->sc_ad_tmo); 2650 #ifdef INET 2651 callout_stop(&sc->sc_md_tmo); 2652 #endif 2653 #ifdef INET6 2654 callout_stop(&sc->sc_md6_tmo); 2655 #endif 2656 carp_set_state(sc, INIT, "hardware interface down"); 2657 carp_setrun(sc, 0); 2658 carp_delroute(sc); 2659 if (!sc->sc_suppress) 2660 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2661 sc->sc_suppress = 1; 2662 } else { 2663 carp_set_state(sc, INIT, "hardware interface up"); 2664 carp_setrun(sc, 0); 2665 if (sc->sc_suppress) 2666 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2667 sc->sc_suppress = 0; 2668 } 2669 } 2670 2671 static void 2672 carp_demote_adj(int adj, char *reason) 2673 { 2674 atomic_add_int(&V_carp_demotion, adj); 2675 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2676 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2677 } 2678 2679 static int 2680 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2681 { 2682 int new, error; 2683 struct carp_softc *sc; 2684 2685 new = V_carp_allow; 2686 error = sysctl_handle_int(oidp, &new, 0, req); 2687 if (error || !req->newptr) 2688 return (error); 2689 2690 if (V_carp_allow != new) { 2691 V_carp_allow = new; 2692 2693 mtx_lock(&carp_mtx); 2694 LIST_FOREACH(sc, &carp_list, sc_next) { 2695 CARP_LOCK(sc); 2696 if (curvnet == sc->sc_carpdev->if_vnet) 2697 carp_sc_state(sc); 2698 CARP_UNLOCK(sc); 2699 } 2700 mtx_unlock(&carp_mtx); 2701 } 2702 2703 return (0); 2704 } 2705 2706 static int 2707 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2708 { 2709 int new, error; 2710 2711 new = V_carp_dscp; 2712 error = sysctl_handle_int(oidp, &new, 0, req); 2713 if (error || !req->newptr) 2714 return 
(error); 2715 2716 if (new < 0 || new > 63) 2717 return (EINVAL); 2718 2719 V_carp_dscp = new; 2720 2721 return (0); 2722 } 2723 2724 static int 2725 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2726 { 2727 int new, error; 2728 2729 new = V_carp_demotion; 2730 error = sysctl_handle_int(oidp, &new, 0, req); 2731 if (error || !req->newptr) 2732 return (error); 2733 2734 carp_demote_adj(new, "sysctl"); 2735 2736 return (0); 2737 } 2738 2739 static int 2740 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 2741 { 2742 if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN)) 2743 return (EINVAL); 2744 2745 memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla)); 2746 return (0); 2747 } 2748 2749 struct carp_nl_send_args { 2750 struct nlmsghdr *hdr; 2751 struct nl_pstate *npt; 2752 }; 2753 2754 static bool 2755 carp_nl_send(void *arg, struct carp_softc *sc, int priv) 2756 { 2757 struct carp_nl_send_args *nlsa = arg; 2758 struct nlmsghdr *hdr = nlsa->hdr; 2759 struct nl_pstate *npt = nlsa->npt; 2760 struct nl_writer *nw = npt->nw; 2761 struct genlmsghdr *ghdr_new; 2762 2763 if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { 2764 nlmsg_abort(nw); 2765 return (false); 2766 } 2767 2768 ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); 2769 if (ghdr_new == NULL) { 2770 nlmsg_abort(nw); 2771 return (false); 2772 } 2773 2774 ghdr_new->cmd = CARP_NL_CMD_GET; 2775 ghdr_new->version = 0; 2776 ghdr_new->reserved = 0; 2777 2778 CARP_LOCK(sc); 2779 2780 nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); 2781 nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); 2782 nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); 2783 switch (sc->sc_version) { 2784 case CARP_VERSION_CARP: 2785 nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); 2786 nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); 2787 nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); 2788 nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); 2789 if (priv) 2790 
nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), 2791 sc->sc_key); 2792 break; 2793 case CARP_VERSION_VRRPv3: 2794 nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); 2795 nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, 2796 sc->sc_vrrp_adv_inter); 2797 break; 2798 } 2799 2800 CARP_UNLOCK(sc); 2801 2802 if (! nlmsg_end(nw)) { 2803 nlmsg_abort(nw); 2804 return (false); 2805 } 2806 2807 return (true); 2808 } 2809 2810 struct nl_carp_parsed { 2811 unsigned int ifindex; 2812 char *ifname; 2813 uint32_t state; 2814 uint32_t vhid; 2815 int32_t advbase; 2816 int32_t advskew; 2817 char key[CARP_KEY_LEN]; 2818 struct in_addr addr; 2819 struct in6_addr addr6; 2820 carp_version_t version; 2821 uint8_t vrrp_prio; 2822 uint16_t vrrp_adv_inter; 2823 }; 2824 2825 #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) 2826 static const struct nlattr_parser nla_p_set[] = { 2827 { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, 2828 { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, 2829 { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 }, 2830 { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 }, 2831 { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key }, 2832 { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 }, 2833 { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, 2834 { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, 2835 { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, 2836 { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, 2837 { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, 2838 { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, 2839 }; 2840 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_empty, nla_p_set); 2841 #undef _OUT 2842 2843 2844 static int 2845 carp_nl_get(struct 
nlmsghdr *hdr, struct nl_pstate *npt) 2846 { 2847 struct nl_carp_parsed attrs = { }; 2848 struct carp_nl_send_args args; 2849 struct carpreq carpr = { }; 2850 struct epoch_tracker et; 2851 if_t ifp = NULL; 2852 int error; 2853 2854 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2855 if (error != 0) 2856 return (error); 2857 2858 NET_EPOCH_ENTER(et); 2859 if (attrs.ifname != NULL) 2860 ifp = ifunit_ref(attrs.ifname); 2861 else if (attrs.ifindex != 0) 2862 ifp = ifnet_byindex_ref(attrs.ifindex); 2863 NET_EPOCH_EXIT(et); 2864 2865 if ((error = carp_is_supported_if(ifp)) != 0) 2866 goto out; 2867 2868 hdr->nlmsg_flags |= NLM_F_MULTI; 2869 args.hdr = hdr; 2870 args.npt = npt; 2871 2872 carpr.carpr_vhid = attrs.vhid; 2873 carpr.carpr_count = CARP_MAXVHID; 2874 2875 sx_xlock(&carp_sx); 2876 error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr, 2877 carp_nl_send, &args); 2878 sx_xunlock(&carp_sx); 2879 2880 if (! nlmsg_end_dump(npt->nw, error, hdr)) 2881 error = ENOMEM; 2882 2883 out: 2884 if (ifp != NULL) 2885 if_rele(ifp); 2886 2887 return (error); 2888 } 2889 2890 static int 2891 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) 2892 { 2893 struct nl_carp_parsed attrs = { }; 2894 struct carpkreq carpr; 2895 struct epoch_tracker et; 2896 if_t ifp = NULL; 2897 int error; 2898 2899 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2900 if (error != 0) 2901 return (error); 2902 2903 if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID) 2904 return (EINVAL); 2905 if (attrs.state > CARP_MAXSTATE) 2906 return (EINVAL); 2907 if (attrs.version == 0) /* compat with pre-VRRPv3 */ 2908 attrs.version = CARP_VERSION_CARP; 2909 switch (attrs.version) { 2910 case CARP_VERSION_CARP: 2911 if (attrs.advbase < 0 || attrs.advskew < 0) 2912 return (EINVAL); 2913 if (attrs.advbase > 255) 2914 return (EINVAL); 2915 if (attrs.advskew >= 255) 2916 return (EINVAL); 2917 break; 2918 case CARP_VERSION_VRRPv3: 2919 if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) 2920 return 
(EINVAL); 2921 break; 2922 default: 2923 return (EINVAL); 2924 } 2925 2926 NET_EPOCH_ENTER(et); 2927 if (attrs.ifname != NULL) 2928 ifp = ifunit_ref(attrs.ifname); 2929 else if (attrs.ifindex != 0) 2930 ifp = ifnet_byindex_ref(attrs.ifindex); 2931 NET_EPOCH_EXIT(et); 2932 2933 if ((error = carp_is_supported_if(ifp)) != 0) 2934 goto out; 2935 2936 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2937 error = EADDRNOTAVAIL; 2938 goto out; 2939 } 2940 2941 carpr.carpr_count = 1; 2942 carpr.carpr_vhid = attrs.vhid; 2943 carpr.carpr_state = attrs.state; 2944 carpr.carpr_version = attrs.version; 2945 switch (attrs.version) { 2946 case CARP_VERSION_CARP: 2947 carpr.carpr_advbase = attrs.advbase; 2948 carpr.carpr_advskew = attrs.advskew; 2949 carpr.carpr_addr = attrs.addr; 2950 carpr.carpr_addr6 = attrs.addr6; 2951 memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); 2952 break; 2953 case CARP_VERSION_VRRPv3: 2954 carpr.carpr_vrrp_priority = attrs.vrrp_prio; 2955 carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; 2956 break; 2957 } 2958 2959 sx_xlock(&carp_sx); 2960 error = carp_ioctl_set(ifp, &carpr); 2961 sx_xunlock(&carp_sx); 2962 2963 out: 2964 if (ifp != NULL) 2965 if_rele(ifp); 2966 2967 return (error); 2968 } 2969 2970 static const struct nlhdr_parser *all_parsers[] = { 2971 &carp_parser 2972 }; 2973 2974 static const struct genl_cmd carp_cmds[] = { 2975 { 2976 .cmd_num = CARP_NL_CMD_GET, 2977 .cmd_name = "SIOCGVH", 2978 .cmd_cb = carp_nl_get, 2979 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | 2980 GENL_CMD_CAP_HASPOL, 2981 }, 2982 { 2983 .cmd_num = CARP_NL_CMD_SET, 2984 .cmd_name = "SIOCSVH", 2985 .cmd_cb = carp_nl_set, 2986 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, 2987 .cmd_priv = PRIV_NETINET_CARP, 2988 }, 2989 }; 2990 2991 static uint16_t carp_family_id; 2992 static void 2993 carp_nl_register(void) 2994 { 2995 bool ret __diagused; 2996 2997 NL_VERIFY_PARSERS(all_parsers); 2998 carp_family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, 2999 
CARP_NL_CMD_MAX); 3000 MPASS(carp_family_id != 0); 3001 3002 ret = genl_register_cmds(carp_family_id, carp_cmds, nitems(carp_cmds)); 3003 MPASS(ret); 3004 } 3005 3006 static void 3007 carp_nl_unregister(void) 3008 { 3009 genl_unregister_family(carp_family_id); 3010 } 3011 3012 static void 3013 carp_mod_cleanup(void) 3014 { 3015 3016 carp_nl_unregister(); 3017 3018 #ifdef INET 3019 (void)ipproto_unregister(IPPROTO_CARP); 3020 carp_iamatch_p = NULL; 3021 #endif 3022 #ifdef INET6 3023 (void)ip6proto_unregister(IPPROTO_CARP); 3024 carp_iamatch6_p = NULL; 3025 carp_macmatch6_p = NULL; 3026 #endif 3027 carp_ioctl_p = NULL; 3028 carp_attach_p = NULL; 3029 carp_detach_p = NULL; 3030 carp_get_vhid_p = NULL; 3031 carp_linkstate_p = NULL; 3032 carp_forus_p = NULL; 3033 carp_output_p = NULL; 3034 carp_demote_adj_p = NULL; 3035 carp_master_p = NULL; 3036 mtx_unlock(&carp_mtx); 3037 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 3038 mtx_destroy(&carp_mtx); 3039 sx_destroy(&carp_sx); 3040 } 3041 3042 static void 3043 ipcarp_sysinit(void) 3044 { 3045 3046 /* Load allow as tunable so to postpone carp start after module load */ 3047 TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow); 3048 } 3049 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL); 3050 3051 static int 3052 carp_mod_load(void) 3053 { 3054 int err; 3055 3056 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 3057 sx_init(&carp_sx, "carp_sx"); 3058 carp_get_vhid_p = carp_get_vhid; 3059 carp_forus_p = carp_forus; 3060 carp_output_p = carp_output; 3061 carp_linkstate_p = carp_linkstate; 3062 carp_ioctl_p = carp_ioctl; 3063 carp_attach_p = carp_attach; 3064 carp_detach_p = carp_detach; 3065 carp_demote_adj_p = carp_demote_adj; 3066 carp_master_p = carp_master; 3067 #ifdef INET6 3068 carp_iamatch6_p = carp_iamatch6; 3069 carp_macmatch6_p = carp_macmatch6; 3070 err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); 3071 if (err) { 3072 printf("carp: error %d registering with INET6\n", 
err); 3073 carp_mod_cleanup(); 3074 return (err); 3075 } 3076 #endif 3077 #ifdef INET 3078 carp_iamatch_p = carp_iamatch; 3079 err = ipproto_register(IPPROTO_CARP, carp_input, NULL); 3080 if (err) { 3081 printf("carp: error %d registering with INET\n", err); 3082 carp_mod_cleanup(); 3083 return (err); 3084 } 3085 #endif 3086 3087 carp_nl_register(); 3088 3089 return (0); 3090 } 3091 3092 static int 3093 carp_modevent(module_t mod, int type, void *data) 3094 { 3095 switch (type) { 3096 case MOD_LOAD: 3097 return carp_mod_load(); 3098 /* NOTREACHED */ 3099 case MOD_UNLOAD: 3100 mtx_lock(&carp_mtx); 3101 if (LIST_EMPTY(&carp_list)) 3102 carp_mod_cleanup(); 3103 else { 3104 mtx_unlock(&carp_mtx); 3105 return (EBUSY); 3106 } 3107 break; 3108 3109 default: 3110 return (EINVAL); 3111 } 3112 3113 return (0); 3114 } 3115 3116 static moduledata_t carp_mod = { 3117 "carp", 3118 carp_modevent, 3119 0 3120 }; 3121 3122 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 3123