1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 
#include <sys/cdefs.h>
#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/devctl.h>
#include <sys/jail.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/counter.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_carp_nl.h>
#include <netinet/ip.h>
#include <machine/in_cksum.h>
#endif
#ifdef INET
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_generic.h>
#include <netlink/netlink_message_parser.h>

#include <crypto/sha1.h>

static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");

/*
 * Per-vhid state.  One softc exists for every virtual host id configured
 * on an interface; it is linked both on its interface's carp_if and on
 * the global carp_list.
 */
struct carp_softc {
	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
	carp_version_t		sc_version;	/* carp or VRRPv3 */
	uint8_t			sc_addr[ETHER_ADDR_LEN];	/* Our link level address. */
	struct callout		sc_ad_tmo;	/* Advertising timeout. */
#ifdef INET
	struct callout		sc_md_tmo;	/* Master down timeout. */
#endif
#ifdef INET6
	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
#endif
	/* Per-softc lock, taken through CARP_LOCK()/CARP_UNLOCK(). */
	struct mtx		sc_mtx;

	/* Virtual host id; VRRPv3 packets are matched on it as the VRID. */
	int			sc_vhid;
	/*
	 * Version-specific state.  The two anonymous structs overlay each
	 * other, so only the members matching sc_version are meaningful.
	 */
	union {
		struct { /* sc_version == CARP_VERSION_CARP */
			/*
			 * Advertisement timing: carp_input_c() compares
			 * sc_advbase seconds plus (demoted) sc_advskew/256 s
			 * against the values in a received header.
			 */
			int		sc_advskew;
			int		sc_advbase;
			/*
			 * carp_input_c() only tests whether these are
			 * multicast to decide on the TTL check; presumably
			 * the advertisement destination - TODO confirm.
			 */
			struct in_addr	sc_carpaddr;
			struct in6_addr	sc_carpaddr6;
			/* Last counter sent or accepted from a peer. */
			uint64_t	sc_counter;
			/* When true, carp_prepare_ad() picks a random counter. */
			bool		sc_init_counter;
#define	CARP_HMAC_PAD	64
			/* Key that carp_hmac_prepare() derives the pads from. */
			unsigned char sc_key[CARP_KEY_LEN];
			/* Holds ipad during carp_hmac_prepare(), opad afterwards. */
			unsigned char sc_pad[CARP_HMAC_PAD];
			/* Inner hash state precomputed by carp_hmac_prepare(). */
			SHA1_CTX sc_sha1;
		};
		struct { /* sc_version == CARP_VERSION_VRRPv3 */
			/* Our priority; 255 makes vrrp_input_c() ignore peers. */
			uint8_t		sc_vrrp_prio;
			/* Our interval, compared with a peer's Max Adver Int. */
			uint16_t	sc_vrrp_adv_inter;
			/* Interval taken from the last accepted advertisement. */
			uint16_t	sc_vrrp_master_inter;
		};
	};
	/*
	 * sc_naddrs + sc_naddrs6 bounds the walk over sc_ifas in
	 * CARP_FOREACH_IFA(); presumably the IPv4 and IPv6 address counts.
	 */
	int			sc_naddrs;
	int			sc_naddrs6;
	/* NOTE(review): presumably the allocated size of sc_ifas - confirm. */
	int			sc_ifasiz;
	enum { INIT = 0, BACKUP, MASTER }	sc_state;
	int			sc_suppress;
	/* Send failure/success bookkeeping, see carp_send_ad_error(). */
	int			sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	int			sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS	3

	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
};

/*
 * Per-interface CARP state; IFNET_FOREACH_CARP() reaches it through
 * ifp->if_carp.
 */
struct carp_if {
#ifdef INET
	int	cif_naddrs;
#endif
#ifdef INET6
	int	cif_naddrs6;
#endif
	/* Softcs on this interface, linked through sc_list. */
	TAILQ_HEAD(, carp_softc) cif_vrs;
#ifdef INET
	struct ip_moptions 	 cif_imo;
#endif
#ifdef INET6
	struct ip6_moptions 	 cif_im6o;
#endif
	struct ifnet	*cif_ifp;
	/* Taken through CIF_LOCK(); IFNET_FOREACH_CARP() asserts it. */
	struct mtx	cif_mtx;
	uint32_t	cif_flags;
#define	CIF_PROMISC	0x00000001
};
/*
 * Kernel equivalent of struct carpreq, but with more fields for new
 * features.  The leading members must stay identical to struct carpreq;
 * kernel-only additions go after the marker comment below.
 */
struct carpkreq {
	int		carpr_count;
	int		carpr_vhid;
	int		carpr_state;
	int		carpr_advskew;
	int		carpr_advbase;
	unsigned char	carpr_key[CARP_KEY_LEN];
	/* Everything above this is identical to carpreq */
	struct in_addr	carpr_addr;
	struct in6_addr	carpr_addr6;
	carp_version_t	carpr_version;
	uint8_t		carpr_vrrp_priority;
	uint16_t	carpr_vrrp_adv_inter;
};

/*
 * Brief design of carp(4).
 *
 * Any carp-capable ifnet may have a list of carp softcs hanging off
 * its ifp->if_carp pointer. Each softc represents one unique virtual
 * host id, or vhid. The softc has a back pointer to the ifnet. All
 * softcs are joined in a global list, which has quite limited use.
 *
 * Any interface address that takes part in CARP negotiation has a
 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
 * an AF_INET or an AF_INET6 address.
 *
 * Although one can get the softc's backpointer to the ifnet and traverse
 * its ifp->if_addrhead queue to find all interface addresses involved
 * in CARP, we keep a growable array of ifaddr pointers. This allows us
 * to avoid grabbing the IF_ADDR_LOCK() in many traversals that call
 * into the network stack, thus avoiding LORs.
 *
 * Locking:
 *
 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
 * callout-driven events and ioctl()s.
 *
 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
 * To traverse the global list we use the mutex carp_mtx.
 *
 * Known issues with locking:
 *
 * - On module unload we may race (?) with packet processing thread
 *   dereferencing our function pointers.
 */

/* Accept incoming CARP packets. */
VNET_DEFINE_STATIC(int, carp_allow) = 1;
#define	V_carp_allow	VNET(carp_allow)
*/ 218 VNET_DEFINE_STATIC(int, carp_dscp) = 56; 219 #define V_carp_dscp VNET(carp_dscp) 220 221 /* Preempt slower nodes. */ 222 VNET_DEFINE_STATIC(int, carp_preempt) = 0; 223 #define V_carp_preempt VNET(carp_preempt) 224 225 /* Log level. */ 226 VNET_DEFINE_STATIC(int, carp_log) = 1; 227 #define V_carp_log VNET(carp_log) 228 229 /* Global advskew demotion. */ 230 VNET_DEFINE_STATIC(int, carp_demotion) = 0; 231 #define V_carp_demotion VNET(carp_demotion) 232 233 /* Send error demotion factor. */ 234 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; 235 #define V_carp_senderr_adj VNET(carp_senderr_adj) 236 237 /* Iface down demotion factor. */ 238 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 239 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 240 241 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 242 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 243 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 244 245 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 246 "CARP"); 247 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 248 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 249 &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", 250 "Accept incoming CARP packets"); 251 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 252 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 253 0, 0, carp_dscp_sysctl, "I", 254 "DSCP value for carp packets"); 255 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 256 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 257 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 258 &VNET_NAME(carp_log), 0, "CARP log level"); 259 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 260 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 261 0, 0, carp_demote_adj_sysctl, "I", 262 "Adjust demotion factor (skew of advskew)"); 263 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 264 CTLFLAG_VNET 
| CTLFLAG_RW, 265 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 266 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 267 CTLFLAG_VNET | CTLFLAG_RW, 268 &VNET_NAME(carp_ifdown_adj), 0, 269 "Interface down demotion factor adjustment"); 270 271 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 272 VNET_PCPUSTAT_SYSINIT(carpstats); 273 VNET_PCPUSTAT_SYSUNINIT(carpstats); 274 275 #define CARPSTATS_ADD(name, val) \ 276 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 277 sizeof(uint64_t)], (val)) 278 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 279 280 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 281 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 282 283 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 284 NULL, MTX_DEF) 285 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 286 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 287 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 288 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 289 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 290 NULL, MTX_DEF) 291 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 292 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 293 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 294 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 295 #define CIF_FREE(cif) do { \ 296 CIF_LOCK(cif); \ 297 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 298 carp_free_if(cif); \ 299 else \ 300 CIF_UNLOCK(cif); \ 301 } while (0) 302 303 #define CARP_LOG(...) do { \ 304 if (V_carp_log > 0) \ 305 log(LOG_INFO, "carp: " __VA_ARGS__); \ 306 } while (0) 307 308 #define CARP_DEBUG(...) 
do { \ 309 if (V_carp_log > 1) \ 310 log(LOG_DEBUG, __VA_ARGS__); \ 311 } while (0) 312 313 #define IFNET_FOREACH_IFA(ifp, ifa) \ 314 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 315 if ((ifa)->ifa_carp != NULL) 316 317 #define CARP_FOREACH_IFA(sc, ifa) \ 318 CARP_LOCK_ASSERT(sc); \ 319 for (int _i = 0; \ 320 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 321 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 322 ++_i) 323 324 #define IFNET_FOREACH_CARP(ifp, sc) \ 325 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 326 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 327 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 328 329 #define DEMOTE_ADVSKEW(sc) \ 330 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 331 CARP_MAXSKEW : \ 332 (((sc)->sc_advskew + V_carp_demotion < 0) ? \ 333 0 : ((sc)->sc_advskew + V_carp_demotion))) 334 335 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); 336 static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); 337 static struct carp_softc 338 *carp_alloc(struct ifnet *, carp_version_t, int); 339 static void carp_destroy(struct carp_softc *); 340 static struct carp_if 341 *carp_alloc_if(struct ifnet *); 342 static void carp_free_if(struct carp_if *); 343 static void carp_set_state(struct carp_softc *, int, const char* reason); 344 static void carp_sc_state(struct carp_softc *); 345 static void carp_setrun(struct carp_softc *, sa_family_t); 346 static void carp_master_down(void *); 347 static void carp_master_down_locked(struct carp_softc *, 348 const char* reason); 349 static void carp_send_ad_locked(struct carp_softc *); 350 static void vrrp_send_ad_locked(struct carp_softc *); 351 static void carp_addroute(struct carp_softc *); 352 static void carp_ifa_addroute(struct ifaddr *); 353 static void carp_delroute(struct carp_softc *); 354 static void carp_ifa_delroute(struct ifaddr *); 355 static void carp_send_ad_all(void *, int); 356 static void carp_demote_adj(int, char *); 
357 358 static LIST_HEAD(, carp_softc) carp_list = LIST_HEAD_INITIALIZER(carp_list); 359 static struct mtx carp_mtx; 360 static struct sx carp_sx; 361 static struct task carp_sendall_task = 362 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 363 364 static int 365 carp_is_supported_if(if_t ifp) 366 { 367 if (ifp == NULL) 368 return (ENXIO); 369 370 switch (ifp->if_type) { 371 case IFT_ETHER: 372 case IFT_L2VLAN: 373 case IFT_BRIDGE: 374 break; 375 default: 376 return (EOPNOTSUPP); 377 } 378 379 return (0); 380 } 381 382 static void 383 carp_hmac_prepare(struct carp_softc *sc) 384 { 385 uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; 386 uint8_t vhid = sc->sc_vhid & 0xff; 387 struct ifaddr *ifa; 388 int i, found; 389 #ifdef INET 390 struct in_addr last, cur, in; 391 #endif 392 #ifdef INET6 393 struct in6_addr last6, cur6, in6; 394 #endif 395 396 CARP_LOCK_ASSERT(sc); 397 MPASS(sc->sc_version == CARP_VERSION_CARP); 398 399 /* Compute ipad from key. */ 400 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 401 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 402 for (i = 0; i < sizeof(sc->sc_pad); i++) 403 sc->sc_pad[i] ^= 0x36; 404 405 /* Precompute first part of inner hash. 
*/ 406 SHA1Init(&sc->sc_sha1); 407 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 408 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 409 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 410 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 411 #ifdef INET 412 cur.s_addr = 0; 413 do { 414 found = 0; 415 last = cur; 416 cur.s_addr = 0xffffffff; 417 CARP_FOREACH_IFA(sc, ifa) { 418 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 419 if (ifa->ifa_addr->sa_family == AF_INET && 420 ntohl(in.s_addr) > ntohl(last.s_addr) && 421 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 422 cur.s_addr = in.s_addr; 423 found++; 424 } 425 } 426 if (found) 427 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 428 } while (found); 429 #endif /* INET */ 430 #ifdef INET6 431 memset(&cur6, 0, sizeof(cur6)); 432 do { 433 found = 0; 434 last6 = cur6; 435 memset(&cur6, 0xff, sizeof(cur6)); 436 CARP_FOREACH_IFA(sc, ifa) { 437 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 438 if (IN6_IS_SCOPE_EMBED(&in6)) 439 in6.s6_addr16[1] = 0; 440 if (ifa->ifa_addr->sa_family == AF_INET6 && 441 memcmp(&in6, &last6, sizeof(in6)) > 0 && 442 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 443 cur6 = in6; 444 found++; 445 } 446 } 447 if (found) 448 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 449 } while (found); 450 #endif /* INET6 */ 451 452 /* convert ipad to opad */ 453 for (i = 0; i < sizeof(sc->sc_pad); i++) 454 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 455 } 456 457 static void 458 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 459 unsigned char md[20]) 460 { 461 SHA1_CTX sha1ctx; 462 463 CARP_LOCK_ASSERT(sc); 464 465 /* fetch first half of inner hash */ 466 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 467 468 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 469 SHA1Final(md, &sha1ctx); 470 471 /* outer hash */ 472 SHA1Init(&sha1ctx); 473 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 474 SHA1Update(&sha1ctx, md, 20); 475 SHA1Final(md, &sha1ctx); 476 
} 477 478 static int 479 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 480 unsigned char md[20]) 481 { 482 unsigned char md2[20]; 483 484 CARP_LOCK_ASSERT(sc); 485 486 carp_hmac_generate(sc, counter, md2); 487 488 return (bcmp(md, md2, sizeof(md2))); 489 } 490 491 static int 492 vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) 493 { 494 uint16_t cksum; 495 496 /* 497 * Note that VRRPv3 checksums are different from CARP checksums. 498 * Carp just calculates the checksum over the packet. 499 * VRRPv3 includes the pseudo-header checksum as well. 500 */ 501 cksum = in_cksum_skip(m, off + len, off); 502 cksum -= phdrcksum; 503 504 return (cksum); 505 } 506 507 /* 508 * process input packet. 509 * we have rearranged checks order compared to the rfc, 510 * but it seems more efficient this way or not possible otherwise. 511 */ 512 #ifdef INET 513 static int 514 carp_input(struct mbuf **mp, int *offp, int proto) 515 { 516 struct mbuf *m = *mp; 517 struct ip *ip; 518 struct vrrpv3_header *vh; 519 int iplen; 520 int minlen; 521 int totlen; 522 523 iplen = *offp; 524 *mp = NULL; 525 526 CARPSTATS_INC(carps_ipackets); 527 528 if (!V_carp_allow) { 529 m_freem(m); 530 return (IPPROTO_DONE); 531 } 532 533 /* Ensure we have enough header to figure out the version. 
*/ 534 if (m->m_pkthdr.len < iplen + sizeof(*vh)) { 535 CARPSTATS_INC(carps_badlen); 536 CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " 537 "on %s\n", __func__, m->m_len - sizeof(struct ip), 538 if_name(m->m_pkthdr.rcvif)); 539 m_freem(m); 540 return (IPPROTO_DONE); 541 } 542 543 if (m->m_len < iplen + sizeof(*vh)) { 544 if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { 545 CARPSTATS_INC(carps_hdrops); 546 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 547 return (IPPROTO_DONE); 548 } 549 } 550 ip = mtod(m, struct ip *); 551 totlen = ntohs(ip->ip_len); 552 vh = (struct vrrpv3_header *)((char *)ip + iplen); 553 554 switch (vh->vrrp_version) { 555 case CARP_VERSION_CARP: 556 minlen = sizeof(struct carp_header); 557 break; 558 case CARP_VERSION_VRRPv3: 559 minlen = sizeof(struct vrrpv3_header); 560 break; 561 default: 562 CARPSTATS_INC(carps_badver); 563 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 564 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 565 m_freem(m); 566 return (IPPROTO_DONE); 567 } 568 569 /* And now check the length again but with the real minimal length. 
*/ 570 if (m->m_pkthdr.len < iplen + minlen) { 571 CARPSTATS_INC(carps_badlen); 572 CARP_DEBUG("%s: received len %zd < %d " 573 "on %s\n", __func__, m->m_len - sizeof(struct ip), 574 iplen + minlen, 575 if_name(m->m_pkthdr.rcvif)); 576 m_freem(m); 577 return (IPPROTO_DONE); 578 } 579 580 if (m->m_len < iplen + minlen) { 581 if ((m = m_pullup(m, iplen + minlen)) == NULL) { 582 CARPSTATS_INC(carps_hdrops); 583 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 584 return (IPPROTO_DONE); 585 } 586 ip = mtod(m, struct ip *); 587 vh = (struct vrrpv3_header *)((char *)ip + iplen); 588 } 589 590 switch (vh->vrrp_version) { 591 case CARP_VERSION_CARP: { 592 struct carp_header *ch; 593 594 /* verify the CARP checksum */ 595 if (in_cksum_skip(m, totlen, iplen)) { 596 CARPSTATS_INC(carps_badsum); 597 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 598 if_name(m->m_pkthdr.rcvif)); 599 m_freem(m); 600 break; 601 } 602 ch = (struct carp_header *)((char *)ip + iplen); 603 carp_input_c(m, ch, AF_INET, ip->ip_ttl); 604 break; 605 } 606 case CARP_VERSION_VRRPv3: { 607 uint16_t phdrcksum; 608 609 phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 610 htonl((u_short)(totlen - iplen) + ip->ip_p)); 611 vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, 612 phdrcksum); 613 break; 614 } 615 default: 616 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 617 } 618 619 return (IPPROTO_DONE); 620 } 621 #endif 622 623 #ifdef INET6 624 static int 625 carp6_input(struct mbuf **mp, int *offp, int proto) 626 { 627 struct mbuf *m = *mp; 628 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 629 struct vrrpv3_header *vh; 630 u_int len, minlen; 631 632 CARPSTATS_INC(carps_ipackets6); 633 634 if (!V_carp_allow) { 635 m_freem(m); 636 return (IPPROTO_DONE); 637 } 638 639 /* check if received on a valid carp interface */ 640 if (m->m_pkthdr.rcvif->if_carp == NULL) { 641 CARPSTATS_INC(carps_badif); 642 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 
643 __func__, if_name(m->m_pkthdr.rcvif)); 644 m_freem(m); 645 return (IPPROTO_DONE); 646 } 647 648 if (m->m_len < *offp + sizeof(*vh)) { 649 len = m->m_len; 650 m = m_pullup(m, *offp + sizeof(*vh)); 651 if (m == NULL) { 652 CARPSTATS_INC(carps_badlen); 653 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 654 return (IPPROTO_DONE); 655 } 656 ip6 = mtod(m, struct ip6_hdr *); 657 } 658 vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); 659 660 switch (vh->vrrp_version) { 661 case CARP_VERSION_CARP: 662 minlen = sizeof(struct carp_header); 663 break; 664 case CARP_VERSION_VRRPv3: 665 minlen = sizeof(struct vrrpv3_header); 666 break; 667 default: 668 CARPSTATS_INC(carps_badver); 669 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 670 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 671 m_freem(m); 672 return (IPPROTO_DONE); 673 } 674 675 /* And now check the length again but with the real minimal length. */ 676 if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { 677 CARPSTATS_INC(carps_badlen); 678 CARP_DEBUG("%s: received len %zd < %zd " 679 "on %s\n", __func__, m->m_len - sizeof(struct ip), 680 sizeof(*ip6) + minlen, 681 if_name(m->m_pkthdr.rcvif)); 682 m_freem(m); 683 return (IPPROTO_DONE); 684 } 685 686 if (m->m_len < sizeof(*ip6) + minlen) { 687 if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { 688 CARPSTATS_INC(carps_hdrops); 689 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 690 return (IPPROTO_DONE); 691 } 692 ip6 = mtod(m, struct ip6_hdr *); 693 vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 694 } 695 696 switch (vh->vrrp_version) { 697 case CARP_VERSION_CARP: { 698 struct carp_header *ch; 699 700 /* verify the CARP checksum */ 701 if (in_cksum_skip(m, *offp + sizeof(struct carp_header), 702 *offp)) { 703 CARPSTATS_INC(carps_badsum); 704 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 705 if_name(m->m_pkthdr.rcvif)); 706 m_freem(m); 707 break; 708 } 709 ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); 
710 carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); 711 break; 712 } 713 case CARP_VERSION_VRRPv3: { 714 uint16_t phdrcksum; 715 716 phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 717 ip6->ip6_nxt, 0); 718 vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, 719 ntohs(ip6->ip6_plen), phdrcksum); 720 break; 721 } 722 default: 723 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 724 } 725 return (IPPROTO_DONE); 726 } 727 #endif /* INET6 */ 728 729 /* 730 * This routine should not be necessary at all, but some switches 731 * (VMWare ESX vswitches) can echo our own packets back at us, 732 * and we must ignore them or they will cause us to drop out of 733 * MASTER mode. 734 * 735 * We cannot catch all cases of network loops. Instead, what we 736 * do here is catch any packet that arrives with a carp header 737 * with a VHID of 0, that comes from an address that is our own. 738 * These packets are by definition "from us" (even if they are from 739 * a misconfigured host that is pretending to be us). 740 * 741 * The VHID test is outside this mini-function. 
742 */ 743 static int 744 carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 745 { 746 #ifdef INET 747 struct ip *ip4; 748 struct in_addr in4; 749 #endif 750 #ifdef INET6 751 struct ip6_hdr *ip6; 752 struct in6_addr in6; 753 #endif 754 755 switch (af) { 756 #ifdef INET 757 case AF_INET: 758 ip4 = mtod(m, struct ip *); 759 in4 = ifatoia(ifa)->ia_addr.sin_addr; 760 return (in4.s_addr == ip4->ip_src.s_addr); 761 #endif 762 #ifdef INET6 763 case AF_INET6: 764 ip6 = mtod(m, struct ip6_hdr *); 765 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 766 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 767 #endif 768 default: 769 break; 770 } 771 return (0); 772 } 773 774 static struct ifaddr * 775 carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) 776 { 777 struct ifnet *ifp = m->m_pkthdr.rcvif; 778 struct ifaddr *ifa, *match; 779 int error; 780 781 NET_EPOCH_ASSERT(); 782 783 /* 784 * Verify that the VHID is valid on the receiving interface. 785 * 786 * There should be just one match. If there are none 787 * the VHID is not valid and we drop the packet. If 788 * there are multiple VHID matches, take just the first 789 * one, for compatibility with previous code. While we're 790 * scanning, check for obvious loops in the network topology 791 * (these should never happen, and as noted above, we may 792 * miss real loops; this is just a double-check). 793 */ 794 error = 0; 795 match = NULL; 796 IFNET_FOREACH_IFA(ifp, ifa) { 797 if (match == NULL && ifa->ifa_carp != NULL && 798 ifa->ifa_addr->sa_family == af && 799 ifa->ifa_carp->sc_vhid == vhid) 800 match = ifa; 801 if (vhid == 0 && carp_source_is_self(m, ifa, af)) 802 error = ELOOP; 803 } 804 ifa = error ? NULL : match; 805 if (ifa != NULL) 806 ifa_ref(ifa); 807 808 if (ifa == NULL) { 809 if (error == ELOOP) { 810 CARP_DEBUG("dropping looped packet on interface %s\n", 811 if_name(ifp)); 812 CARPSTATS_INC(carps_badif); /* ??? 
*/ 813 } else { 814 CARPSTATS_INC(carps_badvhid); 815 } 816 } 817 818 return (ifa); 819 } 820 821 static void 822 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) 823 { 824 struct ifnet *ifp = m->m_pkthdr.rcvif; 825 struct ifaddr *ifa; 826 struct carp_softc *sc; 827 uint64_t tmp_counter; 828 struct timeval sc_tv, ch_tv; 829 bool multicast = false; 830 831 NET_EPOCH_ASSERT(); 832 MPASS(ch->carp_version == CARP_VERSION_CARP); 833 834 ifa = carp_find_ifa(m, af, ch->carp_vhid); 835 if (ifa == NULL) { 836 m_freem(m); 837 return; 838 } 839 840 sc = ifa->ifa_carp; 841 CARP_LOCK(sc); 842 843 /* verify the CARP version. */ 844 if (sc->sc_version != CARP_VERSION_CARP) { 845 CARP_UNLOCK(sc); 846 847 CARPSTATS_INC(carps_badver); 848 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 849 ch->carp_version); 850 ifa_free(ifa); 851 m_freem(m); 852 return; 853 } 854 855 if (ifa->ifa_addr->sa_family == AF_INET) { 856 multicast = IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)); 857 } else { 858 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); 859 } 860 ifa_free(ifa); 861 862 /* verify that the IP TTL is 255, but only if we're not in unicast mode. 
*/ 863 if (multicast && ttl != CARP_DFLTTL) { 864 CARPSTATS_INC(carps_badttl); 865 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 866 ttl, if_name(m->m_pkthdr.rcvif)); 867 goto out; 868 } 869 870 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 871 CARPSTATS_INC(carps_badauth); 872 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 873 sc->sc_vhid, if_name(ifp)); 874 goto out; 875 } 876 877 tmp_counter = ntohl(ch->carp_counter[0]); 878 tmp_counter = tmp_counter<<32; 879 tmp_counter += ntohl(ch->carp_counter[1]); 880 881 /* XXX Replay protection goes here */ 882 883 sc->sc_init_counter = false; 884 sc->sc_counter = tmp_counter; 885 886 sc_tv.tv_sec = sc->sc_advbase; 887 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 888 ch_tv.tv_sec = ch->carp_advbase; 889 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 890 891 switch (sc->sc_state) { 892 case INIT: 893 break; 894 case MASTER: 895 /* 896 * If we receive an advertisement from a master who's going to 897 * be more frequent than us, go into BACKUP state. 898 */ 899 if (timevalcmp(&sc_tv, &ch_tv, >) || 900 timevalcmp(&sc_tv, &ch_tv, ==)) { 901 callout_stop(&sc->sc_ad_tmo); 902 carp_set_state(sc, BACKUP, 903 "more frequent advertisement received"); 904 carp_setrun(sc, 0); 905 carp_delroute(sc); 906 } 907 break; 908 case BACKUP: 909 /* 910 * If we're pre-empting masters who advertise slower than us, 911 * and this one claims to be slower, treat him as down. 912 */ 913 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 914 carp_master_down_locked(sc, 915 "preempting a slower master"); 916 break; 917 } 918 919 /* 920 * If the master is going to advertise at such a low frequency 921 * that he's guaranteed to time out, we'd might as well just 922 * treat him as timed out now. 
923 */ 924 sc_tv.tv_sec = sc->sc_advbase * 3; 925 if (timevalcmp(&sc_tv, &ch_tv, <)) { 926 carp_master_down_locked(sc, "master will time out"); 927 break; 928 } 929 930 /* 931 * Otherwise, we reset the counter and wait for the next 932 * advertisement. 933 */ 934 carp_setrun(sc, af); 935 break; 936 } 937 938 out: 939 CARP_UNLOCK(sc); 940 m_freem(m); 941 } 942 943 static void 944 vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, 945 int len, uint16_t phdrcksum) 946 { 947 struct vrrpv3_header *vh = mtodo(m, off); 948 struct ifnet *ifp = m->m_pkthdr.rcvif; 949 struct ifaddr *ifa; 950 struct carp_softc *sc; 951 952 NET_EPOCH_ASSERT(); 953 MPASS(vh->vrrp_version == CARP_VERSION_VRRPv3); 954 955 ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); 956 if (ifa == NULL) { 957 m_freem(m); 958 return; 959 } 960 961 sc = ifa->ifa_carp; 962 CARP_LOCK(sc); 963 964 ifa_free(ifa); 965 966 /* verify the CARP version. */ 967 if (sc->sc_version != CARP_VERSION_VRRPv3) { 968 CARP_UNLOCK(sc); 969 970 CARPSTATS_INC(carps_badver); 971 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 972 vh->vrrp_version); 973 m_freem(m); 974 return; 975 } 976 977 /* verify that the IP TTL is 255. */ 978 if (ttl != CARP_DFLTTL) { 979 CARPSTATS_INC(carps_badttl); 980 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 981 ttl, if_name(m->m_pkthdr.rcvif)); 982 goto out; 983 } 984 985 if (vrrp_checksum_verify(m, off, len, phdrcksum)) { 986 CARPSTATS_INC(carps_badsum); 987 CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, 988 sc->sc_vhid, if_name(ifp)); 989 goto out; 990 } 991 992 /* RFC9568, 7.1 Receiving VRRP packets. */ 993 if (sc->sc_vrrp_prio == 255) { 994 CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", 995 __func__); 996 goto out; 997 } 998 999 /* XXX TODO Check IP address payload. 
*/ 1000 1001 sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); 1002 1003 switch (sc->sc_state) { 1004 case INIT: 1005 break; 1006 case MASTER: 1007 /* 1008 * If we receive an advertisement from a master who's going to 1009 * be more frequent than us, go into BACKUP state. 1010 * Same if the peer has a higher priority than us. 1011 */ 1012 if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || 1013 vh->vrrp_priority > sc->sc_vrrp_prio) { 1014 callout_stop(&sc->sc_ad_tmo); 1015 carp_set_state(sc, BACKUP, 1016 "more frequent advertisement received"); 1017 carp_setrun(sc, 0); 1018 carp_delroute(sc); 1019 } 1020 break; 1021 case BACKUP: 1022 /* 1023 * If we're pre-empting masters who advertise slower than us, 1024 * and this one claims to be slower, treat him as down. 1025 */ 1026 if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter 1027 || vh->vrrp_priority < sc->sc_vrrp_prio)) { 1028 carp_master_down_locked(sc, 1029 "preempting a slower master"); 1030 break; 1031 } 1032 1033 /* 1034 * Otherwise, we reset the counter and wait for the next 1035 * advertisement. 
1036 */ 1037 carp_setrun(sc, af); 1038 break; 1039 } 1040 1041 out: 1042 CARP_UNLOCK(sc); 1043 m_freem(m); 1044 } 1045 1046 static int 1047 carp_tag(struct carp_softc *sc, struct mbuf *m) 1048 { 1049 struct m_tag *mtag; 1050 1051 /* Tag packet for carp_output */ 1052 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(sc->sc_vhid), 1053 M_NOWAIT)) == NULL) { 1054 m_freem(m); 1055 CARPSTATS_INC(carps_onomem); 1056 return (ENOMEM); 1057 } 1058 bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); 1059 m_tag_prepend(m, mtag); 1060 1061 return (0); 1062 } 1063 1064 static void 1065 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 1066 { 1067 1068 MPASS(sc->sc_version == CARP_VERSION_CARP); 1069 1070 if (sc->sc_init_counter) { 1071 /* this could also be seconds since unix epoch */ 1072 sc->sc_counter = arc4random(); 1073 sc->sc_counter = sc->sc_counter << 32; 1074 sc->sc_counter += arc4random(); 1075 } else 1076 sc->sc_counter++; 1077 1078 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 1079 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 1080 1081 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 1082 } 1083 1084 static inline void 1085 send_ad_locked(struct carp_softc *sc) 1086 { 1087 switch (sc->sc_version) { 1088 case CARP_VERSION_CARP: 1089 carp_send_ad_locked(sc); 1090 break; 1091 case CARP_VERSION_VRRPv3: 1092 vrrp_send_ad_locked(sc); 1093 break; 1094 } 1095 } 1096 1097 /* 1098 * To avoid LORs and possible recursions this function shouldn't 1099 * be called directly, but scheduled via taskqueue. 
1100 */ 1101 static void 1102 carp_send_ad_all(void *ctx __unused, int pending __unused) 1103 { 1104 struct carp_softc *sc; 1105 struct epoch_tracker et; 1106 1107 NET_EPOCH_ENTER(et); 1108 mtx_lock(&carp_mtx); 1109 LIST_FOREACH(sc, &carp_list, sc_next) 1110 if (sc->sc_state == MASTER) { 1111 CARP_LOCK(sc); 1112 CURVNET_SET(sc->sc_carpdev->if_vnet); 1113 send_ad_locked(sc); 1114 CURVNET_RESTORE(); 1115 CARP_UNLOCK(sc); 1116 } 1117 mtx_unlock(&carp_mtx); 1118 NET_EPOCH_EXIT(et); 1119 } 1120 1121 /* Send a periodic advertisement, executed in callout context. */ 1122 static void 1123 carp_callout(void *v) 1124 { 1125 struct carp_softc *sc = v; 1126 struct epoch_tracker et; 1127 1128 NET_EPOCH_ENTER(et); 1129 CARP_LOCK_ASSERT(sc); 1130 CURVNET_SET(sc->sc_carpdev->if_vnet); 1131 send_ad_locked(sc); 1132 CURVNET_RESTORE(); 1133 CARP_UNLOCK(sc); 1134 NET_EPOCH_EXIT(et); 1135 } 1136 1137 static void 1138 carp_send_ad_error(struct carp_softc *sc, int error) 1139 { 1140 1141 /* 1142 * We track errors and successful sends with this logic: 1143 * - Any error resets success counter to 0. 1144 * - MAX_ERRORS triggers demotion. 1145 * - MIN_SUCCESS successes resets error counter to 0. 1146 * - MIN_SUCCESS reverts demotion, if it was triggered before. 
1147 */ 1148 if (error) { 1149 if (sc->sc_sendad_errors < INT_MAX) 1150 sc->sc_sendad_errors++; 1151 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1152 static const char fmt[] = "send error %d on %s"; 1153 char msg[sizeof(fmt) + IFNAMSIZ]; 1154 1155 sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); 1156 carp_demote_adj(V_carp_senderr_adj, msg); 1157 } 1158 sc->sc_sendad_success = 0; 1159 } else if (sc->sc_sendad_errors > 0) { 1160 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 1161 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1162 static const char fmt[] = "send ok on %s"; 1163 char msg[sizeof(fmt) + IFNAMSIZ]; 1164 1165 sprintf(msg, fmt, if_name(sc->sc_carpdev)); 1166 carp_demote_adj(-V_carp_senderr_adj, msg); 1167 } 1168 sc->sc_sendad_errors = 0; 1169 } 1170 } 1171 } 1172 1173 /* 1174 * Pick the best ifaddr on the given ifp for sending CARP 1175 * advertisements. 1176 * 1177 * "Best" here is defined by ifa_preferred(). This function is much 1178 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 1179 * 1180 * (This could be simplified to return the actual address, except that 1181 * it has a different format in AF_INET and AF_INET6.) 
1182 */ 1183 static struct ifaddr * 1184 carp_best_ifa(int af, struct ifnet *ifp) 1185 { 1186 struct ifaddr *ifa, *best; 1187 1188 NET_EPOCH_ASSERT(); 1189 1190 if (af >= AF_MAX) 1191 return (NULL); 1192 best = NULL; 1193 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1194 if (ifa->ifa_addr->sa_family == af && 1195 (best == NULL || ifa_preferred(best, ifa))) 1196 best = ifa; 1197 } 1198 if (best != NULL) 1199 ifa_ref(best); 1200 return (best); 1201 } 1202 1203 static void 1204 carp_send_ad_locked(struct carp_softc *sc) 1205 { 1206 struct carp_header ch; 1207 struct timeval tv; 1208 struct ifaddr *ifa; 1209 struct carp_header *ch_ptr; 1210 struct mbuf *m; 1211 int len, advskew; 1212 1213 NET_EPOCH_ASSERT(); 1214 CARP_LOCK_ASSERT(sc); 1215 MPASS(sc->sc_version == CARP_VERSION_CARP); 1216 1217 advskew = DEMOTE_ADVSKEW(sc); 1218 tv.tv_sec = sc->sc_advbase; 1219 tv.tv_usec = advskew * 1000000 / 256; 1220 1221 ch.carp_version = CARP_VERSION_CARP; 1222 ch.carp_type = CARP_ADVERTISEMENT; 1223 ch.carp_vhid = sc->sc_vhid; 1224 ch.carp_advbase = sc->sc_advbase; 1225 ch.carp_advskew = advskew; 1226 ch.carp_authlen = 7; /* XXX DEFINE */ 1227 ch.carp_pad1 = 0; /* must be zero */ 1228 ch.carp_cksum = 0; 1229 1230 /* XXXGL: OpenBSD picks first ifaddr with needed family. 
*/ 1231 1232 #ifdef INET 1233 if (sc->sc_naddrs) { 1234 struct ip *ip; 1235 1236 m = m_gethdr(M_NOWAIT, MT_DATA); 1237 if (m == NULL) { 1238 CARPSTATS_INC(carps_onomem); 1239 goto resched; 1240 } 1241 len = sizeof(*ip) + sizeof(ch); 1242 m->m_pkthdr.len = len; 1243 m->m_pkthdr.rcvif = NULL; 1244 m->m_len = len; 1245 M_ALIGN(m, m->m_len); 1246 if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr))) 1247 m->m_flags |= M_MCAST; 1248 ip = mtod(m, struct ip *); 1249 ip->ip_v = IPVERSION; 1250 ip->ip_hl = sizeof(*ip) >> 2; 1251 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1252 ip->ip_len = htons(len); 1253 ip->ip_off = htons(IP_DF); 1254 ip->ip_ttl = CARP_DFLTTL; 1255 ip->ip_p = IPPROTO_CARP; 1256 ip->ip_sum = 0; 1257 ip_fillid(ip, V_ip_random_id); 1258 1259 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1260 if (ifa != NULL) { 1261 ip->ip_src.s_addr = 1262 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1263 ifa_free(ifa); 1264 } else 1265 ip->ip_src.s_addr = 0; 1266 ip->ip_dst = sc->sc_carpaddr; 1267 1268 ch_ptr = (struct carp_header *)(&ip[1]); 1269 bcopy(&ch, ch_ptr, sizeof(ch)); 1270 carp_prepare_ad(m, sc, ch_ptr); 1271 if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)) && 1272 carp_tag(sc, m) != 0) 1273 goto resched; 1274 1275 m->m_data += sizeof(*ip); 1276 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 1277 m->m_data -= sizeof(*ip); 1278 1279 CARPSTATS_INC(carps_opackets); 1280 1281 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1282 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1283 } 1284 #endif /* INET */ 1285 #ifdef INET6 1286 if (sc->sc_naddrs6) { 1287 struct ip6_hdr *ip6; 1288 1289 m = m_gethdr(M_NOWAIT, MT_DATA); 1290 if (m == NULL) { 1291 CARPSTATS_INC(carps_onomem); 1292 goto resched; 1293 } 1294 len = sizeof(*ip6) + sizeof(ch); 1295 m->m_pkthdr.len = len; 1296 m->m_pkthdr.rcvif = NULL; 1297 m->m_len = len; 1298 M_ALIGN(m, m->m_len); 1299 ip6 = mtod(m, struct ip6_hdr *); 1300 bzero(ip6, sizeof(*ip6)); 1301 ip6->ip6_vfc |= IPV6_VERSION; 1302 /* Traffic 
class isn't defined in ip6 struct instead 1303 * it gets offset into flowid field */ 1304 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1305 IPTOS_DSCP_OFFSET)); 1306 ip6->ip6_hlim = CARP_DFLTTL; 1307 ip6->ip6_nxt = IPPROTO_CARP; 1308 1309 /* set the source address */ 1310 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1311 if (ifa != NULL) { 1312 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1313 sizeof(struct in6_addr)); 1314 ifa_free(ifa); 1315 } else 1316 /* This should never happen with IPv6. */ 1317 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1318 1319 /* Set the multicast destination. */ 1320 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); 1321 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || 1322 IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { 1323 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1324 m_freem(m); 1325 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1326 goto resched; 1327 } 1328 } 1329 1330 ch_ptr = (struct carp_header *)(&ip6[1]); 1331 bcopy(&ch, ch_ptr, sizeof(ch)); 1332 carp_prepare_ad(m, sc, ch_ptr); 1333 if (IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6) && 1334 carp_tag(sc, m) != 0) 1335 goto resched; 1336 1337 m->m_data += sizeof(*ip6); 1338 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1339 m->m_data -= sizeof(*ip6); 1340 1341 CARPSTATS_INC(carps_opackets6); 1342 1343 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1344 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1345 } 1346 #endif /* INET6 */ 1347 1348 resched: 1349 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc); 1350 } 1351 1352 static void 1353 vrrp_send_ad_locked(struct carp_softc *sc) 1354 { 1355 struct vrrpv3_header *vh_ptr; 1356 struct ifaddr *ifa; 1357 struct mbuf *m; 1358 int len; 1359 struct vrrpv3_header vh = { 1360 .vrrp_version = CARP_VERSION_VRRPv3, 1361 .vrrp_type = VRRP_TYPE_ADVERTISEMENT, 1362 .vrrp_vrtid = sc->sc_vhid, 1363 .vrrp_priority = sc->sc_vrrp_prio, 1364 .vrrp_count_addr = 0, 1365 .vrrp_max_adver_int = 
htons(sc->sc_vrrp_adv_inter), 1366 .vrrp_checksum = 0, 1367 }; 1368 1369 NET_EPOCH_ASSERT(); 1370 CARP_LOCK_ASSERT(sc); 1371 MPASS(sc->sc_version == CARP_VERSION_VRRPv3); 1372 1373 #ifdef INET 1374 if (sc->sc_naddrs) { 1375 struct ip *ip; 1376 1377 m = m_gethdr(M_NOWAIT, MT_DATA); 1378 if (m == NULL) { 1379 CARPSTATS_INC(carps_onomem); 1380 goto resched; 1381 } 1382 len = sizeof(*ip) + sizeof(vh); 1383 m->m_pkthdr.len = len; 1384 m->m_pkthdr.rcvif = NULL; 1385 m->m_len = len; 1386 M_ALIGN(m, m->m_len); 1387 m->m_flags |= M_MCAST; 1388 ip = mtod(m, struct ip *); 1389 ip->ip_v = IPVERSION; 1390 ip->ip_hl = sizeof(*ip) >> 2; 1391 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1392 ip->ip_off = htons(IP_DF); 1393 ip->ip_ttl = CARP_DFLTTL; 1394 ip->ip_p = IPPROTO_CARP; 1395 ip->ip_sum = 0; 1396 ip_fillid(ip, V_ip_random_id); 1397 1398 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1399 if (ifa != NULL) { 1400 ip->ip_src.s_addr = 1401 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1402 ifa_free(ifa); 1403 } else 1404 ip->ip_src.s_addr = 0; 1405 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 1406 1407 /* Include the IP addresses in the announcement. 
*/ 1408 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1409 struct sockaddr_in *in; 1410 1411 MPASS(sc->sc_ifas[i] != NULL); 1412 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) 1413 continue; 1414 1415 in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; 1416 1417 if (m_append(m, sizeof(in->sin_addr), 1418 (caddr_t)&in->sin_addr) != 1) { 1419 m_freem(m); 1420 goto resched; 1421 } 1422 1423 vh.vrrp_count_addr++; 1424 len += sizeof(in->sin_addr); 1425 } 1426 ip->ip_len = htons(len); 1427 1428 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); 1429 bcopy(&vh, vh_ptr, sizeof(vh)); 1430 1431 vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, 1432 ip->ip_dst.s_addr, 1433 htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); 1434 vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); 1435 1436 if (carp_tag(sc, m)) 1437 goto resched; 1438 1439 CARPSTATS_INC(carps_opackets); 1440 1441 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1442 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1443 } 1444 #endif 1445 #ifdef INET6 1446 if (sc->sc_naddrs6) { 1447 struct ip6_hdr *ip6; 1448 1449 m = m_gethdr(M_NOWAIT, MT_DATA); 1450 if (m == NULL) { 1451 CARPSTATS_INC(carps_onomem); 1452 goto resched; 1453 } 1454 len = sizeof(*ip6) + sizeof(vh); 1455 m->m_pkthdr.len = len; 1456 m->m_pkthdr.rcvif = NULL; 1457 m->m_len = len; 1458 M_ALIGN(m, m->m_len); 1459 m->m_flags |= M_MCAST; 1460 ip6 = mtod(m, struct ip6_hdr *); 1461 bzero(ip6, sizeof(*ip6)); 1462 ip6->ip6_vfc |= IPV6_VERSION; 1463 /* Traffic class isn't defined in ip6 struct instead 1464 * it gets offset into flowid field */ 1465 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1466 IPTOS_DSCP_OFFSET)); 1467 ip6->ip6_hlim = CARP_DFLTTL; 1468 ip6->ip6_nxt = IPPROTO_CARP; 1469 1470 /* set the source address */ 1471 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1472 if (ifa != NULL) { 1473 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1474 sizeof(struct in6_addr)); 1475 ifa_free(ifa); 1476 } else 
1477 /* This should never happen with IPv6. */ 1478 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1479 1480 /* Set the multicast destination. */ 1481 bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); 1482 ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 1483 ip6->ip6_dst.s6_addr8[15] = 0x12; 1484 1485 /* Include the IP addresses in the announcement. */ 1486 len = sizeof(vh); 1487 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1488 struct sockaddr_in6 *in6; 1489 1490 MPASS(sc->sc_ifas[i] != NULL); 1491 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) 1492 continue; 1493 1494 in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; 1495 1496 if (m_append(m, sizeof(in6->sin6_addr), 1497 (char *)&in6->sin6_addr) != 1) { 1498 m_freem(m); 1499 goto resched; 1500 } 1501 1502 vh.vrrp_count_addr++; 1503 len += sizeof(in6->sin6_addr); 1504 } 1505 ip6->ip6_plen = htonl(len); 1506 1507 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 1508 bcopy(&vh, vh_ptr, sizeof(vh)); 1509 1510 vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); 1511 vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); 1512 1513 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1514 m_freem(m); 1515 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1516 goto resched; 1517 } 1518 1519 if (carp_tag(sc, m)) 1520 goto resched; 1521 CARPSTATS_INC(carps_opackets6); 1522 1523 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1524 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1525 } 1526 #endif 1527 1528 resched: 1529 callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, 1530 carp_callout, sc); 1531 } 1532 1533 static void 1534 carp_addroute(struct carp_softc *sc) 1535 { 1536 struct ifaddr *ifa; 1537 1538 CARP_FOREACH_IFA(sc, ifa) 1539 carp_ifa_addroute(ifa); 1540 } 1541 1542 static void 1543 carp_ifa_addroute(struct ifaddr *ifa) 1544 { 1545 1546 switch (ifa->ifa_addr->sa_family) { 1547 #ifdef INET 1548 case AF_INET: 1549 
in_addprefix(ifatoia(ifa)); 1550 ifa_add_loopback_route(ifa, 1551 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1552 break; 1553 #endif 1554 #ifdef INET6 1555 case AF_INET6: 1556 ifa_add_loopback_route(ifa, 1557 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1558 nd6_add_ifa_lle(ifatoia6(ifa)); 1559 break; 1560 #endif 1561 } 1562 } 1563 1564 static void 1565 carp_delroute(struct carp_softc *sc) 1566 { 1567 struct ifaddr *ifa; 1568 1569 CARP_FOREACH_IFA(sc, ifa) 1570 carp_ifa_delroute(ifa); 1571 } 1572 1573 static void 1574 carp_ifa_delroute(struct ifaddr *ifa) 1575 { 1576 1577 switch (ifa->ifa_addr->sa_family) { 1578 #ifdef INET 1579 case AF_INET: 1580 ifa_del_loopback_route(ifa, 1581 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1582 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1583 break; 1584 #endif 1585 #ifdef INET6 1586 case AF_INET6: 1587 ifa_del_loopback_route(ifa, 1588 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1589 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1590 break; 1591 #endif 1592 } 1593 } 1594 1595 int 1596 carp_master(struct ifaddr *ifa) 1597 { 1598 struct carp_softc *sc = ifa->ifa_carp; 1599 1600 return (sc->sc_state == MASTER); 1601 } 1602 1603 #ifdef INET 1604 /* 1605 * Broadcast a gratuitous ARP request containing 1606 * the virtual router MAC address for each IP address 1607 * associated with the virtual router. 
1608 */ 1609 static void 1610 carp_send_arp(struct carp_softc *sc) 1611 { 1612 struct ifaddr *ifa; 1613 struct in_addr addr; 1614 1615 NET_EPOCH_ASSERT(); 1616 1617 CARP_FOREACH_IFA(sc, ifa) { 1618 if (ifa->ifa_addr->sa_family != AF_INET) 1619 continue; 1620 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1621 arp_announce_ifaddr(sc->sc_carpdev, addr, sc->sc_addr); 1622 } 1623 } 1624 1625 int 1626 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1627 { 1628 struct carp_softc *sc = ifa->ifa_carp; 1629 1630 if (sc->sc_state == MASTER) { 1631 *enaddr = sc->sc_addr; 1632 return (1); 1633 } 1634 1635 return (0); 1636 } 1637 #endif 1638 1639 #ifdef INET6 1640 static void 1641 carp_send_na(struct carp_softc *sc) 1642 { 1643 struct ifaddr *ifa; 1644 int flags; 1645 1646 /* 1647 * Sending Unsolicited Neighbor Advertisements 1648 * 1649 * If the node is a router, we MUST set the Router flag to one. 1650 * We set Override flag to one and send link-layer address option, 1651 * thus neighboring nodes will install the new link-layer address. 1652 */ 1653 flags = ND_NA_FLAG_OVERRIDE; 1654 if (V_ip6_forwarding) 1655 flags |= ND_NA_FLAG_ROUTER; 1656 CARP_FOREACH_IFA(sc, ifa) { 1657 if (ifa->ifa_addr->sa_family != AF_INET6) 1658 continue; 1659 /* 1660 * We use unspecified address as destination here to avoid 1661 * scope initialization for each call. 1662 * nd6_na_output() will use all nodes multicast address if 1663 * destinaion address is unspecified. 1664 */ 1665 nd6_na_output(sc->sc_carpdev, &in6addr_any, IFA_IN6(ifa), 1666 flags, ND6_NA_OPT_LLA | ND6_NA_CARP_MASTER, NULL); 1667 DELAY(1000); /* RetransTimer */ 1668 } 1669 } 1670 1671 /* 1672 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1673 * matches and is not a carp address. Returns NULL otherwise. 
1674 */ 1675 struct ifaddr * 1676 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1677 { 1678 struct ifaddr *ifa; 1679 1680 NET_EPOCH_ASSERT(); 1681 1682 ifa = NULL; 1683 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1684 if (ifa->ifa_addr->sa_family != AF_INET6) 1685 continue; 1686 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1687 continue; 1688 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1689 ifa = NULL; 1690 else 1691 ifa_ref(ifa); 1692 break; 1693 } 1694 1695 return (ifa); 1696 } 1697 1698 char * 1699 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1700 { 1701 struct ifaddr *ifa; 1702 char *mac = NULL; 1703 1704 NET_EPOCH_ASSERT(); 1705 1706 IFNET_FOREACH_IFA(ifp, ifa) 1707 if (ifa->ifa_addr->sa_family == AF_INET6 && 1708 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1709 struct carp_softc *sc = ifa->ifa_carp; 1710 struct m_tag *mtag; 1711 1712 mtag = m_tag_get(PACKET_TAG_CARP, 1713 sizeof(sc->sc_vhid) + sizeof(sc->sc_addr), 1714 M_NOWAIT); 1715 if (mtag == NULL) { 1716 CARPSTATS_INC(carps_onomem); 1717 break; 1718 } 1719 /* carp_output expects sc_vhid first. */ 1720 bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); 1721 /* 1722 * Save sc_addr into mtag data after sc_vhid to avoid 1723 * possible access to destroyed softc. 1724 */ 1725 mac = (char *)(mtag + 1) + sizeof(sc->sc_vhid); 1726 bcopy(sc->sc_addr, mac, sizeof(sc->sc_addr)); 1727 1728 m_tag_prepend(m, mtag); 1729 break; 1730 } 1731 1732 return (mac); 1733 } 1734 #endif /* INET6 */ 1735 1736 int 1737 carp_forus(struct ifnet *ifp, u_char *dhost) 1738 { 1739 struct carp_softc *sc; 1740 uint8_t *ena = dhost; 1741 1742 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1743 return (0); 1744 1745 CIF_LOCK(ifp->if_carp); 1746 IFNET_FOREACH_CARP(ifp, sc) { 1747 /* 1748 * CARP_LOCK() is not here, since would protect nothing, but 1749 * cause deadlock with if_bridge, calling this under its lock. 
1750 */ 1751 if (sc->sc_state == MASTER && !bcmp(dhost, sc->sc_addr, 1752 ETHER_ADDR_LEN)) { 1753 CIF_UNLOCK(ifp->if_carp); 1754 return (1); 1755 } 1756 } 1757 CIF_UNLOCK(ifp->if_carp); 1758 1759 return (0); 1760 } 1761 1762 /* Master down timeout event, executed in callout context. */ 1763 static void 1764 carp_master_down(void *v) 1765 { 1766 struct carp_softc *sc = v; 1767 struct epoch_tracker et; 1768 1769 NET_EPOCH_ENTER(et); 1770 CARP_LOCK_ASSERT(sc); 1771 1772 CURVNET_SET(sc->sc_carpdev->if_vnet); 1773 if (sc->sc_state == BACKUP) { 1774 carp_master_down_locked(sc, "master timed out"); 1775 } 1776 CURVNET_RESTORE(); 1777 1778 CARP_UNLOCK(sc); 1779 NET_EPOCH_EXIT(et); 1780 } 1781 1782 static void 1783 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1784 { 1785 1786 NET_EPOCH_ASSERT(); 1787 CARP_LOCK_ASSERT(sc); 1788 1789 switch (sc->sc_state) { 1790 case BACKUP: 1791 carp_set_state(sc, MASTER, reason); 1792 send_ad_locked(sc); 1793 #ifdef INET 1794 carp_send_arp(sc); 1795 #endif 1796 #ifdef INET6 1797 carp_send_na(sc); 1798 #endif 1799 carp_setrun(sc, 0); 1800 carp_addroute(sc); 1801 break; 1802 case INIT: 1803 case MASTER: 1804 #ifdef INVARIANTS 1805 panic("carp: VHID %u@%s: master_down event in %s state\n", 1806 sc->sc_vhid, 1807 if_name(sc->sc_carpdev), 1808 sc->sc_state ? "MASTER" : "INIT"); 1809 #endif 1810 break; 1811 } 1812 } 1813 1814 /* 1815 * When in backup state, af indicates whether to reset the master down timer 1816 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1817 */ 1818 static void 1819 carp_setrun(struct carp_softc *sc, sa_family_t af) 1820 { 1821 struct timeval tv; 1822 int timeout; 1823 1824 CARP_LOCK_ASSERT(sc); 1825 1826 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1827 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1828 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1829 !V_carp_allow) 1830 return; 1831 1832 switch (sc->sc_state) { 1833 case INIT: 1834 carp_set_state(sc, BACKUP, "initialization complete"); 1835 carp_setrun(sc, 0); 1836 break; 1837 case BACKUP: 1838 callout_stop(&sc->sc_ad_tmo); 1839 1840 switch (sc->sc_version) { 1841 case CARP_VERSION_CARP: 1842 tv.tv_sec = 3 * sc->sc_advbase; 1843 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1844 timeout = tvtohz(&tv); 1845 break; 1846 case CARP_VERSION_VRRPv3: 1847 /* skew time */ 1848 timeout = (256 - sc->sc_vrrp_prio) * 1849 sc->sc_vrrp_master_inter / 256; 1850 timeout += (3 * sc->sc_vrrp_master_inter); 1851 timeout *= hz; 1852 timeout /= 100; /* master interval is in centiseconds */ 1853 break; 1854 } 1855 switch (af) { 1856 #ifdef INET 1857 case AF_INET: 1858 callout_reset(&sc->sc_md_tmo, timeout, 1859 carp_master_down, sc); 1860 break; 1861 #endif 1862 #ifdef INET6 1863 case AF_INET6: 1864 callout_reset(&sc->sc_md6_tmo, timeout, 1865 carp_master_down, sc); 1866 break; 1867 #endif 1868 default: 1869 #ifdef INET 1870 if (sc->sc_naddrs) 1871 callout_reset(&sc->sc_md_tmo, timeout, 1872 carp_master_down, sc); 1873 #endif 1874 #ifdef INET6 1875 if (sc->sc_naddrs6) 1876 callout_reset(&sc->sc_md6_tmo, timeout, 1877 carp_master_down, sc); 1878 #endif 1879 break; 1880 } 1881 break; 1882 case MASTER: 1883 switch (sc->sc_version) { 1884 case CARP_VERSION_CARP: 1885 tv.tv_sec = sc->sc_advbase; 1886 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1887 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1888 carp_callout, sc); 1889 break; 1890 case CARP_VERSION_VRRPv3: 1891 callout_reset(&sc->sc_ad_tmo, 1892 sc->sc_vrrp_adv_inter * hz / 100, 1893 carp_callout, sc); 1894 
break; 1895 } 1896 break; 1897 } 1898 } 1899 1900 /* 1901 * Setup multicast structures. 1902 */ 1903 static int 1904 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1905 { 1906 struct ifnet *ifp = cif->cif_ifp; 1907 int error = 0; 1908 1909 switch (sa) { 1910 #ifdef INET 1911 case AF_INET: 1912 { 1913 struct ip_moptions *imo = &cif->cif_imo; 1914 struct in_mfilter *imf; 1915 struct in_addr addr; 1916 1917 if (ip_mfilter_first(&imo->imo_head) != NULL) 1918 return (0); 1919 1920 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1921 ip_mfilter_init(&imo->imo_head); 1922 imo->imo_multicast_vif = -1; 1923 1924 addr.s_addr = htonl(INADDR_CARP_GROUP); 1925 if ((error = in_joingroup(ifp, &addr, NULL, 1926 &imf->imf_inm)) != 0) { 1927 ip_mfilter_free(imf); 1928 break; 1929 } 1930 1931 ip_mfilter_insert(&imo->imo_head, imf); 1932 imo->imo_multicast_ifp = ifp; 1933 imo->imo_multicast_ttl = CARP_DFLTTL; 1934 imo->imo_multicast_loop = 0; 1935 break; 1936 } 1937 #endif 1938 #ifdef INET6 1939 case AF_INET6: 1940 { 1941 struct ip6_moptions *im6o = &cif->cif_im6o; 1942 struct in6_mfilter *im6f[2]; 1943 struct in6_addr in6; 1944 1945 if (ip6_mfilter_first(&im6o->im6o_head)) 1946 return (0); 1947 1948 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1949 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1950 1951 ip6_mfilter_init(&im6o->im6o_head); 1952 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1953 im6o->im6o_multicast_ifp = ifp; 1954 1955 /* Join IPv6 CARP multicast group. */ 1956 bzero(&in6, sizeof(in6)); 1957 in6.s6_addr16[0] = htons(0xff02); 1958 in6.s6_addr8[15] = 0x12; 1959 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1960 ip6_mfilter_free(im6f[0]); 1961 ip6_mfilter_free(im6f[1]); 1962 break; 1963 } 1964 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1965 ip6_mfilter_free(im6f[0]); 1966 ip6_mfilter_free(im6f[1]); 1967 break; 1968 } 1969 1970 /* Join solicited multicast address. 
*/ 1971 bzero(&in6, sizeof(in6)); 1972 in6.s6_addr16[0] = htons(0xff02); 1973 in6.s6_addr32[1] = 0; 1974 in6.s6_addr32[2] = htonl(1); 1975 in6.s6_addr32[3] = 0; 1976 in6.s6_addr8[12] = 0xff; 1977 1978 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1979 ip6_mfilter_free(im6f[0]); 1980 ip6_mfilter_free(im6f[1]); 1981 break; 1982 } 1983 1984 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1985 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1986 ip6_mfilter_free(im6f[0]); 1987 ip6_mfilter_free(im6f[1]); 1988 break; 1989 } 1990 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1991 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1992 break; 1993 } 1994 #endif 1995 } 1996 1997 return (error); 1998 } 1999 2000 /* 2001 * Free multicast structures. 2002 */ 2003 static void 2004 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 2005 { 2006 #ifdef INET 2007 struct ip_moptions *imo = &cif->cif_imo; 2008 struct in_mfilter *imf; 2009 #endif 2010 #ifdef INET6 2011 struct ip6_moptions *im6o = &cif->cif_im6o; 2012 struct in6_mfilter *im6f; 2013 #endif 2014 sx_assert(&carp_sx, SA_XLOCKED); 2015 2016 switch (sa) { 2017 #ifdef INET 2018 case AF_INET: 2019 if (cif->cif_naddrs != 0) 2020 break; 2021 2022 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2023 ip_mfilter_remove(&imo->imo_head, imf); 2024 in_leavegroup(imf->imf_inm, NULL); 2025 ip_mfilter_free(imf); 2026 } 2027 break; 2028 #endif 2029 #ifdef INET6 2030 case AF_INET6: 2031 if (cif->cif_naddrs6 != 0) 2032 break; 2033 2034 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 2035 ip6_mfilter_remove(&im6o->im6o_head, im6f); 2036 in6_leavegroup(im6f->im6f_in6m, NULL); 2037 ip6_mfilter_free(im6f); 2038 } 2039 break; 2040 #endif 2041 } 2042 } 2043 2044 int 2045 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 2046 { 2047 struct m_tag *mtag; 2048 int vhid; 2049 2050 if (!sa) 2051 return (0); 2052 2053 switch (sa->sa_family) { 2054 #ifdef INET 2055 case 
AF_INET: 2056 break; 2057 #endif 2058 #ifdef INET6 2059 case AF_INET6: 2060 break; 2061 #endif 2062 default: 2063 return (0); 2064 } 2065 2066 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2067 if (mtag == NULL) 2068 return (0); 2069 2070 bcopy(mtag + 1, &vhid, sizeof(vhid)); 2071 2072 /* Set the source MAC address to the Virtual Router MAC Address. */ 2073 switch (ifp->if_type) { 2074 case IFT_ETHER: 2075 case IFT_BRIDGE: 2076 case IFT_L2VLAN: { 2077 struct ether_header *eh; 2078 2079 eh = mtod(m, struct ether_header *); 2080 eh->ether_shost[0] = 0; 2081 eh->ether_shost[1] = 0; 2082 eh->ether_shost[2] = 0x5e; 2083 eh->ether_shost[3] = 0; 2084 eh->ether_shost[4] = 1; 2085 eh->ether_shost[5] = vhid; 2086 } 2087 break; 2088 default: 2089 printf("%s: carp is not supported for the %d interface type\n", 2090 if_name(ifp), ifp->if_type); 2091 return (EOPNOTSUPP); 2092 } 2093 2094 return (0); 2095 } 2096 2097 static struct carp_softc* 2098 carp_alloc(struct ifnet *ifp, carp_version_t version, int vhid) 2099 { 2100 struct carp_softc *sc; 2101 struct carp_if *cif; 2102 2103 sx_assert(&carp_sx, SA_XLOCKED); 2104 2105 if ((cif = ifp->if_carp) == NULL) 2106 cif = carp_alloc_if(ifp); 2107 2108 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK); 2109 *sc = (struct carp_softc ){ 2110 .sc_vhid = vhid, 2111 .sc_version = version, 2112 .sc_state = INIT, 2113 .sc_carpdev = ifp, 2114 .sc_ifasiz = sizeof(struct ifaddr *), 2115 .sc_addr = { 0, 0, 0x5e, 0, 1, vhid }, 2116 }; 2117 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 2118 2119 switch (version) { 2120 case CARP_VERSION_CARP: 2121 sc->sc_advbase = CARP_DFLTINTV; 2122 sc->sc_init_counter = true; 2123 sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); 2124 sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 2125 sc->sc_carpaddr6.s6_addr8[15] = 0x12; 2126 break; 2127 case CARP_VERSION_VRRPv3: 2128 sc->sc_vrrp_adv_inter = 100; 2129 sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; 2130 sc->sc_vrrp_prio = 100; 2131 break; 2132 } 
2133 2134 CARP_LOCK_INIT(sc); 2135 #ifdef INET 2136 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2137 #endif 2138 #ifdef INET6 2139 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2140 #endif 2141 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2142 2143 CIF_LOCK(cif); 2144 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 2145 CIF_UNLOCK(cif); 2146 2147 mtx_lock(&carp_mtx); 2148 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 2149 mtx_unlock(&carp_mtx); 2150 2151 return (sc); 2152 } 2153 2154 static void 2155 carp_grow_ifas(struct carp_softc *sc) 2156 { 2157 struct ifaddr **new; 2158 2159 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 2160 CARP_LOCK(sc); 2161 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 2162 free(sc->sc_ifas, M_CARP); 2163 sc->sc_ifas = new; 2164 sc->sc_ifasiz *= 2; 2165 CARP_UNLOCK(sc); 2166 } 2167 2168 static void 2169 carp_destroy(struct carp_softc *sc) 2170 { 2171 struct ifnet *ifp = sc->sc_carpdev; 2172 struct carp_if *cif = ifp->if_carp; 2173 2174 sx_assert(&carp_sx, SA_XLOCKED); 2175 2176 if (sc->sc_suppress) 2177 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 2178 CARP_UNLOCK(sc); 2179 2180 CIF_LOCK(cif); 2181 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 2182 CIF_UNLOCK(cif); 2183 2184 mtx_lock(&carp_mtx); 2185 LIST_REMOVE(sc, sc_next); 2186 mtx_unlock(&carp_mtx); 2187 2188 callout_drain(&sc->sc_ad_tmo); 2189 #ifdef INET 2190 callout_drain(&sc->sc_md_tmo); 2191 #endif 2192 #ifdef INET6 2193 callout_drain(&sc->sc_md6_tmo); 2194 #endif 2195 CARP_LOCK_DESTROY(sc); 2196 2197 free(sc->sc_ifas, M_CARP); 2198 free(sc, M_CARP); 2199 } 2200 2201 static struct carp_if* 2202 carp_alloc_if(struct ifnet *ifp) 2203 { 2204 struct carp_if *cif; 2205 int error; 2206 2207 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 2208 2209 if ((error = ifpromisc(ifp, 1)) != 0) 2210 printf("%s: ifpromisc(%s) failed: %d\n", 2211 __func__, if_name(ifp), error); 2212 else 2213 cif->cif_flags 
|= CIF_PROMISC; 2214 2215 CIF_LOCK_INIT(cif); 2216 cif->cif_ifp = ifp; 2217 TAILQ_INIT(&cif->cif_vrs); 2218 2219 IF_ADDR_WLOCK(ifp); 2220 ifp->if_carp = cif; 2221 if_ref(ifp); 2222 IF_ADDR_WUNLOCK(ifp); 2223 2224 return (cif); 2225 } 2226 2227 static void 2228 carp_free_if(struct carp_if *cif) 2229 { 2230 struct ifnet *ifp = cif->cif_ifp; 2231 2232 CIF_LOCK_ASSERT(cif); 2233 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 2234 __func__)); 2235 2236 IF_ADDR_WLOCK(ifp); 2237 ifp->if_carp = NULL; 2238 IF_ADDR_WUNLOCK(ifp); 2239 2240 CIF_LOCK_DESTROY(cif); 2241 2242 if (cif->cif_flags & CIF_PROMISC) 2243 ifpromisc(ifp, 0); 2244 if_rele(ifp); 2245 2246 free(cif, M_CARP); 2247 } 2248 2249 static bool 2250 carp_carprcp(void *arg, struct carp_softc *sc, int priv) 2251 { 2252 struct carpreq *carpr = arg; 2253 2254 CARP_LOCK(sc); 2255 carpr->carpr_state = sc->sc_state; 2256 carpr->carpr_vhid = sc->sc_vhid; 2257 switch (sc->sc_version) { 2258 case CARP_VERSION_CARP: 2259 carpr->carpr_advbase = sc->sc_advbase; 2260 carpr->carpr_advskew = sc->sc_advskew; 2261 if (priv) 2262 bcopy(sc->sc_key, carpr->carpr_key, 2263 sizeof(carpr->carpr_key)); 2264 else 2265 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 2266 break; 2267 case CARP_VERSION_VRRPv3: 2268 break; 2269 } 2270 CARP_UNLOCK(sc); 2271 2272 return (true); 2273 } 2274 2275 static int 2276 carp_ioctl_set(if_t ifp, struct carpkreq *carpr) 2277 { 2278 struct epoch_tracker et; 2279 struct carp_softc *sc = NULL; 2280 int error = 0; 2281 2282 if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID) 2283 return (EINVAL); 2284 2285 switch (carpr->carpr_version) { 2286 case CARP_VERSION_CARP: 2287 if (carpr->carpr_advbase != 0 && (carpr->carpr_advbase > 255 || 2288 carpr->carpr_advbase < CARP_DFLTINTV)) 2289 return (EINVAL); 2290 if (carpr->carpr_advskew < 0 || carpr->carpr_advskew >= 255) 2291 return (EINVAL); 2292 break; 2293 case CARP_VERSION_VRRPv3: 2294 /* XXXGL: shouldn't we check anything? 
*/ 2295 break; 2296 default: 2297 return (EINVAL); 2298 } 2299 2300 if (ifp->if_carp) { 2301 IFNET_FOREACH_CARP(ifp, sc) 2302 if (sc->sc_vhid == carpr->carpr_vhid) 2303 break; 2304 } 2305 2306 if (sc == NULL) 2307 sc = carp_alloc(ifp, carpr->carpr_version, carpr->carpr_vhid); 2308 else if (sc->sc_version != carpr->carpr_version) 2309 return (EINVAL); 2310 2311 CARP_LOCK(sc); 2312 switch (sc->sc_version) { 2313 case CARP_VERSION_CARP: 2314 if (carpr->carpr_advbase != 0) 2315 sc->sc_advbase = carpr->carpr_advbase; 2316 sc->sc_advskew = carpr->carpr_advskew; 2317 if (carpr->carpr_addr.s_addr != INADDR_ANY) 2318 sc->sc_carpaddr = carpr->carpr_addr; 2319 if (!IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { 2320 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, 2321 sizeof(sc->sc_carpaddr6)); 2322 } 2323 if (carpr->carpr_key[0] != '\0') { 2324 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2325 carp_hmac_prepare(sc); 2326 } 2327 break; 2328 case CARP_VERSION_VRRPv3: 2329 if (carpr->carpr_vrrp_priority != 0) 2330 sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; 2331 if (carpr->carpr_vrrp_adv_inter) 2332 sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; 2333 break; 2334 } 2335 2336 if (sc->sc_state != INIT && 2337 carpr->carpr_state != sc->sc_state) { 2338 switch (carpr->carpr_state) { 2339 case BACKUP: 2340 callout_stop(&sc->sc_ad_tmo); 2341 carp_set_state(sc, BACKUP, 2342 "user requested via ifconfig"); 2343 carp_setrun(sc, 0); 2344 carp_delroute(sc); 2345 break; 2346 case MASTER: 2347 NET_EPOCH_ENTER(et); 2348 carp_master_down_locked(sc, 2349 "user requested via ifconfig"); 2350 NET_EPOCH_EXIT(et); 2351 break; 2352 default: 2353 break; 2354 } 2355 } 2356 CARP_UNLOCK(sc); 2357 2358 return (error); 2359 } 2360 2361 static int 2362 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr, 2363 bool (*outfn)(void *, struct carp_softc *, int), void *arg) 2364 { 2365 int priveleged; 2366 struct carp_softc *sc; 2367 2368 if (carpr->carpr_vhid < 0 || 
carpr->carpr_vhid > CARP_MAXVHID) 2369 return (EINVAL); 2370 if (carpr->carpr_count < 1) 2371 return (EMSGSIZE); 2372 if (ifp->if_carp == NULL) 2373 return (ENOENT); 2374 2375 priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0); 2376 if (carpr->carpr_vhid != 0) { 2377 IFNET_FOREACH_CARP(ifp, sc) 2378 if (sc->sc_vhid == carpr->carpr_vhid) 2379 break; 2380 if (sc == NULL) 2381 return (ENOENT); 2382 2383 if (! outfn(arg, sc, priveleged)) 2384 return (ENOMEM); 2385 carpr->carpr_count = 1; 2386 } else { 2387 int count; 2388 2389 count = 0; 2390 IFNET_FOREACH_CARP(ifp, sc) 2391 count++; 2392 2393 if (count > carpr->carpr_count) 2394 return (EMSGSIZE); 2395 2396 IFNET_FOREACH_CARP(ifp, sc) { 2397 if (! outfn(arg, sc, priveleged)) 2398 return (ENOMEM); 2399 carpr->carpr_count = count; 2400 } 2401 } 2402 2403 return (0); 2404 } 2405 2406 int 2407 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 2408 { 2409 struct carpreq carpr; 2410 struct carpkreq carprk = { 2411 .carpr_version = CARP_VERSION_CARP, 2412 }; 2413 struct ifnet *ifp; 2414 int error = 0; 2415 2416 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 2417 return (error); 2418 2419 ifp = ifunit_ref(ifr->ifr_name); 2420 if ((error = carp_is_supported_if(ifp)) != 0) 2421 goto out; 2422 2423 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2424 error = EADDRNOTAVAIL; 2425 goto out; 2426 } 2427 2428 sx_xlock(&carp_sx); 2429 switch (cmd) { 2430 case SIOCSVH: 2431 if ((error = priv_check(td, PRIV_NETINET_CARP))) 2432 break; 2433 2434 memcpy(&carprk, &carpr, sizeof(carpr)); 2435 error = carp_ioctl_set(ifp, &carprk); 2436 break; 2437 2438 case SIOCGVH: 2439 error = carp_ioctl_get(ifp, td->td_ucred, &carpr, 2440 carp_carprcp, &carpr); 2441 if (error == 0) { 2442 error = copyout(&carpr, 2443 (char *)ifr_data_get_ptr(ifr), 2444 carpr.carpr_count * sizeof(carpr)); 2445 } 2446 break; 2447 default: 2448 error = EINVAL; 2449 } 2450 sx_xunlock(&carp_sx); 2451 2452 out: 2453 if (ifp != NULL) 2454 
if_rele(ifp); 2455 2456 return (error); 2457 } 2458 2459 static int 2460 carp_get_vhid(struct ifaddr *ifa) 2461 { 2462 2463 if (ifa == NULL || ifa->ifa_carp == NULL) 2464 return (0); 2465 2466 return (ifa->ifa_carp->sc_vhid); 2467 } 2468 2469 int 2470 carp_attach(struct ifaddr *ifa, int vhid) 2471 { 2472 struct ifnet *ifp = ifa->ifa_ifp; 2473 struct carp_if *cif = ifp->if_carp; 2474 struct carp_softc *sc; 2475 int index, error; 2476 2477 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 2478 2479 switch (ifa->ifa_addr->sa_family) { 2480 #ifdef INET 2481 case AF_INET: 2482 #endif 2483 #ifdef INET6 2484 case AF_INET6: 2485 #endif 2486 break; 2487 default: 2488 return (EPROTOTYPE); 2489 } 2490 2491 sx_xlock(&carp_sx); 2492 if (ifp->if_carp == NULL) { 2493 sx_xunlock(&carp_sx); 2494 return (ENOPROTOOPT); 2495 } 2496 2497 IFNET_FOREACH_CARP(ifp, sc) 2498 if (sc->sc_vhid == vhid) 2499 break; 2500 if (sc == NULL) { 2501 sx_xunlock(&carp_sx); 2502 return (ENOENT); 2503 } 2504 2505 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 2506 if (error) { 2507 CIF_FREE(cif); 2508 sx_xunlock(&carp_sx); 2509 return (error); 2510 } 2511 2512 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 2513 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 2514 carp_grow_ifas(sc); 2515 2516 switch (ifa->ifa_addr->sa_family) { 2517 #ifdef INET 2518 case AF_INET: 2519 cif->cif_naddrs++; 2520 sc->sc_naddrs++; 2521 break; 2522 #endif 2523 #ifdef INET6 2524 case AF_INET6: 2525 cif->cif_naddrs6++; 2526 sc->sc_naddrs6++; 2527 break; 2528 #endif 2529 } 2530 2531 ifa_ref(ifa); 2532 2533 CARP_LOCK(sc); 2534 sc->sc_ifas[index - 1] = ifa; 2535 ifa->ifa_carp = sc; 2536 if (sc->sc_version == CARP_VERSION_CARP) 2537 carp_hmac_prepare(sc); 2538 carp_sc_state(sc); 2539 CARP_UNLOCK(sc); 2540 2541 sx_xunlock(&carp_sx); 2542 2543 return (0); 2544 } 2545 2546 void 2547 carp_detach(struct ifaddr *ifa, bool keep_cif) 2548 { 2549 struct ifnet *ifp = ifa->ifa_ifp; 2550 struct carp_if *cif = 
ifp->if_carp; 2551 struct carp_softc *sc = ifa->ifa_carp; 2552 int i, index; 2553 2554 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 2555 2556 sx_xlock(&carp_sx); 2557 2558 CARP_LOCK(sc); 2559 /* Shift array. */ 2560 index = sc->sc_naddrs + sc->sc_naddrs6; 2561 for (i = 0; i < index; i++) 2562 if (sc->sc_ifas[i] == ifa) 2563 break; 2564 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 2565 for (; i < index - 1; i++) 2566 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 2567 sc->sc_ifas[index - 1] = NULL; 2568 2569 switch (ifa->ifa_addr->sa_family) { 2570 #ifdef INET 2571 case AF_INET: 2572 cif->cif_naddrs--; 2573 sc->sc_naddrs--; 2574 break; 2575 #endif 2576 #ifdef INET6 2577 case AF_INET6: 2578 cif->cif_naddrs6--; 2579 sc->sc_naddrs6--; 2580 break; 2581 #endif 2582 } 2583 2584 carp_ifa_delroute(ifa); 2585 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2586 2587 ifa->ifa_carp = NULL; 2588 ifa_free(ifa); 2589 2590 if (sc->sc_version == CARP_VERSION_CARP) 2591 carp_hmac_prepare(sc); 2592 carp_sc_state(sc); 2593 2594 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2595 carp_destroy(sc); 2596 else 2597 CARP_UNLOCK(sc); 2598 2599 if (!keep_cif) 2600 CIF_FREE(cif); 2601 2602 sx_xunlock(&carp_sx); 2603 } 2604 2605 static void 2606 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2607 { 2608 2609 CARP_LOCK_ASSERT(sc); 2610 2611 if (sc->sc_state != state) { 2612 const char *carp_states[] = { CARP_STATES }; 2613 char subsys[IFNAMSIZ+5]; 2614 2615 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2616 if_name(sc->sc_carpdev)); 2617 2618 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2619 carp_states[sc->sc_state], carp_states[state], reason); 2620 2621 sc->sc_state = state; 2622 2623 devctl_notify("CARP", subsys, carp_states[state], NULL); 2624 } 2625 } 2626 2627 static void 2628 carp_linkstate(struct ifnet *ifp) 2629 { 2630 struct carp_softc *sc; 2631 2632 CIF_LOCK(ifp->if_carp); 2633 IFNET_FOREACH_CARP(ifp, sc) { 2634 
CARP_LOCK(sc); 2635 carp_sc_state(sc); 2636 CARP_UNLOCK(sc); 2637 } 2638 CIF_UNLOCK(ifp->if_carp); 2639 } 2640 2641 static void 2642 carp_sc_state(struct carp_softc *sc) 2643 { 2644 2645 CARP_LOCK_ASSERT(sc); 2646 2647 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2648 !(sc->sc_carpdev->if_flags & IFF_UP) || 2649 !V_carp_allow) { 2650 callout_stop(&sc->sc_ad_tmo); 2651 #ifdef INET 2652 callout_stop(&sc->sc_md_tmo); 2653 #endif 2654 #ifdef INET6 2655 callout_stop(&sc->sc_md6_tmo); 2656 #endif 2657 carp_set_state(sc, INIT, "hardware interface down"); 2658 carp_setrun(sc, 0); 2659 carp_delroute(sc); 2660 if (!sc->sc_suppress) 2661 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2662 sc->sc_suppress = 1; 2663 } else { 2664 carp_set_state(sc, INIT, "hardware interface up"); 2665 carp_setrun(sc, 0); 2666 if (sc->sc_suppress) 2667 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2668 sc->sc_suppress = 0; 2669 } 2670 } 2671 2672 static void 2673 carp_demote_adj(int adj, char *reason) 2674 { 2675 atomic_add_int(&V_carp_demotion, adj); 2676 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2677 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2678 } 2679 2680 static int 2681 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2682 { 2683 int new, error; 2684 struct carp_softc *sc; 2685 2686 new = V_carp_allow; 2687 error = sysctl_handle_int(oidp, &new, 0, req); 2688 if (error || !req->newptr) 2689 return (error); 2690 2691 if (V_carp_allow != new) { 2692 V_carp_allow = new; 2693 2694 mtx_lock(&carp_mtx); 2695 LIST_FOREACH(sc, &carp_list, sc_next) { 2696 CARP_LOCK(sc); 2697 if (curvnet == sc->sc_carpdev->if_vnet) 2698 carp_sc_state(sc); 2699 CARP_UNLOCK(sc); 2700 } 2701 mtx_unlock(&carp_mtx); 2702 } 2703 2704 return (0); 2705 } 2706 2707 static int 2708 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2709 { 2710 int new, error; 2711 2712 new = V_carp_dscp; 2713 error = sysctl_handle_int(oidp, &new, 0, req); 2714 if (error || !req->newptr) 2715 return 
(error); 2716 2717 if (new < 0 || new > 63) 2718 return (EINVAL); 2719 2720 V_carp_dscp = new; 2721 2722 return (0); 2723 } 2724 2725 static int 2726 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2727 { 2728 int new, error; 2729 2730 new = V_carp_demotion; 2731 error = sysctl_handle_int(oidp, &new, 0, req); 2732 if (error || !req->newptr) 2733 return (error); 2734 2735 carp_demote_adj(new, "sysctl"); 2736 2737 return (0); 2738 } 2739 2740 static int 2741 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 2742 { 2743 if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN)) 2744 return (EINVAL); 2745 2746 memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla)); 2747 return (0); 2748 } 2749 2750 struct carp_nl_send_args { 2751 struct nlmsghdr *hdr; 2752 struct nl_pstate *npt; 2753 }; 2754 2755 static bool 2756 carp_nl_send(void *arg, struct carp_softc *sc, int priv) 2757 { 2758 struct carp_nl_send_args *nlsa = arg; 2759 struct nlmsghdr *hdr = nlsa->hdr; 2760 struct nl_pstate *npt = nlsa->npt; 2761 struct nl_writer *nw = npt->nw; 2762 struct genlmsghdr *ghdr_new; 2763 2764 if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { 2765 nlmsg_abort(nw); 2766 return (false); 2767 } 2768 2769 ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); 2770 if (ghdr_new == NULL) { 2771 nlmsg_abort(nw); 2772 return (false); 2773 } 2774 2775 ghdr_new->cmd = CARP_NL_CMD_GET; 2776 ghdr_new->version = 0; 2777 ghdr_new->reserved = 0; 2778 2779 CARP_LOCK(sc); 2780 2781 nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); 2782 nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); 2783 nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); 2784 switch (sc->sc_version) { 2785 case CARP_VERSION_CARP: 2786 nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); 2787 nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); 2788 nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); 2789 nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); 2790 if (priv) 2791 
nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), 2792 sc->sc_key); 2793 break; 2794 case CARP_VERSION_VRRPv3: 2795 nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); 2796 nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, 2797 sc->sc_vrrp_adv_inter); 2798 break; 2799 } 2800 2801 CARP_UNLOCK(sc); 2802 2803 if (! nlmsg_end(nw)) { 2804 nlmsg_abort(nw); 2805 return (false); 2806 } 2807 2808 return (true); 2809 } 2810 2811 struct nl_carp_parsed { 2812 unsigned int ifindex; 2813 char *ifname; 2814 uint32_t state; 2815 uint32_t vhid; 2816 int32_t advbase; 2817 int32_t advskew; 2818 char key[CARP_KEY_LEN]; 2819 struct in_addr addr; 2820 struct in6_addr addr6; 2821 carp_version_t version; 2822 uint8_t vrrp_prio; 2823 uint16_t vrrp_adv_inter; 2824 }; 2825 2826 #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) 2827 static const struct nlattr_parser nla_p_set[] = { 2828 { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, 2829 { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, 2830 { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 }, 2831 { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 }, 2832 { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key }, 2833 { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 }, 2834 { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, 2835 { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, 2836 { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, 2837 { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, 2838 { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, 2839 { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, 2840 }; 2841 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_empty, nla_p_set); 2842 #undef _OUT 2843 2844 2845 static int 2846 carp_nl_get(struct 
nlmsghdr *hdr, struct nl_pstate *npt) 2847 { 2848 struct nl_carp_parsed attrs = { }; 2849 struct carp_nl_send_args args; 2850 struct carpreq carpr = { }; 2851 struct epoch_tracker et; 2852 if_t ifp = NULL; 2853 int error; 2854 2855 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2856 if (error != 0) 2857 return (error); 2858 2859 NET_EPOCH_ENTER(et); 2860 if (attrs.ifname != NULL) 2861 ifp = ifunit_ref(attrs.ifname); 2862 else if (attrs.ifindex != 0) 2863 ifp = ifnet_byindex_ref(attrs.ifindex); 2864 NET_EPOCH_EXIT(et); 2865 2866 if ((error = carp_is_supported_if(ifp)) != 0) 2867 goto out; 2868 2869 hdr->nlmsg_flags |= NLM_F_MULTI; 2870 args.hdr = hdr; 2871 args.npt = npt; 2872 2873 carpr.carpr_vhid = attrs.vhid; 2874 carpr.carpr_count = CARP_MAXVHID; 2875 2876 sx_xlock(&carp_sx); 2877 error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr, 2878 carp_nl_send, &args); 2879 sx_xunlock(&carp_sx); 2880 2881 if (! nlmsg_end_dump(npt->nw, error, hdr)) 2882 error = ENOMEM; 2883 2884 out: 2885 if (ifp != NULL) 2886 if_rele(ifp); 2887 2888 return (error); 2889 } 2890 2891 static int 2892 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) 2893 { 2894 struct nl_carp_parsed attrs = { }; 2895 struct carpkreq carpr; 2896 struct epoch_tracker et; 2897 if_t ifp = NULL; 2898 int error; 2899 2900 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2901 if (error != 0) 2902 return (error); 2903 2904 if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID) 2905 return (EINVAL); 2906 if (attrs.state > CARP_MAXSTATE) 2907 return (EINVAL); 2908 if (attrs.version == 0) /* compat with pre-VRRPv3 */ 2909 attrs.version = CARP_VERSION_CARP; 2910 switch (attrs.version) { 2911 case CARP_VERSION_CARP: 2912 if (attrs.advbase < 0 || attrs.advskew < 0) 2913 return (EINVAL); 2914 if (attrs.advbase > 255) 2915 return (EINVAL); 2916 if (attrs.advskew >= 255) 2917 return (EINVAL); 2918 break; 2919 case CARP_VERSION_VRRPv3: 2920 if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) 2921 return 
(EINVAL); 2922 break; 2923 default: 2924 return (EINVAL); 2925 } 2926 2927 NET_EPOCH_ENTER(et); 2928 if (attrs.ifname != NULL) 2929 ifp = ifunit_ref(attrs.ifname); 2930 else if (attrs.ifindex != 0) 2931 ifp = ifnet_byindex_ref(attrs.ifindex); 2932 NET_EPOCH_EXIT(et); 2933 2934 if ((error = carp_is_supported_if(ifp)) != 0) 2935 goto out; 2936 2937 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2938 error = EADDRNOTAVAIL; 2939 goto out; 2940 } 2941 2942 carpr.carpr_count = 1; 2943 carpr.carpr_vhid = attrs.vhid; 2944 carpr.carpr_state = attrs.state; 2945 carpr.carpr_version = attrs.version; 2946 switch (attrs.version) { 2947 case CARP_VERSION_CARP: 2948 carpr.carpr_advbase = attrs.advbase; 2949 carpr.carpr_advskew = attrs.advskew; 2950 carpr.carpr_addr = attrs.addr; 2951 carpr.carpr_addr6 = attrs.addr6; 2952 memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); 2953 break; 2954 case CARP_VERSION_VRRPv3: 2955 carpr.carpr_vrrp_priority = attrs.vrrp_prio; 2956 carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; 2957 break; 2958 } 2959 2960 sx_xlock(&carp_sx); 2961 error = carp_ioctl_set(ifp, &carpr); 2962 sx_xunlock(&carp_sx); 2963 2964 out: 2965 if (ifp != NULL) 2966 if_rele(ifp); 2967 2968 return (error); 2969 } 2970 2971 static const struct nlhdr_parser *all_parsers[] = { 2972 &carp_parser 2973 }; 2974 2975 static const struct genl_cmd carp_cmds[] = { 2976 { 2977 .cmd_num = CARP_NL_CMD_GET, 2978 .cmd_name = "SIOCGVH", 2979 .cmd_cb = carp_nl_get, 2980 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | 2981 GENL_CMD_CAP_HASPOL, 2982 }, 2983 { 2984 .cmd_num = CARP_NL_CMD_SET, 2985 .cmd_name = "SIOCSVH", 2986 .cmd_cb = carp_nl_set, 2987 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, 2988 .cmd_priv = PRIV_NETINET_CARP, 2989 }, 2990 }; 2991 2992 static uint16_t carp_family_id; 2993 static void 2994 carp_nl_register(void) 2995 { 2996 bool ret __diagused; 2997 2998 NL_VERIFY_PARSERS(all_parsers); 2999 carp_family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, 3000 
CARP_NL_CMD_MAX); 3001 MPASS(carp_family_id != 0); 3002 3003 ret = genl_register_cmds(carp_family_id, carp_cmds, nitems(carp_cmds)); 3004 MPASS(ret); 3005 } 3006 3007 static void 3008 carp_nl_unregister(void) 3009 { 3010 genl_unregister_family(carp_family_id); 3011 } 3012 3013 static void 3014 carp_mod_cleanup(void) 3015 { 3016 3017 carp_nl_unregister(); 3018 3019 #ifdef INET 3020 (void)ipproto_unregister(IPPROTO_CARP); 3021 carp_iamatch_p = NULL; 3022 #endif 3023 #ifdef INET6 3024 (void)ip6proto_unregister(IPPROTO_CARP); 3025 carp_iamatch6_p = NULL; 3026 carp_macmatch6_p = NULL; 3027 #endif 3028 carp_ioctl_p = NULL; 3029 carp_attach_p = NULL; 3030 carp_detach_p = NULL; 3031 carp_get_vhid_p = NULL; 3032 carp_linkstate_p = NULL; 3033 carp_forus_p = NULL; 3034 carp_output_p = NULL; 3035 carp_demote_adj_p = NULL; 3036 carp_master_p = NULL; 3037 mtx_unlock(&carp_mtx); 3038 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 3039 mtx_destroy(&carp_mtx); 3040 sx_destroy(&carp_sx); 3041 } 3042 3043 static void 3044 ipcarp_sysinit(void) 3045 { 3046 3047 /* Load allow as tunable so to postpone carp start after module load */ 3048 TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow); 3049 } 3050 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL); 3051 3052 static int 3053 carp_mod_load(void) 3054 { 3055 int err; 3056 3057 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 3058 sx_init(&carp_sx, "carp_sx"); 3059 carp_get_vhid_p = carp_get_vhid; 3060 carp_forus_p = carp_forus; 3061 carp_output_p = carp_output; 3062 carp_linkstate_p = carp_linkstate; 3063 carp_ioctl_p = carp_ioctl; 3064 carp_attach_p = carp_attach; 3065 carp_detach_p = carp_detach; 3066 carp_demote_adj_p = carp_demote_adj; 3067 carp_master_p = carp_master; 3068 #ifdef INET6 3069 carp_iamatch6_p = carp_iamatch6; 3070 carp_macmatch6_p = carp_macmatch6; 3071 err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); 3072 if (err) { 3073 printf("carp: error %d registering with INET6\n", 
err); 3074 carp_mod_cleanup(); 3075 return (err); 3076 } 3077 #endif 3078 #ifdef INET 3079 carp_iamatch_p = carp_iamatch; 3080 err = ipproto_register(IPPROTO_CARP, carp_input, NULL); 3081 if (err) { 3082 printf("carp: error %d registering with INET\n", err); 3083 carp_mod_cleanup(); 3084 return (err); 3085 } 3086 #endif 3087 3088 carp_nl_register(); 3089 3090 return (0); 3091 } 3092 3093 static int 3094 carp_modevent(module_t mod, int type, void *data) 3095 { 3096 switch (type) { 3097 case MOD_LOAD: 3098 return carp_mod_load(); 3099 /* NOTREACHED */ 3100 case MOD_UNLOAD: 3101 mtx_lock(&carp_mtx); 3102 if (LIST_EMPTY(&carp_list)) 3103 carp_mod_cleanup(); 3104 else { 3105 mtx_unlock(&carp_mtx); 3106 return (EBUSY); 3107 } 3108 break; 3109 3110 default: 3111 return (EINVAL); 3112 } 3113 3114 return (0); 3115 } 3116 3117 static moduledata_t carp_mod = { 3118 "carp", 3119 carp_modevent, 3120 0 3121 }; 3122 3123 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 3124