1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include "opt_bpf.h" 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/devctl.h> 39 #include <sys/jail.h> 40 #include <sys/kassert.h> 41 #include <sys/kernel.h> 42 #include <sys/limits.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/module.h> 46 #include <sys/priv.h> 47 #include <sys/proc.h> 48 #include <sys/socket.h> 49 #include <sys/sockio.h> 50 #include <sys/sysctl.h> 51 #include <sys/syslog.h> 52 #include <sys/taskqueue.h> 53 #include <sys/counter.h> 54 55 #include <net/ethernet.h> 56 #include <net/if.h> 57 #include <net/if_var.h> 58 #include <net/if_dl.h> 59 #include <net/if_llatbl.h> 60 #include <net/if_private.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/vnet.h> 64 65 #if defined(INET) || defined(INET6) 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/ip_carp.h> 69 #include <netinet/ip_carp_nl.h> 70 #include <netinet/ip.h> 71 #include <machine/in_cksum.h> 72 #endif 73 #ifdef INET 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #endif 77 78 #ifdef INET6 79 #include <netinet/icmp6.h> 80 #include <netinet/ip6.h> 81 #include <netinet6/in6_var.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/scope6_var.h> 84 #include <netinet6/nd6.h> 85 #endif 86 87 #include <netlink/netlink.h> 88 #include <netlink/netlink_ctl.h> 89 #include <netlink/netlink_generic.h> 90 #include <netlink/netlink_message_parser.h> 91 92 #include <crypto/sha1.h> 93 94 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); 95 96 struct carp_softc { 97 struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ 98 struct ifaddr **sc_ifas; /* Our ifaddrs. */ 99 carp_version_t sc_version; /* carp or VRRPv3 */ 100 uint8_t sc_addr[ETHER_ADDR_LEN]; /* Our link level address. */ 101 struct callout sc_ad_tmo; /* Advertising timeout. */ 102 #ifdef INET 103 struct callout sc_md_tmo; /* Master down timeout. */ 104 #endif 105 #ifdef INET6 106 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ 107 #endif 108 struct mtx sc_mtx; 109 110 int sc_vhid; 111 union { 112 struct { /* sc_version == CARP_VERSION_CARP */ 113 int sc_advskew; 114 int sc_advbase; 115 struct in_addr sc_carpaddr; 116 struct in6_addr sc_carpaddr6; 117 uint64_t sc_counter; 118 bool sc_init_counter; 119 #define CARP_HMAC_PAD 64 120 unsigned char sc_key[CARP_KEY_LEN]; 121 unsigned char sc_pad[CARP_HMAC_PAD]; 122 SHA1_CTX sc_sha1; 123 }; 124 struct { /* sc_version == CARP_VERSION_VRRPv3 */ 125 uint8_t sc_vrrp_prio; 126 uint16_t sc_vrrp_adv_inter; 127 uint16_t sc_vrrp_master_inter; 128 }; 129 }; 130 int sc_naddrs; 131 int sc_naddrs6; 132 int sc_ifasiz; 133 enum { INIT = 0, BACKUP, MASTER } sc_state; 134 int sc_suppress; 135 int sc_sendad_errors; 136 #define CARP_SENDAD_MAX_ERRORS 3 137 int sc_sendad_success; 138 #define CARP_SENDAD_MIN_SUCCESS 3 139 140 TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ 141 LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ 142 }; 143 144 struct carp_if { 145 #ifdef INET 146 int cif_naddrs; 147 #endif 148 #ifdef INET6 149 int cif_naddrs6; 150 #endif 151 TAILQ_HEAD(, carp_softc) cif_vrs; 152 #ifdef INET 153 struct ip_moptions cif_imo; 154 #endif 155 #ifdef INET6 156 struct ip6_moptions cif_im6o; 157 #endif 158 struct ifnet *cif_ifp; 159 struct mtx cif_mtx; 160 uint32_t cif_flags; 161 #define CIF_PROMISC 0x00000001 162 }; 163 164 /* Kernel equivalent of struct carpreq, but with more fields for new features. 165 * */ 166 struct carpkreq { 167 int carpr_count; 168 int carpr_vhid; 169 int carpr_state; 170 int carpr_advskew; 171 int carpr_advbase; 172 unsigned char carpr_key[CARP_KEY_LEN]; 173 /* Everything above this is identical to carpreq */ 174 struct in_addr carpr_addr; 175 struct in6_addr carpr_addr6; 176 carp_version_t carpr_version; 177 uint8_t carpr_vrrp_priority; 178 uint16_t carpr_vrrp_adv_inter; 179 }; 180 181 /* 182 * Brief design of carp(4). 183 * 184 * Any carp-capable ifnet may have a list of carp softcs hanging off 185 * its ifp->if_carp pointer. Each softc represents one unique virtual 186 * host id, or vhid. The softc has a back pointer to the ifnet. All 187 * softcs are joined in a global list, which has quite limited use. 188 * 189 * Any interface address that takes part in CARP negotiation has a 190 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either 191 * AF_INET or AF_INET6 address. 192 * 193 * Although, one can get the softc's backpointer to ifnet and traverse 194 * through its ifp->if_addrhead queue to find all interface addresses 195 * involved in CARP, we keep a growable array of ifaddr pointers. This 196 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that 197 * do calls into the network stack, thus avoiding LORs. 198 * 199 * Locking: 200 * 201 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), 202 * callout-driven events and ioctl()s. 203 * 204 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. 205 * To traverse the global list we use the mutex carp_mtx. 206 * 207 * Known issues with locking: 208 * 209 * - Sending ad, we put the pointer to the softc in an mtag, and no reference 210 * counting is done on the softc. 211 * - On module unload we may race (?) with packet processing thread 212 * dereferencing our function pointers. 213 */ 214 215 /* Accept incoming CARP packets. */ 216 VNET_DEFINE_STATIC(int, carp_allow) = 1; 217 #define V_carp_allow VNET(carp_allow) 218 219 /* Set DSCP in outgoing CARP packets. */ 220 VNET_DEFINE_STATIC(int, carp_dscp) = 56; 221 #define V_carp_dscp VNET(carp_dscp) 222 223 /* Preempt slower nodes. */ 224 VNET_DEFINE_STATIC(int, carp_preempt) = 0; 225 #define V_carp_preempt VNET(carp_preempt) 226 227 /* Log level. */ 228 VNET_DEFINE_STATIC(int, carp_log) = 1; 229 #define V_carp_log VNET(carp_log) 230 231 /* Global advskew demotion. */ 232 VNET_DEFINE_STATIC(int, carp_demotion) = 0; 233 #define V_carp_demotion VNET(carp_demotion) 234 235 /* Send error demotion factor. */ 236 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; 237 #define V_carp_senderr_adj VNET(carp_senderr_adj) 238 239 /* Iface down demotion factor. */ 240 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 241 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 242 243 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 244 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 245 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 246 247 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 248 "CARP"); 249 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 250 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 251 &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", 252 "Accept incoming CARP packets"); 253 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 254 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 255 0, 0, carp_dscp_sysctl, "I", 256 "DSCP value for carp packets"); 257 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 258 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 259 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 260 &VNET_NAME(carp_log), 0, "CARP log level"); 261 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 262 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 263 0, 0, carp_demote_adj_sysctl, "I", 264 "Adjust demotion factor (skew of advskew)"); 265 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 266 CTLFLAG_VNET | CTLFLAG_RW, 267 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 268 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 269 CTLFLAG_VNET | CTLFLAG_RW, 270 &VNET_NAME(carp_ifdown_adj), 0, 271 "Interface down demotion factor adjustment"); 272 273 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 274 VNET_PCPUSTAT_SYSINIT(carpstats); 275 VNET_PCPUSTAT_SYSUNINIT(carpstats); 276 277 #define CARPSTATS_ADD(name, val) \ 278 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 279 sizeof(uint64_t)], (val)) 280 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 281 282 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 283 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 284 285 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 286 NULL, MTX_DEF) 287 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 288 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 289 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 290 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 291 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 292 NULL, MTX_DEF) 293 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 294 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 295 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 296 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 297 #define CIF_FREE(cif) do { \ 298 CIF_LOCK(cif); \ 299 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 300 carp_free_if(cif); \ 301 else \ 302 CIF_UNLOCK(cif); \ 303 } while (0) 304 305 #define CARP_LOG(...) do { \ 306 if (V_carp_log > 0) \ 307 log(LOG_INFO, "carp: " __VA_ARGS__); \ 308 } while (0) 309 310 #define CARP_DEBUG(...) do { \ 311 if (V_carp_log > 1) \ 312 log(LOG_DEBUG, __VA_ARGS__); \ 313 } while (0) 314 315 #define IFNET_FOREACH_IFA(ifp, ifa) \ 316 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 317 if ((ifa)->ifa_carp != NULL) 318 319 #define CARP_FOREACH_IFA(sc, ifa) \ 320 CARP_LOCK_ASSERT(sc); \ 321 for (int _i = 0; \ 322 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 323 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 324 ++_i) 325 326 #define IFNET_FOREACH_CARP(ifp, sc) \ 327 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 328 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 329 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 330 331 #define DEMOTE_ADVSKEW(sc) \ 332 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 333 CARP_MAXSKEW : \ 334 (((sc)->sc_advskew + V_carp_demotion < 0) ? \ 335 0 : ((sc)->sc_advskew + V_carp_demotion))) 336 337 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); 338 static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); 339 static struct carp_softc 340 *carp_alloc(struct ifnet *, carp_version_t, int); 341 static void carp_destroy(struct carp_softc *); 342 static struct carp_if 343 *carp_alloc_if(struct ifnet *); 344 static void carp_free_if(struct carp_if *); 345 static void carp_set_state(struct carp_softc *, int, const char* reason); 346 static void carp_sc_state(struct carp_softc *); 347 static void carp_setrun(struct carp_softc *, sa_family_t); 348 static void carp_master_down(void *); 349 static void carp_master_down_locked(struct carp_softc *, 350 const char* reason); 351 static void carp_send_ad_locked(struct carp_softc *); 352 static void vrrp_send_ad_locked(struct carp_softc *); 353 static void carp_addroute(struct carp_softc *); 354 static void carp_ifa_addroute(struct ifaddr *); 355 static void carp_delroute(struct carp_softc *); 356 static void carp_ifa_delroute(struct ifaddr *); 357 static void carp_send_ad_all(void *, int); 358 static void carp_demote_adj(int, char *); 359 360 static LIST_HEAD(, carp_softc) carp_list; 361 static struct mtx carp_mtx; 362 static struct sx carp_sx; 363 static struct task carp_sendall_task = 364 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 365 366 static int 367 carp_is_supported_if(if_t ifp) 368 { 369 if (ifp == NULL) 370 return (ENXIO); 371 372 switch (ifp->if_type) { 373 case IFT_ETHER: 374 case IFT_L2VLAN: 375 case IFT_BRIDGE: 376 break; 377 default: 378 return (EOPNOTSUPP); 379 } 380 381 return (0); 382 } 383 384 static void 385 carp_hmac_prepare(struct carp_softc *sc) 386 { 387 uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; 388 uint8_t vhid = sc->sc_vhid & 0xff; 389 struct ifaddr *ifa; 390 int i, found; 391 #ifdef INET 392 struct in_addr last, cur, in; 393 #endif 394 #ifdef INET6 395 struct in6_addr last6, cur6, in6; 396 #endif 397 398 CARP_LOCK_ASSERT(sc); 399 MPASS(sc->sc_version == CARP_VERSION_CARP); 400 401 /* Compute ipad from key. */ 402 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 403 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 404 for (i = 0; i < sizeof(sc->sc_pad); i++) 405 sc->sc_pad[i] ^= 0x36; 406 407 /* Precompute first part of inner hash. */ 408 SHA1Init(&sc->sc_sha1); 409 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 410 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 411 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 412 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 413 #ifdef INET 414 cur.s_addr = 0; 415 do { 416 found = 0; 417 last = cur; 418 cur.s_addr = 0xffffffff; 419 CARP_FOREACH_IFA(sc, ifa) { 420 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 421 if (ifa->ifa_addr->sa_family == AF_INET && 422 ntohl(in.s_addr) > ntohl(last.s_addr) && 423 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 424 cur.s_addr = in.s_addr; 425 found++; 426 } 427 } 428 if (found) 429 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 430 } while (found); 431 #endif /* INET */ 432 #ifdef INET6 433 memset(&cur6, 0, sizeof(cur6)); 434 do { 435 found = 0; 436 last6 = cur6; 437 memset(&cur6, 0xff, sizeof(cur6)); 438 CARP_FOREACH_IFA(sc, ifa) { 439 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 440 if (IN6_IS_SCOPE_EMBED(&in6)) 441 in6.s6_addr16[1] = 0; 442 if (ifa->ifa_addr->sa_family == AF_INET6 && 443 memcmp(&in6, &last6, sizeof(in6)) > 0 && 444 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 445 cur6 = in6; 446 found++; 447 } 448 } 449 if (found) 450 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 451 } while (found); 452 #endif /* INET6 */ 453 454 /* convert ipad to opad */ 455 for (i = 0; i < sizeof(sc->sc_pad); i++) 456 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 457 } 458 459 static void 460 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 461 unsigned char md[20]) 462 { 463 SHA1_CTX sha1ctx; 464 465 CARP_LOCK_ASSERT(sc); 466 467 /* fetch first half of inner hash */ 468 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 469 470 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 471 SHA1Final(md, &sha1ctx); 472 473 /* outer hash */ 474 SHA1Init(&sha1ctx); 475 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 476 SHA1Update(&sha1ctx, md, 20); 477 SHA1Final(md, &sha1ctx); 478 } 479 480 static int 481 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 482 unsigned char md[20]) 483 { 484 unsigned char md2[20]; 485 486 CARP_LOCK_ASSERT(sc); 487 488 carp_hmac_generate(sc, counter, md2); 489 490 return (bcmp(md, md2, sizeof(md2))); 491 } 492 493 static int 494 vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) 495 { 496 uint16_t cksum; 497 498 /* 499 * Note that VRRPv3 checksums are different from CARP checksums. 500 * Carp just calculates the checksum over the packet. 501 * VRRPv3 includes the pseudo-header checksum as well. 502 */ 503 cksum = in_cksum_skip(m, off + len, off); 504 cksum -= phdrcksum; 505 506 return (cksum); 507 } 508 509 /* 510 * process input packet. 511 * we have rearranged checks order compared to the rfc, 512 * but it seems more efficient this way or not possible otherwise. 513 */ 514 #ifdef INET 515 static int 516 carp_input(struct mbuf **mp, int *offp, int proto) 517 { 518 struct mbuf *m = *mp; 519 struct ip *ip = mtod(m, struct ip *); 520 struct vrrpv3_header *vh; 521 int iplen; 522 int minlen; 523 int totlen; 524 525 iplen = *offp; 526 *mp = NULL; 527 528 CARPSTATS_INC(carps_ipackets); 529 530 if (!V_carp_allow) { 531 m_freem(m); 532 return (IPPROTO_DONE); 533 } 534 535 iplen = ip->ip_hl << 2; 536 totlen = ntohs(ip->ip_len); 537 538 /* Ensure we have enough header to figure out the version. */ 539 if (m->m_pkthdr.len < iplen + sizeof(*vh)) { 540 CARPSTATS_INC(carps_badlen); 541 CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " 542 "on %s\n", __func__, m->m_len - sizeof(struct ip), 543 if_name(m->m_pkthdr.rcvif)); 544 m_freem(m); 545 return (IPPROTO_DONE); 546 } 547 548 if (iplen + sizeof(*vh) < m->m_len) { 549 if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { 550 CARPSTATS_INC(carps_hdrops); 551 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 552 return (IPPROTO_DONE); 553 } 554 ip = mtod(m, struct ip *); 555 } 556 vh = (struct vrrpv3_header *)((char *)ip + iplen); 557 558 switch (vh->vrrp_version) { 559 case CARP_VERSION_CARP: 560 minlen = sizeof(struct carp_header); 561 break; 562 case CARP_VERSION_VRRPv3: 563 minlen = sizeof(struct vrrpv3_header); 564 break; 565 default: 566 CARPSTATS_INC(carps_badver); 567 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 568 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 569 m_freem(m); 570 return (IPPROTO_DONE); 571 } 572 573 /* And now check the length again but with the real minimal length. */ 574 if (m->m_pkthdr.len < iplen + minlen) { 575 CARPSTATS_INC(carps_badlen); 576 CARP_DEBUG("%s: received len %zd < %d " 577 "on %s\n", __func__, m->m_len - sizeof(struct ip), 578 iplen + minlen, 579 if_name(m->m_pkthdr.rcvif)); 580 m_freem(m); 581 return (IPPROTO_DONE); 582 } 583 584 if (iplen + minlen < m->m_len) { 585 if ((m = m_pullup(m, iplen + minlen)) == NULL) { 586 CARPSTATS_INC(carps_hdrops); 587 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 588 return (IPPROTO_DONE); 589 } 590 ip = mtod(m, struct ip *); 591 vh = (struct vrrpv3_header *)((char *)ip + iplen); 592 } 593 594 switch (vh->vrrp_version) { 595 case CARP_VERSION_CARP: { 596 struct carp_header *ch; 597 598 /* verify the CARP checksum */ 599 m->m_data += iplen; 600 if (in_cksum(m, totlen - iplen)) { 601 CARPSTATS_INC(carps_badsum); 602 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 603 if_name(m->m_pkthdr.rcvif)); 604 m_freem(m); 605 break; 606 } 607 m->m_data -= iplen; 608 ch = (struct carp_header *)((char *)ip + iplen); 609 carp_input_c(m, ch, AF_INET, ip->ip_ttl); 610 break; 611 } 612 case CARP_VERSION_VRRPv3: { 613 uint16_t phdrcksum; 614 615 phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 616 htonl((u_short)(totlen - iplen) + ip->ip_p)); 617 vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, 618 phdrcksum); 619 break; 620 } 621 default: 622 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 623 } 624 625 return (IPPROTO_DONE); 626 } 627 #endif 628 629 #ifdef INET6 630 static int 631 carp6_input(struct mbuf **mp, int *offp, int proto) 632 { 633 struct mbuf *m = *mp; 634 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 635 struct vrrpv3_header *vh; 636 u_int len, minlen; 637 638 CARPSTATS_INC(carps_ipackets6); 639 640 if (!V_carp_allow) { 641 m_freem(m); 642 return (IPPROTO_DONE); 643 } 644 645 /* check if received on a valid carp interface */ 646 if (m->m_pkthdr.rcvif->if_carp == NULL) { 647 CARPSTATS_INC(carps_badif); 648 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 649 __func__, if_name(m->m_pkthdr.rcvif)); 650 m_freem(m); 651 return (IPPROTO_DONE); 652 } 653 654 if (m->m_len < *offp + sizeof(*vh)) { 655 len = m->m_len; 656 m = m_pullup(m, *offp + sizeof(*vh)); 657 if (m == NULL) { 658 CARPSTATS_INC(carps_badlen); 659 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 660 return (IPPROTO_DONE); 661 } 662 ip6 = mtod(m, struct ip6_hdr *); 663 } 664 vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); 665 666 switch (vh->vrrp_version) { 667 case CARP_VERSION_CARP: 668 minlen = sizeof(struct carp_header); 669 break; 670 case CARP_VERSION_VRRPv3: 671 minlen = sizeof(struct vrrpv3_header); 672 break; 673 default: 674 CARPSTATS_INC(carps_badver); 675 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 676 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 677 m_freem(m); 678 return (IPPROTO_DONE); 679 } 680 681 /* And now check the length again but with the real minimal length. */ 682 if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { 683 CARPSTATS_INC(carps_badlen); 684 CARP_DEBUG("%s: received len %zd < %zd " 685 "on %s\n", __func__, m->m_len - sizeof(struct ip), 686 sizeof(*ip6) + minlen, 687 if_name(m->m_pkthdr.rcvif)); 688 m_freem(m); 689 return (IPPROTO_DONE); 690 } 691 692 if (sizeof (*ip6) + minlen < m->m_len) { 693 if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { 694 CARPSTATS_INC(carps_hdrops); 695 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 696 return (IPPROTO_DONE); 697 } 698 ip6 = mtod(m, struct ip6_hdr *); 699 vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 700 } 701 702 switch (vh->vrrp_version) { 703 case CARP_VERSION_CARP: { 704 struct carp_header *ch; 705 706 /* verify the CARP checksum */ 707 m->m_data += *offp; 708 if (in_cksum(m, sizeof(struct carp_header))) { 709 CARPSTATS_INC(carps_badsum); 710 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 711 if_name(m->m_pkthdr.rcvif)); 712 m_freem(m); 713 break; 714 } 715 m->m_data -= *offp; 716 ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); 717 carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); 718 break; 719 } 720 case CARP_VERSION_VRRPv3: { 721 uint16_t phdrcksum; 722 723 phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 724 ip6->ip6_nxt, 0); 725 vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, 726 ntohs(ip6->ip6_plen), phdrcksum); 727 break; 728 } 729 default: 730 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 731 } 732 return (IPPROTO_DONE); 733 } 734 #endif /* INET6 */ 735 736 /* 737 * This routine should not be necessary at all, but some switches 738 * (VMWare ESX vswitches) can echo our own packets back at us, 739 * and we must ignore them or they will cause us to drop out of 740 * MASTER mode. 741 * 742 * We cannot catch all cases of network loops. Instead, what we 743 * do here is catch any packet that arrives with a carp header 744 * with a VHID of 0, that comes from an address that is our own. 745 * These packets are by definition "from us" (even if they are from 746 * a misconfigured host that is pretending to be us). 747 * 748 * The VHID test is outside this mini-function. 749 */ 750 static int 751 carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 752 { 753 #ifdef INET 754 struct ip *ip4; 755 struct in_addr in4; 756 #endif 757 #ifdef INET6 758 struct ip6_hdr *ip6; 759 struct in6_addr in6; 760 #endif 761 762 switch (af) { 763 #ifdef INET 764 case AF_INET: 765 ip4 = mtod(m, struct ip *); 766 in4 = ifatoia(ifa)->ia_addr.sin_addr; 767 return (in4.s_addr == ip4->ip_src.s_addr); 768 #endif 769 #ifdef INET6 770 case AF_INET6: 771 ip6 = mtod(m, struct ip6_hdr *); 772 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 773 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 774 #endif 775 default: 776 break; 777 } 778 return (0); 779 } 780 781 static struct ifaddr * 782 carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) 783 { 784 struct ifnet *ifp = m->m_pkthdr.rcvif; 785 struct ifaddr *ifa, *match; 786 int error; 787 788 NET_EPOCH_ASSERT(); 789 790 /* 791 * Verify that the VHID is valid on the receiving interface. 792 * 793 * There should be just one match. If there are none 794 * the VHID is not valid and we drop the packet. If 795 * there are multiple VHID matches, take just the first 796 * one, for compatibility with previous code. While we're 797 * scanning, check for obvious loops in the network topology 798 * (these should never happen, and as noted above, we may 799 * miss real loops; this is just a double-check). 800 */ 801 error = 0; 802 match = NULL; 803 IFNET_FOREACH_IFA(ifp, ifa) { 804 if (match == NULL && ifa->ifa_carp != NULL && 805 ifa->ifa_addr->sa_family == af && 806 ifa->ifa_carp->sc_vhid == vhid) 807 match = ifa; 808 if (vhid == 0 && carp_source_is_self(m, ifa, af)) 809 error = ELOOP; 810 } 811 ifa = error ? NULL : match; 812 if (ifa != NULL) 813 ifa_ref(ifa); 814 815 if (ifa == NULL) { 816 if (error == ELOOP) { 817 CARP_DEBUG("dropping looped packet on interface %s\n", 818 if_name(ifp)); 819 CARPSTATS_INC(carps_badif); /* ??? */ 820 } else { 821 CARPSTATS_INC(carps_badvhid); 822 } 823 } 824 825 return (ifa); 826 } 827 828 static void 829 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) 830 { 831 struct ifnet *ifp = m->m_pkthdr.rcvif; 832 struct ifaddr *ifa; 833 struct carp_softc *sc; 834 uint64_t tmp_counter; 835 struct timeval sc_tv, ch_tv; 836 bool multicast = false; 837 838 NET_EPOCH_ASSERT(); 839 MPASS(ch->carp_version == CARP_VERSION_CARP); 840 841 ifa = carp_find_ifa(m, af, ch->carp_vhid); 842 if (ifa == NULL) { 843 m_freem(m); 844 return; 845 } 846 847 sc = ifa->ifa_carp; 848 CARP_LOCK(sc); 849 850 /* verify the CARP version. */ 851 if (sc->sc_version != CARP_VERSION_CARP) { 852 CARP_UNLOCK(sc); 853 854 CARPSTATS_INC(carps_badver); 855 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 856 ch->carp_version); 857 ifa_free(ifa); 858 m_freem(m); 859 return; 860 } 861 862 if (ifa->ifa_addr->sa_family == AF_INET) { 863 multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); 864 } else { 865 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); 866 } 867 ifa_free(ifa); 868 869 /* verify that the IP TTL is 255, but only if we're not in unicast mode. */ 870 if (multicast && ttl != CARP_DFLTTL) { 871 CARPSTATS_INC(carps_badttl); 872 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 873 ttl, if_name(m->m_pkthdr.rcvif)); 874 goto out; 875 } 876 877 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 878 CARPSTATS_INC(carps_badauth); 879 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 880 sc->sc_vhid, if_name(ifp)); 881 goto out; 882 } 883 884 tmp_counter = ntohl(ch->carp_counter[0]); 885 tmp_counter = tmp_counter<<32; 886 tmp_counter += ntohl(ch->carp_counter[1]); 887 888 /* XXX Replay protection goes here */ 889 890 sc->sc_init_counter = false; 891 sc->sc_counter = tmp_counter; 892 893 sc_tv.tv_sec = sc->sc_advbase; 894 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 895 ch_tv.tv_sec = ch->carp_advbase; 896 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 897 898 switch (sc->sc_state) { 899 case INIT: 900 break; 901 case MASTER: 902 /* 903 * If we receive an advertisement from a master who's going to 904 * be more frequent than us, go into BACKUP state. 905 */ 906 if (timevalcmp(&sc_tv, &ch_tv, >) || 907 timevalcmp(&sc_tv, &ch_tv, ==)) { 908 callout_stop(&sc->sc_ad_tmo); 909 carp_set_state(sc, BACKUP, 910 "more frequent advertisement received"); 911 carp_setrun(sc, 0); 912 carp_delroute(sc); 913 } 914 break; 915 case BACKUP: 916 /* 917 * If we're pre-empting masters who advertise slower than us, 918 * and this one claims to be slower, treat him as down. 919 */ 920 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 921 carp_master_down_locked(sc, 922 "preempting a slower master"); 923 break; 924 } 925 926 /* 927 * If the master is going to advertise at such a low frequency 928 * that he's guaranteed to time out, we'd might as well just 929 * treat him as timed out now. 930 */ 931 sc_tv.tv_sec = sc->sc_advbase * 3; 932 if (timevalcmp(&sc_tv, &ch_tv, <)) { 933 carp_master_down_locked(sc, "master will time out"); 934 break; 935 } 936 937 /* 938 * Otherwise, we reset the counter and wait for the next 939 * advertisement. 940 */ 941 carp_setrun(sc, af); 942 break; 943 } 944 945 out: 946 CARP_UNLOCK(sc); 947 m_freem(m); 948 } 949 950 static void 951 vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, 952 int len, uint16_t phdrcksum) 953 { 954 struct vrrpv3_header *vh = mtodo(m, off); 955 struct ifnet *ifp = m->m_pkthdr.rcvif; 956 struct ifaddr *ifa; 957 struct carp_softc *sc; 958 959 NET_EPOCH_ASSERT(); 960 MPASS(vh->vrrp_version == CARP_VERSION_VRRPv3); 961 962 ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); 963 if (ifa == NULL) { 964 m_freem(m); 965 return; 966 } 967 968 sc = ifa->ifa_carp; 969 CARP_LOCK(sc); 970 971 ifa_free(ifa); 972 973 /* verify the CARP version. */ 974 if (sc->sc_version != CARP_VERSION_VRRPv3) { 975 CARP_UNLOCK(sc); 976 977 CARPSTATS_INC(carps_badver); 978 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 979 vh->vrrp_version); 980 m_freem(m); 981 return; 982 } 983 984 /* verify that the IP TTL is 255. */ 985 if (ttl != CARP_DFLTTL) { 986 CARPSTATS_INC(carps_badttl); 987 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 988 ttl, if_name(m->m_pkthdr.rcvif)); 989 goto out; 990 } 991 992 if (vrrp_checksum_verify(m, off, len, phdrcksum)) { 993 CARPSTATS_INC(carps_badsum); 994 CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, 995 sc->sc_vhid, if_name(ifp)); 996 goto out; 997 } 998 999 /* RFC9568, 7.1 Receiving VRRP packets. */ 1000 if (sc->sc_vrrp_prio == 255) { 1001 CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", 1002 __func__); 1003 goto out; 1004 } 1005 1006 /* XXX TODO Check IP address payload. */ 1007 1008 sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); 1009 1010 switch (sc->sc_state) { 1011 case INIT: 1012 break; 1013 case MASTER: 1014 /* 1015 * If we receive an advertisement from a master who's going to 1016 * be more frequent than us, go into BACKUP state. 1017 * Same if the peer has a higher priority than us. 1018 */ 1019 if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || 1020 vh->vrrp_priority > sc->sc_vrrp_prio) { 1021 callout_stop(&sc->sc_ad_tmo); 1022 carp_set_state(sc, BACKUP, 1023 "more frequent advertisement received"); 1024 carp_setrun(sc, 0); 1025 carp_delroute(sc); 1026 } 1027 break; 1028 case BACKUP: 1029 /* 1030 * If we're pre-empting masters who advertise slower than us, 1031 * and this one claims to be slower, treat him as down. 1032 */ 1033 if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter 1034 || vh->vrrp_priority < sc->sc_vrrp_prio)) { 1035 carp_master_down_locked(sc, 1036 "preempting a slower master"); 1037 break; 1038 } 1039 1040 /* 1041 * Otherwise, we reset the counter and wait for the next 1042 * advertisement. 1043 */ 1044 carp_setrun(sc, af); 1045 break; 1046 } 1047 1048 out: 1049 CARP_UNLOCK(sc); 1050 m_freem(m); 1051 } 1052 1053 static int 1054 carp_tag(struct carp_softc *sc, struct mbuf *m) 1055 { 1056 struct m_tag *mtag; 1057 1058 /* Tag packet for carp_output */ 1059 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(sc->sc_vhid), 1060 M_NOWAIT)) == NULL) { 1061 m_freem(m); 1062 CARPSTATS_INC(carps_onomem); 1063 return (ENOMEM); 1064 } 1065 bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); 1066 m_tag_prepend(m, mtag); 1067 1068 return (0); 1069 } 1070 1071 static void 1072 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 1073 { 1074 1075 MPASS(sc->sc_version == CARP_VERSION_CARP); 1076 1077 if (sc->sc_init_counter) { 1078 /* this could also be seconds since unix epoch */ 1079 sc->sc_counter = arc4random(); 1080 sc->sc_counter = sc->sc_counter << 32; 1081 sc->sc_counter += arc4random(); 1082 } else 1083 sc->sc_counter++; 1084 1085 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 1086 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 1087 1088 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 1089 } 1090 1091 static inline void 1092 send_ad_locked(struct carp_softc *sc) 1093 { 1094 switch (sc->sc_version) { 1095 case CARP_VERSION_CARP: 1096 carp_send_ad_locked(sc); 1097 break; 1098 case CARP_VERSION_VRRPv3: 1099 vrrp_send_ad_locked(sc); 1100 break; 1101 } 1102 } 1103 1104 /* 1105 * To avoid LORs and possible recursions this function shouldn't 1106 * be called directly, but scheduled via taskqueue. 1107 */ 1108 static void 1109 carp_send_ad_all(void *ctx __unused, int pending __unused) 1110 { 1111 struct carp_softc *sc; 1112 struct epoch_tracker et; 1113 1114 NET_EPOCH_ENTER(et); 1115 mtx_lock(&carp_mtx); 1116 LIST_FOREACH(sc, &carp_list, sc_next) 1117 if (sc->sc_state == MASTER) { 1118 CARP_LOCK(sc); 1119 CURVNET_SET(sc->sc_carpdev->if_vnet); 1120 send_ad_locked(sc); 1121 CURVNET_RESTORE(); 1122 CARP_UNLOCK(sc); 1123 } 1124 mtx_unlock(&carp_mtx); 1125 NET_EPOCH_EXIT(et); 1126 } 1127 1128 /* Send a periodic advertisement, executed in callout context. */ 1129 static void 1130 carp_callout(void *v) 1131 { 1132 struct carp_softc *sc = v; 1133 struct epoch_tracker et; 1134 1135 NET_EPOCH_ENTER(et); 1136 CARP_LOCK_ASSERT(sc); 1137 CURVNET_SET(sc->sc_carpdev->if_vnet); 1138 send_ad_locked(sc); 1139 CURVNET_RESTORE(); 1140 CARP_UNLOCK(sc); 1141 NET_EPOCH_EXIT(et); 1142 } 1143 1144 static void 1145 carp_send_ad_error(struct carp_softc *sc, int error) 1146 { 1147 1148 /* 1149 * We track errors and successful sends with this logic: 1150 * - Any error resets success counter to 0. 1151 * - MAX_ERRORS triggers demotion. 1152 * - MIN_SUCCESS successes resets error counter to 0. 1153 * - MIN_SUCCESS reverts demotion, if it was triggered before. 1154 */ 1155 if (error) { 1156 if (sc->sc_sendad_errors < INT_MAX) 1157 sc->sc_sendad_errors++; 1158 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1159 static const char fmt[] = "send error %d on %s"; 1160 char msg[sizeof(fmt) + IFNAMSIZ]; 1161 1162 sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); 1163 carp_demote_adj(V_carp_senderr_adj, msg); 1164 } 1165 sc->sc_sendad_success = 0; 1166 } else if (sc->sc_sendad_errors > 0) { 1167 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 1168 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1169 static const char fmt[] = "send ok on %s"; 1170 char msg[sizeof(fmt) + IFNAMSIZ]; 1171 1172 sprintf(msg, fmt, if_name(sc->sc_carpdev)); 1173 carp_demote_adj(-V_carp_senderr_adj, msg); 1174 } 1175 sc->sc_sendad_errors = 0; 1176 } 1177 } 1178 } 1179 1180 /* 1181 * Pick the best ifaddr on the given ifp for sending CARP 1182 * advertisements. 1183 * 1184 * "Best" here is defined by ifa_preferred(). This function is much 1185 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 1186 * 1187 * (This could be simplified to return the actual address, except that 1188 * it has a different format in AF_INET and AF_INET6.) 1189 */ 1190 static struct ifaddr * 1191 carp_best_ifa(int af, struct ifnet *ifp) 1192 { 1193 struct ifaddr *ifa, *best; 1194 1195 NET_EPOCH_ASSERT(); 1196 1197 if (af >= AF_MAX) 1198 return (NULL); 1199 best = NULL; 1200 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1201 if (ifa->ifa_addr->sa_family == af && 1202 (best == NULL || ifa_preferred(best, ifa))) 1203 best = ifa; 1204 } 1205 if (best != NULL) 1206 ifa_ref(best); 1207 return (best); 1208 } 1209 1210 static void 1211 carp_send_ad_locked(struct carp_softc *sc) 1212 { 1213 struct carp_header ch; 1214 struct timeval tv; 1215 struct ifaddr *ifa; 1216 struct carp_header *ch_ptr; 1217 struct mbuf *m; 1218 int len, advskew; 1219 1220 NET_EPOCH_ASSERT(); 1221 CARP_LOCK_ASSERT(sc); 1222 MPASS(sc->sc_version == CARP_VERSION_CARP); 1223 1224 advskew = DEMOTE_ADVSKEW(sc); 1225 tv.tv_sec = sc->sc_advbase; 1226 tv.tv_usec = advskew * 1000000 / 256; 1227 1228 ch.carp_version = CARP_VERSION_CARP; 1229 ch.carp_type = CARP_ADVERTISEMENT; 1230 ch.carp_vhid = sc->sc_vhid; 1231 ch.carp_advbase = sc->sc_advbase; 1232 ch.carp_advskew = advskew; 1233 ch.carp_authlen = 7; /* XXX DEFINE */ 1234 ch.carp_pad1 = 0; /* must be zero */ 1235 ch.carp_cksum = 0; 1236 1237 /* XXXGL: OpenBSD picks first ifaddr with needed family. */ 1238 1239 #ifdef INET 1240 if (sc->sc_naddrs) { 1241 struct ip *ip; 1242 1243 m = m_gethdr(M_NOWAIT, MT_DATA); 1244 if (m == NULL) { 1245 CARPSTATS_INC(carps_onomem); 1246 goto resched; 1247 } 1248 len = sizeof(*ip) + sizeof(ch); 1249 m->m_pkthdr.len = len; 1250 m->m_pkthdr.rcvif = NULL; 1251 m->m_len = len; 1252 M_ALIGN(m, m->m_len); 1253 if (IN_MULTICAST(sc->sc_carpaddr.s_addr)) 1254 m->m_flags |= M_MCAST; 1255 ip = mtod(m, struct ip *); 1256 ip->ip_v = IPVERSION; 1257 ip->ip_hl = sizeof(*ip) >> 2; 1258 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1259 ip->ip_len = htons(len); 1260 ip->ip_off = htons(IP_DF); 1261 ip->ip_ttl = CARP_DFLTTL; 1262 ip->ip_p = IPPROTO_CARP; 1263 ip->ip_sum = 0; 1264 ip_fillid(ip); 1265 1266 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1267 if (ifa != NULL) { 1268 ip->ip_src.s_addr = 1269 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1270 ifa_free(ifa); 1271 } else 1272 ip->ip_src.s_addr = 0; 1273 ip->ip_dst = sc->sc_carpaddr; 1274 1275 ch_ptr = (struct carp_header *)(&ip[1]); 1276 bcopy(&ch, ch_ptr, sizeof(ch)); 1277 carp_prepare_ad(m, sc, ch_ptr); 1278 if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)) && 1279 carp_tag(sc, m) != 0) 1280 goto resched; 1281 1282 m->m_data += sizeof(*ip); 1283 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 1284 m->m_data -= sizeof(*ip); 1285 1286 CARPSTATS_INC(carps_opackets); 1287 1288 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1289 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1290 } 1291 #endif /* INET */ 1292 #ifdef INET6 1293 if (sc->sc_naddrs6) { 1294 struct ip6_hdr *ip6; 1295 1296 m = m_gethdr(M_NOWAIT, MT_DATA); 1297 if (m == NULL) { 1298 CARPSTATS_INC(carps_onomem); 1299 goto resched; 1300 } 1301 len = sizeof(*ip6) + sizeof(ch); 1302 m->m_pkthdr.len = len; 1303 m->m_pkthdr.rcvif = NULL; 1304 m->m_len = len; 1305 M_ALIGN(m, m->m_len); 1306 ip6 = mtod(m, struct ip6_hdr *); 1307 bzero(ip6, sizeof(*ip6)); 1308 ip6->ip6_vfc |= IPV6_VERSION; 1309 /* Traffic class isn't defined in ip6 struct instead 1310 * it gets offset into flowid field */ 1311 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1312 IPTOS_DSCP_OFFSET)); 1313 ip6->ip6_hlim = CARP_DFLTTL; 1314 ip6->ip6_nxt = IPPROTO_CARP; 1315 1316 /* set the source address */ 1317 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1318 if (ifa != NULL) { 1319 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1320 sizeof(struct in6_addr)); 1321 ifa_free(ifa); 1322 } else 1323 /* This should never happen with IPv6. */ 1324 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1325 1326 /* Set the multicast destination. */ 1327 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); 1328 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || 1329 IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { 1330 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1331 m_freem(m); 1332 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1333 goto resched; 1334 } 1335 } 1336 1337 ch_ptr = (struct carp_header *)(&ip6[1]); 1338 bcopy(&ch, ch_ptr, sizeof(ch)); 1339 carp_prepare_ad(m, sc, ch_ptr); 1340 if (IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6) && 1341 carp_tag(sc, m) != 0) 1342 goto resched; 1343 1344 m->m_data += sizeof(*ip6); 1345 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1346 m->m_data -= sizeof(*ip6); 1347 1348 CARPSTATS_INC(carps_opackets6); 1349 1350 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1351 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1352 } 1353 #endif /* INET6 */ 1354 1355 resched: 1356 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc); 1357 } 1358 1359 static void 1360 vrrp_send_ad_locked(struct carp_softc *sc) 1361 { 1362 struct vrrpv3_header *vh_ptr; 1363 struct ifaddr *ifa; 1364 struct mbuf *m; 1365 int len; 1366 struct vrrpv3_header vh = { 1367 .vrrp_version = CARP_VERSION_VRRPv3, 1368 .vrrp_type = VRRP_TYPE_ADVERTISEMENT, 1369 .vrrp_vrtid = sc->sc_vhid, 1370 .vrrp_priority = sc->sc_vrrp_prio, 1371 .vrrp_count_addr = 0, 1372 .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter), 1373 .vrrp_checksum = 0, 1374 }; 1375 1376 NET_EPOCH_ASSERT(); 1377 CARP_LOCK_ASSERT(sc); 1378 MPASS(sc->sc_version == CARP_VERSION_VRRPv3); 1379 1380 #ifdef INET 1381 if (sc->sc_naddrs) { 1382 struct ip *ip; 1383 1384 m = m_gethdr(M_NOWAIT, MT_DATA); 1385 if (m == NULL) { 1386 CARPSTATS_INC(carps_onomem); 1387 goto resched; 1388 } 1389 len = sizeof(*ip) + sizeof(vh); 1390 m->m_pkthdr.len = len; 1391 m->m_pkthdr.rcvif = NULL; 1392 m->m_len = len; 1393 M_ALIGN(m, m->m_len); 1394 m->m_flags |= M_MCAST; 1395 ip = mtod(m, struct ip *); 1396 ip->ip_v = IPVERSION; 1397 ip->ip_hl = sizeof(*ip) >> 2; 1398 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1399 ip->ip_off = htons(IP_DF); 1400 ip->ip_ttl = CARP_DFLTTL; 1401 ip->ip_p = IPPROTO_CARP; 1402 ip->ip_sum = 0; 1403 ip_fillid(ip); 1404 1405 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1406 if (ifa != NULL) { 1407 ip->ip_src.s_addr = 1408 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1409 ifa_free(ifa); 1410 } else 1411 ip->ip_src.s_addr = 0; 1412 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 1413 1414 /* Include the IP addresses in the announcement. */ 1415 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1416 struct sockaddr_in *in; 1417 1418 MPASS(sc->sc_ifas[i] != NULL); 1419 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) 1420 continue; 1421 1422 in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; 1423 1424 if (m_append(m, sizeof(in->sin_addr), 1425 (caddr_t)&in->sin_addr) != 1) { 1426 m_freem(m); 1427 goto resched; 1428 } 1429 1430 vh.vrrp_count_addr++; 1431 len += sizeof(in->sin_addr); 1432 } 1433 ip->ip_len = htons(len); 1434 1435 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); 1436 bcopy(&vh, vh_ptr, sizeof(vh)); 1437 1438 vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, 1439 ip->ip_dst.s_addr, 1440 htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); 1441 vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); 1442 1443 if (carp_tag(sc, m)) 1444 goto resched; 1445 1446 CARPSTATS_INC(carps_opackets); 1447 1448 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1449 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1450 } 1451 #endif 1452 #ifdef INET6 1453 if (sc->sc_naddrs6) { 1454 struct ip6_hdr *ip6; 1455 1456 m = m_gethdr(M_NOWAIT, MT_DATA); 1457 if (m == NULL) { 1458 CARPSTATS_INC(carps_onomem); 1459 goto resched; 1460 } 1461 len = sizeof(*ip6) + sizeof(vh); 1462 m->m_pkthdr.len = len; 1463 m->m_pkthdr.rcvif = NULL; 1464 m->m_len = len; 1465 M_ALIGN(m, m->m_len); 1466 m->m_flags |= M_MCAST; 1467 ip6 = mtod(m, struct ip6_hdr *); 1468 bzero(ip6, sizeof(*ip6)); 1469 ip6->ip6_vfc |= IPV6_VERSION; 1470 /* Traffic class isn't defined in ip6 struct instead 1471 * it gets offset into flowid field */ 1472 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1473 IPTOS_DSCP_OFFSET)); 1474 ip6->ip6_hlim = CARP_DFLTTL; 1475 ip6->ip6_nxt = IPPROTO_CARP; 1476 1477 /* set the source address */ 1478 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1479 if (ifa != NULL) { 1480 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1481 sizeof(struct in6_addr)); 1482 ifa_free(ifa); 1483 } else 1484 /* This should never happen with IPv6. */ 1485 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1486 1487 /* Set the multicast destination. */ 1488 bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); 1489 ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 1490 ip6->ip6_dst.s6_addr8[15] = 0x12; 1491 1492 /* Include the IP addresses in the announcement. */ 1493 len = sizeof(vh); 1494 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1495 struct sockaddr_in6 *in6; 1496 1497 MPASS(sc->sc_ifas[i] != NULL); 1498 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) 1499 continue; 1500 1501 in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; 1502 1503 if (m_append(m, sizeof(in6->sin6_addr), 1504 (char *)&in6->sin6_addr) != 1) { 1505 m_freem(m); 1506 goto resched; 1507 } 1508 1509 vh.vrrp_count_addr++; 1510 len += sizeof(in6->sin6_addr); 1511 } 1512 ip6->ip6_plen = htonl(len); 1513 1514 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 1515 bcopy(&vh, vh_ptr, sizeof(vh)); 1516 1517 vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); 1518 vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); 1519 1520 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1521 m_freem(m); 1522 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1523 goto resched; 1524 } 1525 1526 if (carp_tag(sc, m)) 1527 goto resched; 1528 CARPSTATS_INC(carps_opackets6); 1529 1530 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1531 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1532 } 1533 #endif 1534 1535 resched: 1536 callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, 1537 carp_callout, sc); 1538 } 1539 1540 static void 1541 carp_addroute(struct carp_softc *sc) 1542 { 1543 struct ifaddr *ifa; 1544 1545 CARP_FOREACH_IFA(sc, ifa) 1546 carp_ifa_addroute(ifa); 1547 } 1548 1549 static void 1550 carp_ifa_addroute(struct ifaddr *ifa) 1551 { 1552 1553 switch (ifa->ifa_addr->sa_family) { 1554 #ifdef INET 1555 case AF_INET: 1556 in_addprefix(ifatoia(ifa)); 1557 ifa_add_loopback_route(ifa, 1558 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1559 break; 1560 #endif 1561 #ifdef INET6 1562 case AF_INET6: 1563 ifa_add_loopback_route(ifa, 1564 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1565 nd6_add_ifa_lle(ifatoia6(ifa)); 1566 break; 1567 #endif 1568 } 1569 } 1570 1571 static void 1572 carp_delroute(struct carp_softc *sc) 1573 { 1574 struct ifaddr *ifa; 1575 1576 CARP_FOREACH_IFA(sc, ifa) 1577 carp_ifa_delroute(ifa); 1578 } 1579 1580 static void 1581 carp_ifa_delroute(struct ifaddr *ifa) 1582 { 1583 1584 switch (ifa->ifa_addr->sa_family) { 1585 #ifdef INET 1586 case AF_INET: 1587 ifa_del_loopback_route(ifa, 1588 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1589 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1590 break; 1591 #endif 1592 #ifdef INET6 1593 case AF_INET6: 1594 ifa_del_loopback_route(ifa, 1595 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1596 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1597 break; 1598 #endif 1599 } 1600 } 1601 1602 int 1603 carp_master(struct ifaddr *ifa) 1604 { 1605 struct carp_softc *sc = ifa->ifa_carp; 1606 1607 return (sc->sc_state == MASTER); 1608 } 1609 1610 #ifdef INET 1611 /* 1612 * Broadcast a gratuitous ARP request containing 1613 * the virtual router MAC address for each IP address 1614 * associated with the virtual router. 1615 */ 1616 static void 1617 carp_send_arp(struct carp_softc *sc) 1618 { 1619 struct ifaddr *ifa; 1620 struct in_addr addr; 1621 1622 NET_EPOCH_ASSERT(); 1623 1624 CARP_FOREACH_IFA(sc, ifa) { 1625 if (ifa->ifa_addr->sa_family != AF_INET) 1626 continue; 1627 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1628 arp_announce_ifaddr(sc->sc_carpdev, addr, sc->sc_addr); 1629 } 1630 } 1631 1632 int 1633 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1634 { 1635 struct carp_softc *sc = ifa->ifa_carp; 1636 1637 if (sc->sc_state == MASTER) { 1638 *enaddr = sc->sc_addr; 1639 return (1); 1640 } 1641 1642 return (0); 1643 } 1644 #endif 1645 1646 #ifdef INET6 1647 static void 1648 carp_send_na(struct carp_softc *sc) 1649 { 1650 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1651 struct ifaddr *ifa; 1652 struct in6_addr *in6; 1653 1654 CARP_FOREACH_IFA(sc, ifa) { 1655 if (ifa->ifa_addr->sa_family != AF_INET6) 1656 continue; 1657 1658 in6 = IFA_IN6(ifa); 1659 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1660 ND_NA_FLAG_OVERRIDE, 1, NULL); 1661 DELAY(1000); /* XXX */ 1662 } 1663 } 1664 1665 /* 1666 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1667 * matches and is not a carp address. Returns NULL otherwise. 1668 */ 1669 struct ifaddr * 1670 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1671 { 1672 struct ifaddr *ifa; 1673 1674 NET_EPOCH_ASSERT(); 1675 1676 ifa = NULL; 1677 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1678 if (ifa->ifa_addr->sa_family != AF_INET6) 1679 continue; 1680 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1681 continue; 1682 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1683 ifa = NULL; 1684 else 1685 ifa_ref(ifa); 1686 break; 1687 } 1688 1689 return (ifa); 1690 } 1691 1692 char * 1693 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1694 { 1695 struct ifaddr *ifa; 1696 1697 NET_EPOCH_ASSERT(); 1698 1699 IFNET_FOREACH_IFA(ifp, ifa) 1700 if (ifa->ifa_addr->sa_family == AF_INET6 && 1701 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1702 struct carp_softc *sc = ifa->ifa_carp; 1703 struct m_tag *mtag; 1704 1705 mtag = m_tag_get(PACKET_TAG_CARP, 1706 sizeof(struct carp_softc *), M_NOWAIT); 1707 if (mtag == NULL) 1708 /* Better a bit than nothing. */ 1709 return (sc->sc_addr); 1710 1711 bcopy(&sc, mtag + 1, sizeof(sc)); 1712 m_tag_prepend(m, mtag); 1713 1714 return (sc->sc_addr); 1715 } 1716 1717 return (NULL); 1718 } 1719 #endif /* INET6 */ 1720 1721 int 1722 carp_forus(struct ifnet *ifp, u_char *dhost) 1723 { 1724 struct carp_softc *sc; 1725 uint8_t *ena = dhost; 1726 1727 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1728 return (0); 1729 1730 CIF_LOCK(ifp->if_carp); 1731 IFNET_FOREACH_CARP(ifp, sc) { 1732 /* 1733 * CARP_LOCK() is not here, since would protect nothing, but 1734 * cause deadlock with if_bridge, calling this under its lock. 1735 */ 1736 if (sc->sc_state == MASTER && !bcmp(dhost, sc->sc_addr, 1737 ETHER_ADDR_LEN)) { 1738 CIF_UNLOCK(ifp->if_carp); 1739 return (1); 1740 } 1741 } 1742 CIF_UNLOCK(ifp->if_carp); 1743 1744 return (0); 1745 } 1746 1747 /* Master down timeout event, executed in callout context. */ 1748 static void 1749 carp_master_down(void *v) 1750 { 1751 struct carp_softc *sc = v; 1752 struct epoch_tracker et; 1753 1754 NET_EPOCH_ENTER(et); 1755 CARP_LOCK_ASSERT(sc); 1756 1757 CURVNET_SET(sc->sc_carpdev->if_vnet); 1758 if (sc->sc_state == BACKUP) { 1759 carp_master_down_locked(sc, "master timed out"); 1760 } 1761 CURVNET_RESTORE(); 1762 1763 CARP_UNLOCK(sc); 1764 NET_EPOCH_EXIT(et); 1765 } 1766 1767 static void 1768 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1769 { 1770 1771 NET_EPOCH_ASSERT(); 1772 CARP_LOCK_ASSERT(sc); 1773 1774 switch (sc->sc_state) { 1775 case BACKUP: 1776 carp_set_state(sc, MASTER, reason); 1777 send_ad_locked(sc); 1778 #ifdef INET 1779 carp_send_arp(sc); 1780 #endif 1781 #ifdef INET6 1782 carp_send_na(sc); 1783 #endif 1784 carp_setrun(sc, 0); 1785 carp_addroute(sc); 1786 break; 1787 case INIT: 1788 case MASTER: 1789 #ifdef INVARIANTS 1790 panic("carp: VHID %u@%s: master_down event in %s state\n", 1791 sc->sc_vhid, 1792 if_name(sc->sc_carpdev), 1793 sc->sc_state ? "MASTER" : "INIT"); 1794 #endif 1795 break; 1796 } 1797 } 1798 1799 /* 1800 * When in backup state, af indicates whether to reset the master down timer 1801 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1802 */ 1803 static void 1804 carp_setrun(struct carp_softc *sc, sa_family_t af) 1805 { 1806 struct timeval tv; 1807 int timeout; 1808 1809 CARP_LOCK_ASSERT(sc); 1810 1811 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1812 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1813 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1814 !V_carp_allow) 1815 return; 1816 1817 switch (sc->sc_state) { 1818 case INIT: 1819 carp_set_state(sc, BACKUP, "initialization complete"); 1820 carp_setrun(sc, 0); 1821 break; 1822 case BACKUP: 1823 callout_stop(&sc->sc_ad_tmo); 1824 1825 switch (sc->sc_version) { 1826 case CARP_VERSION_CARP: 1827 tv.tv_sec = 3 * sc->sc_advbase; 1828 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1829 timeout = tvtohz(&tv); 1830 break; 1831 case CARP_VERSION_VRRPv3: 1832 /* skew time */ 1833 timeout = (256 - sc->sc_vrrp_prio) * 1834 sc->sc_vrrp_master_inter / 256; 1835 timeout += (3 * sc->sc_vrrp_master_inter); 1836 timeout *= hz; 1837 timeout /= 100; /* master interval is in centiseconds */ 1838 break; 1839 } 1840 switch (af) { 1841 #ifdef INET 1842 case AF_INET: 1843 callout_reset(&sc->sc_md_tmo, timeout, 1844 carp_master_down, sc); 1845 break; 1846 #endif 1847 #ifdef INET6 1848 case AF_INET6: 1849 callout_reset(&sc->sc_md6_tmo, timeout, 1850 carp_master_down, sc); 1851 break; 1852 #endif 1853 default: 1854 #ifdef INET 1855 if (sc->sc_naddrs) 1856 callout_reset(&sc->sc_md_tmo, timeout, 1857 carp_master_down, sc); 1858 #endif 1859 #ifdef INET6 1860 if (sc->sc_naddrs6) 1861 callout_reset(&sc->sc_md6_tmo, timeout, 1862 carp_master_down, sc); 1863 #endif 1864 break; 1865 } 1866 break; 1867 case MASTER: 1868 switch (sc->sc_version) { 1869 case CARP_VERSION_CARP: 1870 tv.tv_sec = sc->sc_advbase; 1871 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1872 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1873 carp_callout, sc); 1874 break; 1875 case CARP_VERSION_VRRPv3: 1876 callout_reset(&sc->sc_ad_tmo, 1877 sc->sc_vrrp_adv_inter * hz / 100, 1878 carp_callout, sc); 1879 break; 1880 } 1881 break; 1882 } 1883 } 1884 1885 /* 1886 * Setup multicast structures. 1887 */ 1888 static int 1889 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1890 { 1891 struct ifnet *ifp = cif->cif_ifp; 1892 int error = 0; 1893 1894 switch (sa) { 1895 #ifdef INET 1896 case AF_INET: 1897 { 1898 struct ip_moptions *imo = &cif->cif_imo; 1899 struct in_mfilter *imf; 1900 struct in_addr addr; 1901 1902 if (ip_mfilter_first(&imo->imo_head) != NULL) 1903 return (0); 1904 1905 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1906 ip_mfilter_init(&imo->imo_head); 1907 imo->imo_multicast_vif = -1; 1908 1909 addr.s_addr = htonl(INADDR_CARP_GROUP); 1910 if ((error = in_joingroup(ifp, &addr, NULL, 1911 &imf->imf_inm)) != 0) { 1912 ip_mfilter_free(imf); 1913 break; 1914 } 1915 1916 ip_mfilter_insert(&imo->imo_head, imf); 1917 imo->imo_multicast_ifp = ifp; 1918 imo->imo_multicast_ttl = CARP_DFLTTL; 1919 imo->imo_multicast_loop = 0; 1920 break; 1921 } 1922 #endif 1923 #ifdef INET6 1924 case AF_INET6: 1925 { 1926 struct ip6_moptions *im6o = &cif->cif_im6o; 1927 struct in6_mfilter *im6f[2]; 1928 struct in6_addr in6; 1929 1930 if (ip6_mfilter_first(&im6o->im6o_head)) 1931 return (0); 1932 1933 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1934 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1935 1936 ip6_mfilter_init(&im6o->im6o_head); 1937 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1938 im6o->im6o_multicast_ifp = ifp; 1939 1940 /* Join IPv6 CARP multicast group. */ 1941 bzero(&in6, sizeof(in6)); 1942 in6.s6_addr16[0] = htons(0xff02); 1943 in6.s6_addr8[15] = 0x12; 1944 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1945 ip6_mfilter_free(im6f[0]); 1946 ip6_mfilter_free(im6f[1]); 1947 break; 1948 } 1949 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1950 ip6_mfilter_free(im6f[0]); 1951 ip6_mfilter_free(im6f[1]); 1952 break; 1953 } 1954 1955 /* Join solicited multicast address. */ 1956 bzero(&in6, sizeof(in6)); 1957 in6.s6_addr16[0] = htons(0xff02); 1958 in6.s6_addr32[1] = 0; 1959 in6.s6_addr32[2] = htonl(1); 1960 in6.s6_addr32[3] = 0; 1961 in6.s6_addr8[12] = 0xff; 1962 1963 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1964 ip6_mfilter_free(im6f[0]); 1965 ip6_mfilter_free(im6f[1]); 1966 break; 1967 } 1968 1969 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1970 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1971 ip6_mfilter_free(im6f[0]); 1972 ip6_mfilter_free(im6f[1]); 1973 break; 1974 } 1975 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1976 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1977 break; 1978 } 1979 #endif 1980 } 1981 1982 return (error); 1983 } 1984 1985 /* 1986 * Free multicast structures. 1987 */ 1988 static void 1989 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1990 { 1991 #ifdef INET 1992 struct ip_moptions *imo = &cif->cif_imo; 1993 struct in_mfilter *imf; 1994 #endif 1995 #ifdef INET6 1996 struct ip6_moptions *im6o = &cif->cif_im6o; 1997 struct in6_mfilter *im6f; 1998 #endif 1999 sx_assert(&carp_sx, SA_XLOCKED); 2000 2001 switch (sa) { 2002 #ifdef INET 2003 case AF_INET: 2004 if (cif->cif_naddrs != 0) 2005 break; 2006 2007 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2008 ip_mfilter_remove(&imo->imo_head, imf); 2009 in_leavegroup(imf->imf_inm, NULL); 2010 ip_mfilter_free(imf); 2011 } 2012 break; 2013 #endif 2014 #ifdef INET6 2015 case AF_INET6: 2016 if (cif->cif_naddrs6 != 0) 2017 break; 2018 2019 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 2020 ip6_mfilter_remove(&im6o->im6o_head, im6f); 2021 in6_leavegroup(im6f->im6f_in6m, NULL); 2022 ip6_mfilter_free(im6f); 2023 } 2024 break; 2025 #endif 2026 } 2027 } 2028 2029 int 2030 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 2031 { 2032 struct m_tag *mtag; 2033 int vhid; 2034 2035 if (!sa) 2036 return (0); 2037 2038 switch (sa->sa_family) { 2039 #ifdef INET 2040 case AF_INET: 2041 break; 2042 #endif 2043 #ifdef INET6 2044 case AF_INET6: 2045 break; 2046 #endif 2047 default: 2048 return (0); 2049 } 2050 2051 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2052 if (mtag == NULL) 2053 return (0); 2054 2055 bcopy(mtag + 1, &vhid, sizeof(vhid)); 2056 2057 /* Set the source MAC address to the Virtual Router MAC Address. */ 2058 switch (ifp->if_type) { 2059 case IFT_ETHER: 2060 case IFT_BRIDGE: 2061 case IFT_L2VLAN: { 2062 struct ether_header *eh; 2063 2064 eh = mtod(m, struct ether_header *); 2065 eh->ether_shost[0] = 0; 2066 eh->ether_shost[1] = 0; 2067 eh->ether_shost[2] = 0x5e; 2068 eh->ether_shost[3] = 0; 2069 eh->ether_shost[4] = 1; 2070 eh->ether_shost[5] = vhid; 2071 } 2072 break; 2073 default: 2074 printf("%s: carp is not supported for the %d interface type\n", 2075 if_name(ifp), ifp->if_type); 2076 return (EOPNOTSUPP); 2077 } 2078 2079 return (0); 2080 } 2081 2082 static struct carp_softc* 2083 carp_alloc(struct ifnet *ifp, carp_version_t version, int vhid) 2084 { 2085 struct carp_softc *sc; 2086 struct carp_if *cif; 2087 2088 sx_assert(&carp_sx, SA_XLOCKED); 2089 2090 if ((cif = ifp->if_carp) == NULL) 2091 cif = carp_alloc_if(ifp); 2092 2093 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK); 2094 *sc = (struct carp_softc ){ 2095 .sc_vhid = vhid, 2096 .sc_version = version, 2097 .sc_state = INIT, 2098 .sc_carpdev = ifp, 2099 .sc_ifasiz = sizeof(struct ifaddr *), 2100 .sc_addr = { 0, 0, 0x5e, 0, 1, vhid }, 2101 }; 2102 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 2103 2104 switch (version) { 2105 case CARP_VERSION_CARP: 2106 sc->sc_advbase = CARP_DFLTINTV; 2107 sc->sc_init_counter = true; 2108 sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); 2109 sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 2110 sc->sc_carpaddr6.s6_addr8[15] = 0x12; 2111 break; 2112 case CARP_VERSION_VRRPv3: 2113 sc->sc_vrrp_adv_inter = 100; 2114 sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; 2115 sc->sc_vrrp_prio = 100; 2116 break; 2117 } 2118 2119 CARP_LOCK_INIT(sc); 2120 #ifdef INET 2121 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2122 #endif 2123 #ifdef INET6 2124 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2125 #endif 2126 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2127 2128 CIF_LOCK(cif); 2129 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 2130 CIF_UNLOCK(cif); 2131 2132 mtx_lock(&carp_mtx); 2133 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 2134 mtx_unlock(&carp_mtx); 2135 2136 return (sc); 2137 } 2138 2139 static void 2140 carp_grow_ifas(struct carp_softc *sc) 2141 { 2142 struct ifaddr **new; 2143 2144 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 2145 CARP_LOCK(sc); 2146 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 2147 free(sc->sc_ifas, M_CARP); 2148 sc->sc_ifas = new; 2149 sc->sc_ifasiz *= 2; 2150 CARP_UNLOCK(sc); 2151 } 2152 2153 static void 2154 carp_destroy(struct carp_softc *sc) 2155 { 2156 struct ifnet *ifp = sc->sc_carpdev; 2157 struct carp_if *cif = ifp->if_carp; 2158 2159 sx_assert(&carp_sx, SA_XLOCKED); 2160 2161 if (sc->sc_suppress) 2162 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 2163 CARP_UNLOCK(sc); 2164 2165 CIF_LOCK(cif); 2166 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 2167 CIF_UNLOCK(cif); 2168 2169 mtx_lock(&carp_mtx); 2170 LIST_REMOVE(sc, sc_next); 2171 mtx_unlock(&carp_mtx); 2172 2173 callout_drain(&sc->sc_ad_tmo); 2174 #ifdef INET 2175 callout_drain(&sc->sc_md_tmo); 2176 #endif 2177 #ifdef INET6 2178 callout_drain(&sc->sc_md6_tmo); 2179 #endif 2180 CARP_LOCK_DESTROY(sc); 2181 2182 free(sc->sc_ifas, M_CARP); 2183 free(sc, M_CARP); 2184 } 2185 2186 static struct carp_if* 2187 carp_alloc_if(struct ifnet *ifp) 2188 { 2189 struct carp_if *cif; 2190 int error; 2191 2192 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 2193 2194 if ((error = ifpromisc(ifp, 1)) != 0) 2195 printf("%s: ifpromisc(%s) failed: %d\n", 2196 __func__, if_name(ifp), error); 2197 else 2198 cif->cif_flags |= CIF_PROMISC; 2199 2200 CIF_LOCK_INIT(cif); 2201 cif->cif_ifp = ifp; 2202 TAILQ_INIT(&cif->cif_vrs); 2203 2204 IF_ADDR_WLOCK(ifp); 2205 ifp->if_carp = cif; 2206 if_ref(ifp); 2207 IF_ADDR_WUNLOCK(ifp); 2208 2209 return (cif); 2210 } 2211 2212 static void 2213 carp_free_if(struct carp_if *cif) 2214 { 2215 struct ifnet *ifp = cif->cif_ifp; 2216 2217 CIF_LOCK_ASSERT(cif); 2218 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 2219 __func__)); 2220 2221 IF_ADDR_WLOCK(ifp); 2222 ifp->if_carp = NULL; 2223 IF_ADDR_WUNLOCK(ifp); 2224 2225 CIF_LOCK_DESTROY(cif); 2226 2227 if (cif->cif_flags & CIF_PROMISC) 2228 ifpromisc(ifp, 0); 2229 if_rele(ifp); 2230 2231 free(cif, M_CARP); 2232 } 2233 2234 static bool 2235 carp_carprcp(void *arg, struct carp_softc *sc, int priv) 2236 { 2237 struct carpreq *carpr = arg; 2238 2239 CARP_LOCK(sc); 2240 carpr->carpr_state = sc->sc_state; 2241 carpr->carpr_vhid = sc->sc_vhid; 2242 switch (sc->sc_version) { 2243 case CARP_VERSION_CARP: 2244 carpr->carpr_advbase = sc->sc_advbase; 2245 carpr->carpr_advskew = sc->sc_advskew; 2246 if (priv) 2247 bcopy(sc->sc_key, carpr->carpr_key, 2248 sizeof(carpr->carpr_key)); 2249 else 2250 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 2251 break; 2252 case CARP_VERSION_VRRPv3: 2253 break; 2254 } 2255 CARP_UNLOCK(sc); 2256 2257 return (true); 2258 } 2259 2260 static int 2261 carp_ioctl_set(if_t ifp, struct carpkreq *carpr) 2262 { 2263 struct epoch_tracker et; 2264 struct carp_softc *sc = NULL; 2265 int error = 0; 2266 2267 if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID) 2268 return (EINVAL); 2269 2270 switch (carpr->carpr_version) { 2271 case CARP_VERSION_CARP: 2272 if (carpr->carpr_advbase != 0 && (carpr->carpr_advbase > 255 || 2273 carpr->carpr_advbase < CARP_DFLTINTV)) 2274 return (EINVAL); 2275 if (carpr->carpr_advskew < 0 || carpr->carpr_advskew >= 255) 2276 return (EINVAL); 2277 break; 2278 case CARP_VERSION_VRRPv3: 2279 /* XXXGL: shouldn't we check anything? */ 2280 break; 2281 default: 2282 return (EINVAL); 2283 } 2284 2285 if (ifp->if_carp) { 2286 IFNET_FOREACH_CARP(ifp, sc) 2287 if (sc->sc_vhid == carpr->carpr_vhid) 2288 break; 2289 } 2290 2291 if (sc == NULL) 2292 sc = carp_alloc(ifp, carpr->carpr_version, carpr->carpr_vhid); 2293 else if (sc->sc_version != carpr->carpr_version) 2294 return (EINVAL); 2295 2296 CARP_LOCK(sc); 2297 switch (sc->sc_version) { 2298 case CARP_VERSION_CARP: 2299 if (carpr->carpr_advbase != 0) 2300 sc->sc_advbase = carpr->carpr_advbase; 2301 sc->sc_advskew = carpr->carpr_advskew; 2302 if (carpr->carpr_addr.s_addr != INADDR_ANY) 2303 sc->sc_carpaddr = carpr->carpr_addr; 2304 if (!IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { 2305 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, 2306 sizeof(sc->sc_carpaddr6)); 2307 } 2308 if (carpr->carpr_key[0] != '\0') { 2309 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2310 carp_hmac_prepare(sc); 2311 } 2312 break; 2313 case CARP_VERSION_VRRPv3: 2314 if (carpr->carpr_vrrp_priority != 0) 2315 sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; 2316 if (carpr->carpr_vrrp_adv_inter) 2317 sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; 2318 break; 2319 } 2320 2321 if (sc->sc_state != INIT && 2322 carpr->carpr_state != sc->sc_state) { 2323 switch (carpr->carpr_state) { 2324 case BACKUP: 2325 callout_stop(&sc->sc_ad_tmo); 2326 carp_set_state(sc, BACKUP, 2327 "user requested via ifconfig"); 2328 carp_setrun(sc, 0); 2329 carp_delroute(sc); 2330 break; 2331 case MASTER: 2332 NET_EPOCH_ENTER(et); 2333 carp_master_down_locked(sc, 2334 "user requested via ifconfig"); 2335 NET_EPOCH_EXIT(et); 2336 break; 2337 default: 2338 break; 2339 } 2340 } 2341 CARP_UNLOCK(sc); 2342 2343 return (error); 2344 } 2345 2346 static int 2347 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr, 2348 bool (*outfn)(void *, struct carp_softc *, int), void *arg) 2349 { 2350 int priveleged; 2351 struct carp_softc *sc; 2352 2353 if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID) 2354 return (EINVAL); 2355 if (carpr->carpr_count < 1) 2356 return (EMSGSIZE); 2357 if (ifp->if_carp == NULL) 2358 return (ENOENT); 2359 2360 priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0); 2361 if (carpr->carpr_vhid != 0) { 2362 IFNET_FOREACH_CARP(ifp, sc) 2363 if (sc->sc_vhid == carpr->carpr_vhid) 2364 break; 2365 if (sc == NULL) 2366 return (ENOENT); 2367 2368 if (! outfn(arg, sc, priveleged)) 2369 return (ENOMEM); 2370 carpr->carpr_count = 1; 2371 } else { 2372 int count; 2373 2374 count = 0; 2375 IFNET_FOREACH_CARP(ifp, sc) 2376 count++; 2377 2378 if (count > carpr->carpr_count) 2379 return (EMSGSIZE); 2380 2381 IFNET_FOREACH_CARP(ifp, sc) { 2382 if (! outfn(arg, sc, priveleged)) 2383 return (ENOMEM); 2384 carpr->carpr_count = count; 2385 } 2386 } 2387 2388 return (0); 2389 } 2390 2391 int 2392 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 2393 { 2394 struct carpreq carpr; 2395 struct carpkreq carprk = { 2396 .carpr_version = CARP_VERSION_CARP, 2397 }; 2398 struct ifnet *ifp; 2399 int error = 0; 2400 2401 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 2402 return (error); 2403 2404 ifp = ifunit_ref(ifr->ifr_name); 2405 if ((error = carp_is_supported_if(ifp)) != 0) 2406 goto out; 2407 2408 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2409 error = EADDRNOTAVAIL; 2410 goto out; 2411 } 2412 2413 sx_xlock(&carp_sx); 2414 switch (cmd) { 2415 case SIOCSVH: 2416 if ((error = priv_check(td, PRIV_NETINET_CARP))) 2417 break; 2418 2419 memcpy(&carprk, &carpr, sizeof(carpr)); 2420 error = carp_ioctl_set(ifp, &carprk); 2421 break; 2422 2423 case SIOCGVH: 2424 error = carp_ioctl_get(ifp, td->td_ucred, &carpr, 2425 carp_carprcp, &carpr); 2426 if (error == 0) { 2427 error = copyout(&carpr, 2428 (char *)ifr_data_get_ptr(ifr), 2429 carpr.carpr_count * sizeof(carpr)); 2430 } 2431 break; 2432 default: 2433 error = EINVAL; 2434 } 2435 sx_xunlock(&carp_sx); 2436 2437 out: 2438 if (ifp != NULL) 2439 if_rele(ifp); 2440 2441 return (error); 2442 } 2443 2444 static int 2445 carp_get_vhid(struct ifaddr *ifa) 2446 { 2447 2448 if (ifa == NULL || ifa->ifa_carp == NULL) 2449 return (0); 2450 2451 return (ifa->ifa_carp->sc_vhid); 2452 } 2453 2454 int 2455 carp_attach(struct ifaddr *ifa, int vhid) 2456 { 2457 struct ifnet *ifp = ifa->ifa_ifp; 2458 struct carp_if *cif = ifp->if_carp; 2459 struct carp_softc *sc; 2460 int index, error; 2461 2462 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 2463 2464 switch (ifa->ifa_addr->sa_family) { 2465 #ifdef INET 2466 case AF_INET: 2467 #endif 2468 #ifdef INET6 2469 case AF_INET6: 2470 #endif 2471 break; 2472 default: 2473 return (EPROTOTYPE); 2474 } 2475 2476 sx_xlock(&carp_sx); 2477 if (ifp->if_carp == NULL) { 2478 sx_xunlock(&carp_sx); 2479 return (ENOPROTOOPT); 2480 } 2481 2482 IFNET_FOREACH_CARP(ifp, sc) 2483 if (sc->sc_vhid == vhid) 2484 break; 2485 if (sc == NULL) { 2486 sx_xunlock(&carp_sx); 2487 return (ENOENT); 2488 } 2489 2490 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 2491 if (error) { 2492 CIF_FREE(cif); 2493 sx_xunlock(&carp_sx); 2494 return (error); 2495 } 2496 2497 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 2498 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 2499 carp_grow_ifas(sc); 2500 2501 switch (ifa->ifa_addr->sa_family) { 2502 #ifdef INET 2503 case AF_INET: 2504 cif->cif_naddrs++; 2505 sc->sc_naddrs++; 2506 break; 2507 #endif 2508 #ifdef INET6 2509 case AF_INET6: 2510 cif->cif_naddrs6++; 2511 sc->sc_naddrs6++; 2512 break; 2513 #endif 2514 } 2515 2516 ifa_ref(ifa); 2517 2518 CARP_LOCK(sc); 2519 sc->sc_ifas[index - 1] = ifa; 2520 ifa->ifa_carp = sc; 2521 if (sc->sc_version == CARP_VERSION_CARP) 2522 carp_hmac_prepare(sc); 2523 carp_sc_state(sc); 2524 CARP_UNLOCK(sc); 2525 2526 sx_xunlock(&carp_sx); 2527 2528 return (0); 2529 } 2530 2531 void 2532 carp_detach(struct ifaddr *ifa, bool keep_cif) 2533 { 2534 struct ifnet *ifp = ifa->ifa_ifp; 2535 struct carp_if *cif = ifp->if_carp; 2536 struct carp_softc *sc = ifa->ifa_carp; 2537 int i, index; 2538 2539 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 2540 2541 sx_xlock(&carp_sx); 2542 2543 CARP_LOCK(sc); 2544 /* Shift array. */ 2545 index = sc->sc_naddrs + sc->sc_naddrs6; 2546 for (i = 0; i < index; i++) 2547 if (sc->sc_ifas[i] == ifa) 2548 break; 2549 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 2550 for (; i < index - 1; i++) 2551 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 2552 sc->sc_ifas[index - 1] = NULL; 2553 2554 switch (ifa->ifa_addr->sa_family) { 2555 #ifdef INET 2556 case AF_INET: 2557 cif->cif_naddrs--; 2558 sc->sc_naddrs--; 2559 break; 2560 #endif 2561 #ifdef INET6 2562 case AF_INET6: 2563 cif->cif_naddrs6--; 2564 sc->sc_naddrs6--; 2565 break; 2566 #endif 2567 } 2568 2569 carp_ifa_delroute(ifa); 2570 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2571 2572 ifa->ifa_carp = NULL; 2573 ifa_free(ifa); 2574 2575 if (sc->sc_version == CARP_VERSION_CARP) 2576 carp_hmac_prepare(sc); 2577 carp_sc_state(sc); 2578 2579 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2580 carp_destroy(sc); 2581 else 2582 CARP_UNLOCK(sc); 2583 2584 if (!keep_cif) 2585 CIF_FREE(cif); 2586 2587 sx_xunlock(&carp_sx); 2588 } 2589 2590 static void 2591 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2592 { 2593 2594 CARP_LOCK_ASSERT(sc); 2595 2596 if (sc->sc_state != state) { 2597 const char *carp_states[] = { CARP_STATES }; 2598 char subsys[IFNAMSIZ+5]; 2599 2600 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2601 if_name(sc->sc_carpdev)); 2602 2603 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2604 carp_states[sc->sc_state], carp_states[state], reason); 2605 2606 sc->sc_state = state; 2607 2608 devctl_notify("CARP", subsys, carp_states[state], NULL); 2609 } 2610 } 2611 2612 static void 2613 carp_linkstate(struct ifnet *ifp) 2614 { 2615 struct carp_softc *sc; 2616 2617 CIF_LOCK(ifp->if_carp); 2618 IFNET_FOREACH_CARP(ifp, sc) { 2619 CARP_LOCK(sc); 2620 carp_sc_state(sc); 2621 CARP_UNLOCK(sc); 2622 } 2623 CIF_UNLOCK(ifp->if_carp); 2624 } 2625 2626 static void 2627 carp_sc_state(struct carp_softc *sc) 2628 { 2629 2630 CARP_LOCK_ASSERT(sc); 2631 2632 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2633 !(sc->sc_carpdev->if_flags & IFF_UP) || 2634 !V_carp_allow) { 2635 callout_stop(&sc->sc_ad_tmo); 2636 #ifdef INET 2637 callout_stop(&sc->sc_md_tmo); 2638 #endif 2639 #ifdef INET6 2640 callout_stop(&sc->sc_md6_tmo); 2641 #endif 2642 carp_set_state(sc, INIT, "hardware interface down"); 2643 carp_setrun(sc, 0); 2644 carp_delroute(sc); 2645 if (!sc->sc_suppress) 2646 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2647 sc->sc_suppress = 1; 2648 } else { 2649 carp_set_state(sc, INIT, "hardware interface up"); 2650 carp_setrun(sc, 0); 2651 if (sc->sc_suppress) 2652 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2653 sc->sc_suppress = 0; 2654 } 2655 } 2656 2657 static void 2658 carp_demote_adj(int adj, char *reason) 2659 { 2660 atomic_add_int(&V_carp_demotion, adj); 2661 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2662 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2663 } 2664 2665 static int 2666 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2667 { 2668 int new, error; 2669 struct carp_softc *sc; 2670 2671 new = V_carp_allow; 2672 error = sysctl_handle_int(oidp, &new, 0, req); 2673 if (error || !req->newptr) 2674 return (error); 2675 2676 if (V_carp_allow != new) { 2677 V_carp_allow = new; 2678 2679 mtx_lock(&carp_mtx); 2680 LIST_FOREACH(sc, &carp_list, sc_next) { 2681 CARP_LOCK(sc); 2682 if (curvnet == sc->sc_carpdev->if_vnet) 2683 carp_sc_state(sc); 2684 CARP_UNLOCK(sc); 2685 } 2686 mtx_unlock(&carp_mtx); 2687 } 2688 2689 return (0); 2690 } 2691 2692 static int 2693 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2694 { 2695 int new, error; 2696 2697 new = V_carp_dscp; 2698 error = sysctl_handle_int(oidp, &new, 0, req); 2699 if (error || !req->newptr) 2700 return (error); 2701 2702 if (new < 0 || new > 63) 2703 return (EINVAL); 2704 2705 V_carp_dscp = new; 2706 2707 return (0); 2708 } 2709 2710 static int 2711 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2712 { 2713 int new, error; 2714 2715 new = V_carp_demotion; 2716 error = sysctl_handle_int(oidp, &new, 0, req); 2717 if (error || !req->newptr) 2718 return (error); 2719 2720 carp_demote_adj(new, "sysctl"); 2721 2722 return (0); 2723 } 2724 2725 static int 2726 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 2727 { 2728 if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN)) 2729 return (EINVAL); 2730 2731 memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla)); 2732 return (0); 2733 } 2734 2735 struct carp_nl_send_args { 2736 struct nlmsghdr *hdr; 2737 struct nl_pstate *npt; 2738 }; 2739 2740 static bool 2741 carp_nl_send(void *arg, struct carp_softc *sc, int priv) 2742 { 2743 struct carp_nl_send_args *nlsa = arg; 2744 struct nlmsghdr *hdr = nlsa->hdr; 2745 struct nl_pstate *npt = nlsa->npt; 2746 struct nl_writer *nw = npt->nw; 2747 struct genlmsghdr *ghdr_new; 2748 2749 if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { 2750 nlmsg_abort(nw); 2751 return (false); 2752 } 2753 2754 ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); 2755 if (ghdr_new == NULL) { 2756 nlmsg_abort(nw); 2757 return (false); 2758 } 2759 2760 ghdr_new->cmd = CARP_NL_CMD_GET; 2761 ghdr_new->version = 0; 2762 ghdr_new->reserved = 0; 2763 2764 CARP_LOCK(sc); 2765 2766 nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); 2767 nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); 2768 nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); 2769 switch (sc->sc_version) { 2770 case CARP_VERSION_CARP: 2771 nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); 2772 nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); 2773 nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); 2774 nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); 2775 if (priv) 2776 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), 2777 sc->sc_key); 2778 break; 2779 case CARP_VERSION_VRRPv3: 2780 nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); 2781 nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, 2782 sc->sc_vrrp_adv_inter); 2783 break; 2784 } 2785 2786 CARP_UNLOCK(sc); 2787 2788 if (! nlmsg_end(nw)) { 2789 nlmsg_abort(nw); 2790 return (false); 2791 } 2792 2793 return (true); 2794 } 2795 2796 struct nl_carp_parsed { 2797 unsigned int ifindex; 2798 char *ifname; 2799 uint32_t state; 2800 uint32_t vhid; 2801 int32_t advbase; 2802 int32_t advskew; 2803 char key[CARP_KEY_LEN]; 2804 struct in_addr addr; 2805 struct in6_addr addr6; 2806 carp_version_t version; 2807 uint8_t vrrp_prio; 2808 uint16_t vrrp_adv_inter; 2809 }; 2810 2811 #define _IN(_field) offsetof(struct genlmsghdr, _field) 2812 #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) 2813 2814 static const struct nlattr_parser nla_p_set[] = { 2815 { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, 2816 { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, 2817 { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 }, 2818 { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 }, 2819 { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key }, 2820 { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 }, 2821 { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, 2822 { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, 2823 { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, 2824 { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, 2825 { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, 2826 { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, 2827 }; 2828 static const struct nlfield_parser nlf_p_set[] = { 2829 }; 2830 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set); 2831 #undef _IN 2832 #undef _OUT 2833 2834 2835 static int 2836 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt) 2837 { 2838 struct nl_carp_parsed attrs = { }; 2839 struct carp_nl_send_args args; 2840 struct carpreq carpr = { }; 2841 struct epoch_tracker et; 2842 if_t ifp = NULL; 2843 int error; 2844 2845 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2846 if (error != 0) 2847 return (error); 2848 2849 NET_EPOCH_ENTER(et); 2850 if (attrs.ifname != NULL) 2851 ifp = ifunit_ref(attrs.ifname); 2852 else if (attrs.ifindex != 0) 2853 ifp = ifnet_byindex_ref(attrs.ifindex); 2854 NET_EPOCH_EXIT(et); 2855 2856 if ((error = carp_is_supported_if(ifp)) != 0) 2857 goto out; 2858 2859 hdr->nlmsg_flags |= NLM_F_MULTI; 2860 args.hdr = hdr; 2861 args.npt = npt; 2862 2863 carpr.carpr_vhid = attrs.vhid; 2864 carpr.carpr_count = CARP_MAXVHID; 2865 2866 sx_xlock(&carp_sx); 2867 error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr, 2868 carp_nl_send, &args); 2869 sx_xunlock(&carp_sx); 2870 2871 if (! nlmsg_end_dump(npt->nw, error, hdr)) 2872 error = ENOMEM; 2873 2874 out: 2875 if (ifp != NULL) 2876 if_rele(ifp); 2877 2878 return (error); 2879 } 2880 2881 static int 2882 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) 2883 { 2884 struct nl_carp_parsed attrs = { }; 2885 struct carpkreq carpr; 2886 struct epoch_tracker et; 2887 if_t ifp = NULL; 2888 int error; 2889 2890 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2891 if (error != 0) 2892 return (error); 2893 2894 if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID) 2895 return (EINVAL); 2896 if (attrs.state > CARP_MAXSTATE) 2897 return (EINVAL); 2898 if (attrs.version == 0) /* compat with pre-VRRPv3 */ 2899 attrs.version = CARP_VERSION_CARP; 2900 switch (attrs.version) { 2901 case CARP_VERSION_CARP: 2902 if (attrs.advbase < 0 || attrs.advskew < 0) 2903 return (EINVAL); 2904 if (attrs.advbase > 255) 2905 return (EINVAL); 2906 if (attrs.advskew >= 255) 2907 return (EINVAL); 2908 break; 2909 case CARP_VERSION_VRRPv3: 2910 if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) 2911 return (EINVAL); 2912 break; 2913 default: 2914 return (EINVAL); 2915 } 2916 2917 NET_EPOCH_ENTER(et); 2918 if (attrs.ifname != NULL) 2919 ifp = ifunit_ref(attrs.ifname); 2920 else if (attrs.ifindex != 0) 2921 ifp = ifnet_byindex_ref(attrs.ifindex); 2922 NET_EPOCH_EXIT(et); 2923 2924 if ((error = carp_is_supported_if(ifp)) != 0) 2925 goto out; 2926 2927 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2928 error = EADDRNOTAVAIL; 2929 goto out; 2930 } 2931 2932 carpr.carpr_count = 1; 2933 carpr.carpr_vhid = attrs.vhid; 2934 carpr.carpr_state = attrs.state; 2935 carpr.carpr_version = attrs.version; 2936 switch (attrs.version) { 2937 case CARP_VERSION_CARP: 2938 carpr.carpr_advbase = attrs.advbase; 2939 carpr.carpr_advskew = attrs.advskew; 2940 carpr.carpr_addr = attrs.addr; 2941 carpr.carpr_addr6 = attrs.addr6; 2942 memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); 2943 break; 2944 case CARP_VERSION_VRRPv3: 2945 carpr.carpr_vrrp_priority = attrs.vrrp_prio; 2946 carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; 2947 break; 2948 } 2949 2950 sx_xlock(&carp_sx); 2951 error = carp_ioctl_set(ifp, &carpr); 2952 sx_xunlock(&carp_sx); 2953 2954 out: 2955 if (ifp != NULL) 2956 if_rele(ifp); 2957 2958 return (error); 2959 } 2960 2961 static const struct nlhdr_parser *all_parsers[] = { 2962 &carp_parser 2963 }; 2964 2965 static const struct genl_cmd carp_cmds[] = { 2966 { 2967 .cmd_num = CARP_NL_CMD_GET, 2968 .cmd_name = "SIOCGVH", 2969 .cmd_cb = carp_nl_get, 2970 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | 2971 GENL_CMD_CAP_HASPOL, 2972 }, 2973 { 2974 .cmd_num = CARP_NL_CMD_SET, 2975 .cmd_name = "SIOCSVH", 2976 .cmd_cb = carp_nl_set, 2977 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, 2978 .cmd_priv = PRIV_NETINET_CARP, 2979 }, 2980 }; 2981 2982 static void 2983 carp_nl_register(void) 2984 { 2985 bool ret __diagused; 2986 int family_id __diagused; 2987 2988 NL_VERIFY_PARSERS(all_parsers); 2989 family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, 2990 CARP_NL_CMD_MAX); 2991 MPASS(family_id != 0); 2992 2993 ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds, 2994 NL_ARRAY_LEN(carp_cmds)); 2995 MPASS(ret); 2996 } 2997 2998 static void 2999 carp_nl_unregister(void) 3000 { 3001 genl_unregister_family(CARP_NL_FAMILY_NAME); 3002 } 3003 3004 static void 3005 carp_mod_cleanup(void) 3006 { 3007 3008 carp_nl_unregister(); 3009 3010 #ifdef INET 3011 (void)ipproto_unregister(IPPROTO_CARP); 3012 carp_iamatch_p = NULL; 3013 #endif 3014 #ifdef INET6 3015 (void)ip6proto_unregister(IPPROTO_CARP); 3016 carp_iamatch6_p = NULL; 3017 carp_macmatch6_p = NULL; 3018 #endif 3019 carp_ioctl_p = NULL; 3020 carp_attach_p = NULL; 3021 carp_detach_p = NULL; 3022 carp_get_vhid_p = NULL; 3023 carp_linkstate_p = NULL; 3024 carp_forus_p = NULL; 3025 carp_output_p = NULL; 3026 carp_demote_adj_p = NULL; 3027 carp_master_p = NULL; 3028 mtx_unlock(&carp_mtx); 3029 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 3030 mtx_destroy(&carp_mtx); 3031 sx_destroy(&carp_sx); 3032 } 3033 3034 static void 3035 ipcarp_sysinit(void) 3036 { 3037 3038 /* Load allow as tunable so to postpone carp start after module load */ 3039 TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow); 3040 } 3041 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL); 3042 3043 static int 3044 carp_mod_load(void) 3045 { 3046 int err; 3047 3048 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 3049 sx_init(&carp_sx, "carp_sx"); 3050 LIST_INIT(&carp_list); 3051 carp_get_vhid_p = carp_get_vhid; 3052 carp_forus_p = carp_forus; 3053 carp_output_p = carp_output; 3054 carp_linkstate_p = carp_linkstate; 3055 carp_ioctl_p = carp_ioctl; 3056 carp_attach_p = carp_attach; 3057 carp_detach_p = carp_detach; 3058 carp_demote_adj_p = carp_demote_adj; 3059 carp_master_p = carp_master; 3060 #ifdef INET6 3061 carp_iamatch6_p = carp_iamatch6; 3062 carp_macmatch6_p = carp_macmatch6; 3063 err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); 3064 if (err) { 3065 printf("carp: error %d registering with INET6\n", err); 3066 carp_mod_cleanup(); 3067 return (err); 3068 } 3069 #endif 3070 #ifdef INET 3071 carp_iamatch_p = carp_iamatch; 3072 err = ipproto_register(IPPROTO_CARP, carp_input, NULL); 3073 if (err) { 3074 printf("carp: error %d registering with INET\n", err); 3075 carp_mod_cleanup(); 3076 return (err); 3077 } 3078 #endif 3079 3080 carp_nl_register(); 3081 3082 return (0); 3083 } 3084 3085 static int 3086 carp_modevent(module_t mod, int type, void *data) 3087 { 3088 switch (type) { 3089 case MOD_LOAD: 3090 return carp_mod_load(); 3091 /* NOTREACHED */ 3092 case MOD_UNLOAD: 3093 mtx_lock(&carp_mtx); 3094 if (LIST_EMPTY(&carp_list)) 3095 carp_mod_cleanup(); 3096 else { 3097 mtx_unlock(&carp_mtx); 3098 return (EBUSY); 3099 } 3100 break; 3101 3102 default: 3103 return (EINVAL); 3104 } 3105 3106 return (0); 3107 } 3108 3109 static moduledata_t carp_mod = { 3110 "carp", 3111 carp_modevent, 3112 0 3113 }; 3114 3115 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 3116