1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include "opt_bpf.h" 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/devctl.h> 39 #include <sys/jail.h> 40 #include <sys/kassert.h> 41 #include <sys/kernel.h> 42 #include <sys/limits.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/module.h> 46 #include <sys/priv.h> 47 #include <sys/proc.h> 48 #include <sys/socket.h> 49 #include <sys/sockio.h> 50 #include <sys/sysctl.h> 51 #include <sys/syslog.h> 52 #include <sys/taskqueue.h> 53 #include <sys/counter.h> 54 55 #include <net/ethernet.h> 56 #include <net/if.h> 57 #include <net/if_var.h> 58 #include <net/if_dl.h> 59 #include <net/if_llatbl.h> 60 #include <net/if_private.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/vnet.h> 64 65 #if defined(INET) || defined(INET6) 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/ip_carp.h> 69 #include <netinet/ip_carp_nl.h> 70 #include <netinet/ip.h> 71 #include <machine/in_cksum.h> 72 #endif 73 #ifdef INET 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #endif 77 78 #ifdef INET6 79 #include <netinet/icmp6.h> 80 #include <netinet/ip6.h> 81 #include <netinet6/in6_var.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/scope6_var.h> 84 #include <netinet6/nd6.h> 85 #endif 86 87 #include <netlink/netlink.h> 88 #include <netlink/netlink_ctl.h> 89 #include <netlink/netlink_generic.h> 90 #include <netlink/netlink_message_parser.h> 91 92 #include <crypto/sha1.h> 93 94 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); 95 96 struct carp_softc { 97 struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ 98 struct ifaddr **sc_ifas; /* Our ifaddrs. */ 99 carp_version_t sc_version; /* carp or VRRPv3 */ 100 uint8_t sc_addr[ETHER_ADDR_LEN]; /* Our link level address. */ 101 struct callout sc_ad_tmo; /* Advertising timeout. */ 102 #ifdef INET 103 struct callout sc_md_tmo; /* Master down timeout. */ 104 #endif 105 #ifdef INET6 106 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ 107 #endif 108 struct mtx sc_mtx; 109 110 int sc_vhid; 111 union { 112 struct { /* sc_version == CARP_VERSION_CARP */ 113 int sc_advskew; 114 int sc_advbase; 115 struct in_addr sc_carpaddr; 116 struct in6_addr sc_carpaddr6; 117 uint64_t sc_counter; 118 bool sc_init_counter; 119 #define CARP_HMAC_PAD 64 120 unsigned char sc_key[CARP_KEY_LEN]; 121 unsigned char sc_pad[CARP_HMAC_PAD]; 122 SHA1_CTX sc_sha1; 123 }; 124 struct { /* sc_version == CARP_VERSION_VRRPv3 */ 125 uint8_t sc_vrrp_prio; 126 uint16_t sc_vrrp_adv_inter; 127 uint16_t sc_vrrp_master_inter; 128 }; 129 }; 130 int sc_naddrs; 131 int sc_naddrs6; 132 int sc_ifasiz; 133 enum { INIT = 0, BACKUP, MASTER } sc_state; 134 int sc_suppress; 135 int sc_sendad_errors; 136 #define CARP_SENDAD_MAX_ERRORS 3 137 int sc_sendad_success; 138 #define CARP_SENDAD_MIN_SUCCESS 3 139 140 TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ 141 LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ 142 }; 143 144 struct carp_if { 145 #ifdef INET 146 int cif_naddrs; 147 #endif 148 #ifdef INET6 149 int cif_naddrs6; 150 #endif 151 TAILQ_HEAD(, carp_softc) cif_vrs; 152 #ifdef INET 153 struct ip_moptions cif_imo; 154 #endif 155 #ifdef INET6 156 struct ip6_moptions cif_im6o; 157 #endif 158 struct ifnet *cif_ifp; 159 struct mtx cif_mtx; 160 uint32_t cif_flags; 161 #define CIF_PROMISC 0x00000001 162 }; 163 164 /* Kernel equivalent of struct carpreq, but with more fields for new features. 165 * */ 166 struct carpkreq { 167 int carpr_count; 168 int carpr_vhid; 169 int carpr_state; 170 int carpr_advskew; 171 int carpr_advbase; 172 unsigned char carpr_key[CARP_KEY_LEN]; 173 /* Everything above this is identical to carpreq */ 174 struct in_addr carpr_addr; 175 struct in6_addr carpr_addr6; 176 carp_version_t carpr_version; 177 uint8_t carpr_vrrp_priority; 178 uint16_t carpr_vrrp_adv_inter; 179 }; 180 181 /* 182 * Brief design of carp(4). 183 * 184 * Any carp-capable ifnet may have a list of carp softcs hanging off 185 * its ifp->if_carp pointer. Each softc represents one unique virtual 186 * host id, or vhid. The softc has a back pointer to the ifnet. All 187 * softcs are joined in a global list, which has quite limited use. 188 * 189 * Any interface address that takes part in CARP negotiation has a 190 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either 191 * AF_INET or AF_INET6 address. 192 * 193 * Although, one can get the softc's backpointer to ifnet and traverse 194 * through its ifp->if_addrhead queue to find all interface addresses 195 * involved in CARP, we keep a growable array of ifaddr pointers. This 196 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that 197 * do calls into the network stack, thus avoiding LORs. 198 * 199 * Locking: 200 * 201 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), 202 * callout-driven events and ioctl()s. 203 * 204 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. 205 * To traverse the global list we use the mutex carp_mtx. 206 * 207 * Known issues with locking: 208 * 209 * - Sending ad, we put the pointer to the softc in an mtag, and no reference 210 * counting is done on the softc. 211 * - On module unload we may race (?) with packet processing thread 212 * dereferencing our function pointers. 213 */ 214 215 /* Accept incoming CARP packets. */ 216 VNET_DEFINE_STATIC(int, carp_allow) = 1; 217 #define V_carp_allow VNET(carp_allow) 218 219 /* Set DSCP in outgoing CARP packets. */ 220 VNET_DEFINE_STATIC(int, carp_dscp) = 56; 221 #define V_carp_dscp VNET(carp_dscp) 222 223 /* Preempt slower nodes. */ 224 VNET_DEFINE_STATIC(int, carp_preempt) = 0; 225 #define V_carp_preempt VNET(carp_preempt) 226 227 /* Log level. */ 228 VNET_DEFINE_STATIC(int, carp_log) = 1; 229 #define V_carp_log VNET(carp_log) 230 231 /* Global advskew demotion. */ 232 VNET_DEFINE_STATIC(int, carp_demotion) = 0; 233 #define V_carp_demotion VNET(carp_demotion) 234 235 /* Send error demotion factor. */ 236 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; 237 #define V_carp_senderr_adj VNET(carp_senderr_adj) 238 239 /* Iface down demotion factor. */ 240 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 241 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 242 243 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 244 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 245 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 246 247 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 248 "CARP"); 249 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 250 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 251 &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", 252 "Accept incoming CARP packets"); 253 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 254 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 255 0, 0, carp_dscp_sysctl, "I", 256 "DSCP value for carp packets"); 257 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 258 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 259 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 260 &VNET_NAME(carp_log), 0, "CARP log level"); 261 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 262 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 263 0, 0, carp_demote_adj_sysctl, "I", 264 "Adjust demotion factor (skew of advskew)"); 265 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 266 CTLFLAG_VNET | CTLFLAG_RW, 267 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 268 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 269 CTLFLAG_VNET | CTLFLAG_RW, 270 &VNET_NAME(carp_ifdown_adj), 0, 271 "Interface down demotion factor adjustment"); 272 273 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 274 VNET_PCPUSTAT_SYSINIT(carpstats); 275 VNET_PCPUSTAT_SYSUNINIT(carpstats); 276 277 #define CARPSTATS_ADD(name, val) \ 278 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 279 sizeof(uint64_t)], (val)) 280 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 281 282 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 283 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 284 285 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 286 NULL, MTX_DEF) 287 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 288 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 289 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 290 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 291 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 292 NULL, MTX_DEF) 293 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 294 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 295 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 296 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 297 #define CIF_FREE(cif) do { \ 298 CIF_LOCK(cif); \ 299 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 300 carp_free_if(cif); \ 301 else \ 302 CIF_UNLOCK(cif); \ 303 } while (0) 304 305 #define CARP_LOG(...) do { \ 306 if (V_carp_log > 0) \ 307 log(LOG_INFO, "carp: " __VA_ARGS__); \ 308 } while (0) 309 310 #define CARP_DEBUG(...) do { \ 311 if (V_carp_log > 1) \ 312 log(LOG_DEBUG, __VA_ARGS__); \ 313 } while (0) 314 315 #define IFNET_FOREACH_IFA(ifp, ifa) \ 316 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 317 if ((ifa)->ifa_carp != NULL) 318 319 #define CARP_FOREACH_IFA(sc, ifa) \ 320 CARP_LOCK_ASSERT(sc); \ 321 for (int _i = 0; \ 322 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 323 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 324 ++_i) 325 326 #define IFNET_FOREACH_CARP(ifp, sc) \ 327 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 328 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 329 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 330 331 #define DEMOTE_ADVSKEW(sc) \ 332 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 333 CARP_MAXSKEW : \ 334 (((sc)->sc_advskew + V_carp_demotion < 0) ? \ 335 0 : ((sc)->sc_advskew + V_carp_demotion))) 336 337 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); 338 static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); 339 static struct carp_softc 340 *carp_alloc(struct ifnet *, carp_version_t, int); 341 static void carp_destroy(struct carp_softc *); 342 static struct carp_if 343 *carp_alloc_if(struct ifnet *); 344 static void carp_free_if(struct carp_if *); 345 static void carp_set_state(struct carp_softc *, int, const char* reason); 346 static void carp_sc_state(struct carp_softc *); 347 static void carp_setrun(struct carp_softc *, sa_family_t); 348 static void carp_master_down(void *); 349 static void carp_master_down_locked(struct carp_softc *, 350 const char* reason); 351 static void carp_send_ad_locked(struct carp_softc *); 352 static void vrrp_send_ad_locked(struct carp_softc *); 353 static void carp_addroute(struct carp_softc *); 354 static void carp_ifa_addroute(struct ifaddr *); 355 static void carp_delroute(struct carp_softc *); 356 static void carp_ifa_delroute(struct ifaddr *); 357 static void carp_send_ad_all(void *, int); 358 static void carp_demote_adj(int, char *); 359 360 static LIST_HEAD(, carp_softc) carp_list; 361 static struct mtx carp_mtx; 362 static struct sx carp_sx; 363 static struct task carp_sendall_task = 364 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 365 366 static int 367 carp_is_supported_if(if_t ifp) 368 { 369 if (ifp == NULL) 370 return (ENXIO); 371 372 switch (ifp->if_type) { 373 case IFT_ETHER: 374 case IFT_L2VLAN: 375 case IFT_BRIDGE: 376 break; 377 default: 378 return (EOPNOTSUPP); 379 } 380 381 return (0); 382 } 383 384 static void 385 carp_hmac_prepare(struct carp_softc *sc) 386 { 387 uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; 388 uint8_t vhid = sc->sc_vhid & 0xff; 389 struct ifaddr *ifa; 390 int i, found; 391 #ifdef INET 392 struct in_addr last, cur, in; 393 #endif 394 #ifdef INET6 395 struct in6_addr last6, cur6, in6; 396 #endif 397 398 CARP_LOCK_ASSERT(sc); 399 MPASS(sc->sc_version == CARP_VERSION_CARP); 400 401 /* Compute ipad from key. */ 402 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 403 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 404 for (i = 0; i < sizeof(sc->sc_pad); i++) 405 sc->sc_pad[i] ^= 0x36; 406 407 /* Precompute first part of inner hash. */ 408 SHA1Init(&sc->sc_sha1); 409 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 410 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 411 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 412 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 413 #ifdef INET 414 cur.s_addr = 0; 415 do { 416 found = 0; 417 last = cur; 418 cur.s_addr = 0xffffffff; 419 CARP_FOREACH_IFA(sc, ifa) { 420 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 421 if (ifa->ifa_addr->sa_family == AF_INET && 422 ntohl(in.s_addr) > ntohl(last.s_addr) && 423 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 424 cur.s_addr = in.s_addr; 425 found++; 426 } 427 } 428 if (found) 429 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 430 } while (found); 431 #endif /* INET */ 432 #ifdef INET6 433 memset(&cur6, 0, sizeof(cur6)); 434 do { 435 found = 0; 436 last6 = cur6; 437 memset(&cur6, 0xff, sizeof(cur6)); 438 CARP_FOREACH_IFA(sc, ifa) { 439 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 440 if (IN6_IS_SCOPE_EMBED(&in6)) 441 in6.s6_addr16[1] = 0; 442 if (ifa->ifa_addr->sa_family == AF_INET6 && 443 memcmp(&in6, &last6, sizeof(in6)) > 0 && 444 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 445 cur6 = in6; 446 found++; 447 } 448 } 449 if (found) 450 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 451 } while (found); 452 #endif /* INET6 */ 453 454 /* convert ipad to opad */ 455 for (i = 0; i < sizeof(sc->sc_pad); i++) 456 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 457 } 458 459 static void 460 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 461 unsigned char md[20]) 462 { 463 SHA1_CTX sha1ctx; 464 465 CARP_LOCK_ASSERT(sc); 466 467 /* fetch first half of inner hash */ 468 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 469 470 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 471 SHA1Final(md, &sha1ctx); 472 473 /* outer hash */ 474 SHA1Init(&sha1ctx); 475 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 476 SHA1Update(&sha1ctx, md, 20); 477 SHA1Final(md, &sha1ctx); 478 } 479 480 static int 481 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 482 unsigned char md[20]) 483 { 484 unsigned char md2[20]; 485 486 CARP_LOCK_ASSERT(sc); 487 488 carp_hmac_generate(sc, counter, md2); 489 490 return (bcmp(md, md2, sizeof(md2))); 491 } 492 493 static int 494 vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) 495 { 496 uint16_t cksum; 497 498 /* 499 * Note that VRRPv3 checksums are different from CARP checksums. 500 * Carp just calculates the checksum over the packet. 501 * VRRPv3 includes the pseudo-header checksum as well. 502 */ 503 cksum = in_cksum_skip(m, off + len, off); 504 cksum -= phdrcksum; 505 506 return (cksum); 507 } 508 509 /* 510 * process input packet. 511 * we have rearranged checks order compared to the rfc, 512 * but it seems more efficient this way or not possible otherwise. 513 */ 514 #ifdef INET 515 static int 516 carp_input(struct mbuf **mp, int *offp, int proto) 517 { 518 struct mbuf *m = *mp; 519 struct ip *ip; 520 struct vrrpv3_header *vh; 521 int iplen; 522 int minlen; 523 int totlen; 524 525 iplen = *offp; 526 *mp = NULL; 527 528 CARPSTATS_INC(carps_ipackets); 529 530 if (!V_carp_allow) { 531 m_freem(m); 532 return (IPPROTO_DONE); 533 } 534 535 /* Ensure we have enough header to figure out the version. */ 536 if (m->m_pkthdr.len < iplen + sizeof(*vh)) { 537 CARPSTATS_INC(carps_badlen); 538 CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " 539 "on %s\n", __func__, m->m_len - sizeof(struct ip), 540 if_name(m->m_pkthdr.rcvif)); 541 m_freem(m); 542 return (IPPROTO_DONE); 543 } 544 545 if (m->m_len < iplen + sizeof(*vh)) { 546 if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { 547 CARPSTATS_INC(carps_hdrops); 548 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 549 return (IPPROTO_DONE); 550 } 551 } 552 ip = mtod(m, struct ip *); 553 totlen = ntohs(ip->ip_len); 554 vh = (struct vrrpv3_header *)((char *)ip + iplen); 555 556 switch (vh->vrrp_version) { 557 case CARP_VERSION_CARP: 558 minlen = sizeof(struct carp_header); 559 break; 560 case CARP_VERSION_VRRPv3: 561 minlen = sizeof(struct vrrpv3_header); 562 break; 563 default: 564 CARPSTATS_INC(carps_badver); 565 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 566 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 567 m_freem(m); 568 return (IPPROTO_DONE); 569 } 570 571 /* And now check the length again but with the real minimal length. */ 572 if (m->m_pkthdr.len < iplen + minlen) { 573 CARPSTATS_INC(carps_badlen); 574 CARP_DEBUG("%s: received len %zd < %d " 575 "on %s\n", __func__, m->m_len - sizeof(struct ip), 576 iplen + minlen, 577 if_name(m->m_pkthdr.rcvif)); 578 m_freem(m); 579 return (IPPROTO_DONE); 580 } 581 582 if (m->m_len < iplen + minlen) { 583 if ((m = m_pullup(m, iplen + minlen)) == NULL) { 584 CARPSTATS_INC(carps_hdrops); 585 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 586 return (IPPROTO_DONE); 587 } 588 ip = mtod(m, struct ip *); 589 vh = (struct vrrpv3_header *)((char *)ip + iplen); 590 } 591 592 switch (vh->vrrp_version) { 593 case CARP_VERSION_CARP: { 594 struct carp_header *ch; 595 596 /* verify the CARP checksum */ 597 if (in_cksum_skip(m, totlen, iplen)) { 598 CARPSTATS_INC(carps_badsum); 599 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 600 if_name(m->m_pkthdr.rcvif)); 601 m_freem(m); 602 break; 603 } 604 ch = (struct carp_header *)((char *)ip + iplen); 605 carp_input_c(m, ch, AF_INET, ip->ip_ttl); 606 break; 607 } 608 case CARP_VERSION_VRRPv3: { 609 uint16_t phdrcksum; 610 611 phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 612 htonl((u_short)(totlen - iplen) + ip->ip_p)); 613 vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, 614 phdrcksum); 615 break; 616 } 617 default: 618 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 619 } 620 621 return (IPPROTO_DONE); 622 } 623 #endif 624 625 #ifdef INET6 626 static int 627 carp6_input(struct mbuf **mp, int *offp, int proto) 628 { 629 struct mbuf *m = *mp; 630 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 631 struct vrrpv3_header *vh; 632 u_int len, minlen; 633 634 CARPSTATS_INC(carps_ipackets6); 635 636 if (!V_carp_allow) { 637 m_freem(m); 638 return (IPPROTO_DONE); 639 } 640 641 /* check if received on a valid carp interface */ 642 if (m->m_pkthdr.rcvif->if_carp == NULL) { 643 CARPSTATS_INC(carps_badif); 644 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 645 __func__, if_name(m->m_pkthdr.rcvif)); 646 m_freem(m); 647 return (IPPROTO_DONE); 648 } 649 650 if (m->m_len < *offp + sizeof(*vh)) { 651 len = m->m_len; 652 m = m_pullup(m, *offp + sizeof(*vh)); 653 if (m == NULL) { 654 CARPSTATS_INC(carps_badlen); 655 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 656 return (IPPROTO_DONE); 657 } 658 ip6 = mtod(m, struct ip6_hdr *); 659 } 660 vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); 661 662 switch (vh->vrrp_version) { 663 case CARP_VERSION_CARP: 664 minlen = sizeof(struct carp_header); 665 break; 666 case CARP_VERSION_VRRPv3: 667 minlen = sizeof(struct vrrpv3_header); 668 break; 669 default: 670 CARPSTATS_INC(carps_badver); 671 CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, 672 vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); 673 m_freem(m); 674 return (IPPROTO_DONE); 675 } 676 677 /* And now check the length again but with the real minimal length. */ 678 if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { 679 CARPSTATS_INC(carps_badlen); 680 CARP_DEBUG("%s: received len %zd < %zd " 681 "on %s\n", __func__, m->m_len - sizeof(struct ip), 682 sizeof(*ip6) + minlen, 683 if_name(m->m_pkthdr.rcvif)); 684 m_freem(m); 685 return (IPPROTO_DONE); 686 } 687 688 if (m->m_len < sizeof(*ip6) + minlen) { 689 if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { 690 CARPSTATS_INC(carps_hdrops); 691 CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); 692 return (IPPROTO_DONE); 693 } 694 ip6 = mtod(m, struct ip6_hdr *); 695 vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 696 } 697 698 switch (vh->vrrp_version) { 699 case CARP_VERSION_CARP: { 700 struct carp_header *ch; 701 702 /* verify the CARP checksum */ 703 if (in_cksum_skip(m, *offp + sizeof(struct carp_header), 704 *offp)) { 705 CARPSTATS_INC(carps_badsum); 706 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 707 if_name(m->m_pkthdr.rcvif)); 708 m_freem(m); 709 break; 710 } 711 ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); 712 carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); 713 break; 714 } 715 case CARP_VERSION_VRRPv3: { 716 uint16_t phdrcksum; 717 718 phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), 719 ip6->ip6_nxt, 0); 720 vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, 721 ntohs(ip6->ip6_plen), phdrcksum); 722 break; 723 } 724 default: 725 KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); 726 } 727 return (IPPROTO_DONE); 728 } 729 #endif /* INET6 */ 730 731 /* 732 * This routine should not be necessary at all, but some switches 733 * (VMWare ESX vswitches) can echo our own packets back at us, 734 * and we must ignore them or they will cause us to drop out of 735 * MASTER mode. 736 * 737 * We cannot catch all cases of network loops. Instead, what we 738 * do here is catch any packet that arrives with a carp header 739 * with a VHID of 0, that comes from an address that is our own. 740 * These packets are by definition "from us" (even if they are from 741 * a misconfigured host that is pretending to be us). 742 * 743 * The VHID test is outside this mini-function. 744 */ 745 static int 746 carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 747 { 748 #ifdef INET 749 struct ip *ip4; 750 struct in_addr in4; 751 #endif 752 #ifdef INET6 753 struct ip6_hdr *ip6; 754 struct in6_addr in6; 755 #endif 756 757 switch (af) { 758 #ifdef INET 759 case AF_INET: 760 ip4 = mtod(m, struct ip *); 761 in4 = ifatoia(ifa)->ia_addr.sin_addr; 762 return (in4.s_addr == ip4->ip_src.s_addr); 763 #endif 764 #ifdef INET6 765 case AF_INET6: 766 ip6 = mtod(m, struct ip6_hdr *); 767 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 768 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 769 #endif 770 default: 771 break; 772 } 773 return (0); 774 } 775 776 static struct ifaddr * 777 carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) 778 { 779 struct ifnet *ifp = m->m_pkthdr.rcvif; 780 struct ifaddr *ifa, *match; 781 int error; 782 783 NET_EPOCH_ASSERT(); 784 785 /* 786 * Verify that the VHID is valid on the receiving interface. 787 * 788 * There should be just one match. If there are none 789 * the VHID is not valid and we drop the packet. If 790 * there are multiple VHID matches, take just the first 791 * one, for compatibility with previous code. While we're 792 * scanning, check for obvious loops in the network topology 793 * (these should never happen, and as noted above, we may 794 * miss real loops; this is just a double-check). 795 */ 796 error = 0; 797 match = NULL; 798 IFNET_FOREACH_IFA(ifp, ifa) { 799 if (match == NULL && ifa->ifa_carp != NULL && 800 ifa->ifa_addr->sa_family == af && 801 ifa->ifa_carp->sc_vhid == vhid) 802 match = ifa; 803 if (vhid == 0 && carp_source_is_self(m, ifa, af)) 804 error = ELOOP; 805 } 806 ifa = error ? NULL : match; 807 if (ifa != NULL) 808 ifa_ref(ifa); 809 810 if (ifa == NULL) { 811 if (error == ELOOP) { 812 CARP_DEBUG("dropping looped packet on interface %s\n", 813 if_name(ifp)); 814 CARPSTATS_INC(carps_badif); /* ??? */ 815 } else { 816 CARPSTATS_INC(carps_badvhid); 817 } 818 } 819 820 return (ifa); 821 } 822 823 static void 824 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) 825 { 826 struct ifnet *ifp = m->m_pkthdr.rcvif; 827 struct ifaddr *ifa; 828 struct carp_softc *sc; 829 uint64_t tmp_counter; 830 struct timeval sc_tv, ch_tv; 831 bool multicast = false; 832 833 NET_EPOCH_ASSERT(); 834 MPASS(ch->carp_version == CARP_VERSION_CARP); 835 836 ifa = carp_find_ifa(m, af, ch->carp_vhid); 837 if (ifa == NULL) { 838 m_freem(m); 839 return; 840 } 841 842 sc = ifa->ifa_carp; 843 CARP_LOCK(sc); 844 845 /* verify the CARP version. */ 846 if (sc->sc_version != CARP_VERSION_CARP) { 847 CARP_UNLOCK(sc); 848 849 CARPSTATS_INC(carps_badver); 850 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 851 ch->carp_version); 852 ifa_free(ifa); 853 m_freem(m); 854 return; 855 } 856 857 if (ifa->ifa_addr->sa_family == AF_INET) { 858 multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); 859 } else { 860 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); 861 } 862 ifa_free(ifa); 863 864 /* verify that the IP TTL is 255, but only if we're not in unicast mode. */ 865 if (multicast && ttl != CARP_DFLTTL) { 866 CARPSTATS_INC(carps_badttl); 867 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 868 ttl, if_name(m->m_pkthdr.rcvif)); 869 goto out; 870 } 871 872 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 873 CARPSTATS_INC(carps_badauth); 874 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 875 sc->sc_vhid, if_name(ifp)); 876 goto out; 877 } 878 879 tmp_counter = ntohl(ch->carp_counter[0]); 880 tmp_counter = tmp_counter<<32; 881 tmp_counter += ntohl(ch->carp_counter[1]); 882 883 /* XXX Replay protection goes here */ 884 885 sc->sc_init_counter = false; 886 sc->sc_counter = tmp_counter; 887 888 sc_tv.tv_sec = sc->sc_advbase; 889 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 890 ch_tv.tv_sec = ch->carp_advbase; 891 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 892 893 switch (sc->sc_state) { 894 case INIT: 895 break; 896 case MASTER: 897 /* 898 * If we receive an advertisement from a master who's going to 899 * be more frequent than us, go into BACKUP state. 900 */ 901 if (timevalcmp(&sc_tv, &ch_tv, >) || 902 timevalcmp(&sc_tv, &ch_tv, ==)) { 903 callout_stop(&sc->sc_ad_tmo); 904 carp_set_state(sc, BACKUP, 905 "more frequent advertisement received"); 906 carp_setrun(sc, 0); 907 carp_delroute(sc); 908 } 909 break; 910 case BACKUP: 911 /* 912 * If we're pre-empting masters who advertise slower than us, 913 * and this one claims to be slower, treat him as down. 914 */ 915 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 916 carp_master_down_locked(sc, 917 "preempting a slower master"); 918 break; 919 } 920 921 /* 922 * If the master is going to advertise at such a low frequency 923 * that he's guaranteed to time out, we'd might as well just 924 * treat him as timed out now. 925 */ 926 sc_tv.tv_sec = sc->sc_advbase * 3; 927 if (timevalcmp(&sc_tv, &ch_tv, <)) { 928 carp_master_down_locked(sc, "master will time out"); 929 break; 930 } 931 932 /* 933 * Otherwise, we reset the counter and wait for the next 934 * advertisement. 935 */ 936 carp_setrun(sc, af); 937 break; 938 } 939 940 out: 941 CARP_UNLOCK(sc); 942 m_freem(m); 943 } 944 945 static void 946 vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, 947 int len, uint16_t phdrcksum) 948 { 949 struct vrrpv3_header *vh = mtodo(m, off); 950 struct ifnet *ifp = m->m_pkthdr.rcvif; 951 struct ifaddr *ifa; 952 struct carp_softc *sc; 953 954 NET_EPOCH_ASSERT(); 955 MPASS(vh->vrrp_version == CARP_VERSION_VRRPv3); 956 957 ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); 958 if (ifa == NULL) { 959 m_freem(m); 960 return; 961 } 962 963 sc = ifa->ifa_carp; 964 CARP_LOCK(sc); 965 966 ifa_free(ifa); 967 968 /* verify the CARP version. */ 969 if (sc->sc_version != CARP_VERSION_VRRPv3) { 970 CARP_UNLOCK(sc); 971 972 CARPSTATS_INC(carps_badver); 973 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 974 vh->vrrp_version); 975 m_freem(m); 976 return; 977 } 978 979 /* verify that the IP TTL is 255. */ 980 if (ttl != CARP_DFLTTL) { 981 CARPSTATS_INC(carps_badttl); 982 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 983 ttl, if_name(m->m_pkthdr.rcvif)); 984 goto out; 985 } 986 987 if (vrrp_checksum_verify(m, off, len, phdrcksum)) { 988 CARPSTATS_INC(carps_badsum); 989 CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, 990 sc->sc_vhid, if_name(ifp)); 991 goto out; 992 } 993 994 /* RFC9568, 7.1 Receiving VRRP packets. */ 995 if (sc->sc_vrrp_prio == 255) { 996 CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", 997 __func__); 998 goto out; 999 } 1000 1001 /* XXX TODO Check IP address payload. */ 1002 1003 sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); 1004 1005 switch (sc->sc_state) { 1006 case INIT: 1007 break; 1008 case MASTER: 1009 /* 1010 * If we receive an advertisement from a master who's going to 1011 * be more frequent than us, go into BACKUP state. 1012 * Same if the peer has a higher priority than us. 1013 */ 1014 if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || 1015 vh->vrrp_priority > sc->sc_vrrp_prio) { 1016 callout_stop(&sc->sc_ad_tmo); 1017 carp_set_state(sc, BACKUP, 1018 "more frequent advertisement received"); 1019 carp_setrun(sc, 0); 1020 carp_delroute(sc); 1021 } 1022 break; 1023 case BACKUP: 1024 /* 1025 * If we're pre-empting masters who advertise slower than us, 1026 * and this one claims to be slower, treat him as down. 1027 */ 1028 if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter 1029 || vh->vrrp_priority < sc->sc_vrrp_prio)) { 1030 carp_master_down_locked(sc, 1031 "preempting a slower master"); 1032 break; 1033 } 1034 1035 /* 1036 * Otherwise, we reset the counter and wait for the next 1037 * advertisement. 1038 */ 1039 carp_setrun(sc, af); 1040 break; 1041 } 1042 1043 out: 1044 CARP_UNLOCK(sc); 1045 m_freem(m); 1046 } 1047 1048 static int 1049 carp_tag(struct carp_softc *sc, struct mbuf *m) 1050 { 1051 struct m_tag *mtag; 1052 1053 /* Tag packet for carp_output */ 1054 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(sc->sc_vhid), 1055 M_NOWAIT)) == NULL) { 1056 m_freem(m); 1057 CARPSTATS_INC(carps_onomem); 1058 return (ENOMEM); 1059 } 1060 bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); 1061 m_tag_prepend(m, mtag); 1062 1063 return (0); 1064 } 1065 1066 static void 1067 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 1068 { 1069 1070 MPASS(sc->sc_version == CARP_VERSION_CARP); 1071 1072 if (sc->sc_init_counter) { 1073 /* this could also be seconds since unix epoch */ 1074 sc->sc_counter = arc4random(); 1075 sc->sc_counter = sc->sc_counter << 32; 1076 sc->sc_counter += arc4random(); 1077 } else 1078 sc->sc_counter++; 1079 1080 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 1081 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 1082 1083 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 1084 } 1085 1086 static inline void 1087 send_ad_locked(struct carp_softc *sc) 1088 { 1089 switch (sc->sc_version) { 1090 case CARP_VERSION_CARP: 1091 carp_send_ad_locked(sc); 1092 break; 1093 case CARP_VERSION_VRRPv3: 1094 vrrp_send_ad_locked(sc); 1095 break; 1096 } 1097 } 1098 1099 /* 1100 * To avoid LORs and possible recursions this function shouldn't 1101 * be called directly, but scheduled via taskqueue. 1102 */ 1103 static void 1104 carp_send_ad_all(void *ctx __unused, int pending __unused) 1105 { 1106 struct carp_softc *sc; 1107 struct epoch_tracker et; 1108 1109 NET_EPOCH_ENTER(et); 1110 mtx_lock(&carp_mtx); 1111 LIST_FOREACH(sc, &carp_list, sc_next) 1112 if (sc->sc_state == MASTER) { 1113 CARP_LOCK(sc); 1114 CURVNET_SET(sc->sc_carpdev->if_vnet); 1115 send_ad_locked(sc); 1116 CURVNET_RESTORE(); 1117 CARP_UNLOCK(sc); 1118 } 1119 mtx_unlock(&carp_mtx); 1120 NET_EPOCH_EXIT(et); 1121 } 1122 1123 /* Send a periodic advertisement, executed in callout context. */ 1124 static void 1125 carp_callout(void *v) 1126 { 1127 struct carp_softc *sc = v; 1128 struct epoch_tracker et; 1129 1130 NET_EPOCH_ENTER(et); 1131 CARP_LOCK_ASSERT(sc); 1132 CURVNET_SET(sc->sc_carpdev->if_vnet); 1133 send_ad_locked(sc); 1134 CURVNET_RESTORE(); 1135 CARP_UNLOCK(sc); 1136 NET_EPOCH_EXIT(et); 1137 } 1138 1139 static void 1140 carp_send_ad_error(struct carp_softc *sc, int error) 1141 { 1142 1143 /* 1144 * We track errors and successful sends with this logic: 1145 * - Any error resets success counter to 0. 1146 * - MAX_ERRORS triggers demotion. 1147 * - MIN_SUCCESS successes resets error counter to 0. 1148 * - MIN_SUCCESS reverts demotion, if it was triggered before. 1149 */ 1150 if (error) { 1151 if (sc->sc_sendad_errors < INT_MAX) 1152 sc->sc_sendad_errors++; 1153 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1154 static const char fmt[] = "send error %d on %s"; 1155 char msg[sizeof(fmt) + IFNAMSIZ]; 1156 1157 sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); 1158 carp_demote_adj(V_carp_senderr_adj, msg); 1159 } 1160 sc->sc_sendad_success = 0; 1161 } else if (sc->sc_sendad_errors > 0) { 1162 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 1163 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1164 static const char fmt[] = "send ok on %s"; 1165 char msg[sizeof(fmt) + IFNAMSIZ]; 1166 1167 sprintf(msg, fmt, if_name(sc->sc_carpdev)); 1168 carp_demote_adj(-V_carp_senderr_adj, msg); 1169 } 1170 sc->sc_sendad_errors = 0; 1171 } 1172 } 1173 } 1174 1175 /* 1176 * Pick the best ifaddr on the given ifp for sending CARP 1177 * advertisements. 1178 * 1179 * "Best" here is defined by ifa_preferred(). This function is much 1180 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 1181 * 1182 * (This could be simplified to return the actual address, except that 1183 * it has a different format in AF_INET and AF_INET6.) 1184 */ 1185 static struct ifaddr * 1186 carp_best_ifa(int af, struct ifnet *ifp) 1187 { 1188 struct ifaddr *ifa, *best; 1189 1190 NET_EPOCH_ASSERT(); 1191 1192 if (af >= AF_MAX) 1193 return (NULL); 1194 best = NULL; 1195 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1196 if (ifa->ifa_addr->sa_family == af && 1197 (best == NULL || ifa_preferred(best, ifa))) 1198 best = ifa; 1199 } 1200 if (best != NULL) 1201 ifa_ref(best); 1202 return (best); 1203 } 1204 1205 static void 1206 carp_send_ad_locked(struct carp_softc *sc) 1207 { 1208 struct carp_header ch; 1209 struct timeval tv; 1210 struct ifaddr *ifa; 1211 struct carp_header *ch_ptr; 1212 struct mbuf *m; 1213 int len, advskew; 1214 1215 NET_EPOCH_ASSERT(); 1216 CARP_LOCK_ASSERT(sc); 1217 MPASS(sc->sc_version == CARP_VERSION_CARP); 1218 1219 advskew = DEMOTE_ADVSKEW(sc); 1220 tv.tv_sec = sc->sc_advbase; 1221 tv.tv_usec = advskew * 1000000 / 256; 1222 1223 ch.carp_version = CARP_VERSION_CARP; 1224 ch.carp_type = CARP_ADVERTISEMENT; 1225 ch.carp_vhid = sc->sc_vhid; 1226 ch.carp_advbase = sc->sc_advbase; 1227 ch.carp_advskew = advskew; 1228 ch.carp_authlen = 7; /* XXX DEFINE */ 1229 ch.carp_pad1 = 0; /* must be zero */ 1230 ch.carp_cksum = 0; 1231 1232 /* XXXGL: OpenBSD picks first ifaddr with needed family. */ 1233 1234 #ifdef INET 1235 if (sc->sc_naddrs) { 1236 struct ip *ip; 1237 1238 m = m_gethdr(M_NOWAIT, MT_DATA); 1239 if (m == NULL) { 1240 CARPSTATS_INC(carps_onomem); 1241 goto resched; 1242 } 1243 len = sizeof(*ip) + sizeof(ch); 1244 m->m_pkthdr.len = len; 1245 m->m_pkthdr.rcvif = NULL; 1246 m->m_len = len; 1247 M_ALIGN(m, m->m_len); 1248 if (IN_MULTICAST(sc->sc_carpaddr.s_addr)) 1249 m->m_flags |= M_MCAST; 1250 ip = mtod(m, struct ip *); 1251 ip->ip_v = IPVERSION; 1252 ip->ip_hl = sizeof(*ip) >> 2; 1253 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1254 ip->ip_len = htons(len); 1255 ip->ip_off = htons(IP_DF); 1256 ip->ip_ttl = CARP_DFLTTL; 1257 ip->ip_p = IPPROTO_CARP; 1258 ip->ip_sum = 0; 1259 ip_fillid(ip); 1260 1261 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1262 if (ifa != NULL) { 1263 ip->ip_src.s_addr = 1264 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1265 ifa_free(ifa); 1266 } else 1267 ip->ip_src.s_addr = 0; 1268 ip->ip_dst = sc->sc_carpaddr; 1269 1270 ch_ptr = (struct carp_header *)(&ip[1]); 1271 bcopy(&ch, ch_ptr, sizeof(ch)); 1272 carp_prepare_ad(m, sc, ch_ptr); 1273 if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)) && 1274 carp_tag(sc, m) != 0) 1275 goto resched; 1276 1277 m->m_data += sizeof(*ip); 1278 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 1279 m->m_data -= sizeof(*ip); 1280 1281 CARPSTATS_INC(carps_opackets); 1282 1283 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1284 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1285 } 1286 #endif /* INET */ 1287 #ifdef INET6 1288 if (sc->sc_naddrs6) { 1289 struct ip6_hdr *ip6; 1290 1291 m = m_gethdr(M_NOWAIT, MT_DATA); 1292 if (m == NULL) { 1293 CARPSTATS_INC(carps_onomem); 1294 goto resched; 1295 } 1296 len = sizeof(*ip6) + sizeof(ch); 1297 m->m_pkthdr.len = len; 1298 m->m_pkthdr.rcvif = NULL; 1299 m->m_len = len; 1300 M_ALIGN(m, m->m_len); 1301 ip6 = mtod(m, struct ip6_hdr *); 1302 bzero(ip6, sizeof(*ip6)); 1303 ip6->ip6_vfc |= IPV6_VERSION; 1304 /* Traffic class isn't defined in ip6 struct instead 1305 * it gets offset into flowid field */ 1306 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1307 IPTOS_DSCP_OFFSET)); 1308 ip6->ip6_hlim = CARP_DFLTTL; 1309 ip6->ip6_nxt = IPPROTO_CARP; 1310 1311 /* set the source address */ 1312 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1313 if (ifa != NULL) { 1314 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1315 sizeof(struct in6_addr)); 1316 ifa_free(ifa); 1317 } else 1318 /* This should never happen with IPv6. */ 1319 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1320 1321 /* Set the multicast destination. */ 1322 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); 1323 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || 1324 IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { 1325 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1326 m_freem(m); 1327 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1328 goto resched; 1329 } 1330 } 1331 1332 ch_ptr = (struct carp_header *)(&ip6[1]); 1333 bcopy(&ch, ch_ptr, sizeof(ch)); 1334 carp_prepare_ad(m, sc, ch_ptr); 1335 if (IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6) && 1336 carp_tag(sc, m) != 0) 1337 goto resched; 1338 1339 m->m_data += sizeof(*ip6); 1340 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1341 m->m_data -= sizeof(*ip6); 1342 1343 CARPSTATS_INC(carps_opackets6); 1344 1345 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1346 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1347 } 1348 #endif /* INET6 */ 1349 1350 resched: 1351 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc); 1352 } 1353 1354 static void 1355 vrrp_send_ad_locked(struct carp_softc *sc) 1356 { 1357 struct vrrpv3_header *vh_ptr; 1358 struct ifaddr *ifa; 1359 struct mbuf *m; 1360 int len; 1361 struct vrrpv3_header vh = { 1362 .vrrp_version = CARP_VERSION_VRRPv3, 1363 .vrrp_type = VRRP_TYPE_ADVERTISEMENT, 1364 .vrrp_vrtid = sc->sc_vhid, 1365 .vrrp_priority = sc->sc_vrrp_prio, 1366 .vrrp_count_addr = 0, 1367 .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter), 1368 .vrrp_checksum = 0, 1369 }; 1370 1371 NET_EPOCH_ASSERT(); 1372 CARP_LOCK_ASSERT(sc); 1373 MPASS(sc->sc_version == CARP_VERSION_VRRPv3); 1374 1375 #ifdef INET 1376 if (sc->sc_naddrs) { 1377 struct ip *ip; 1378 1379 m = m_gethdr(M_NOWAIT, MT_DATA); 1380 if (m == NULL) { 1381 CARPSTATS_INC(carps_onomem); 1382 goto resched; 1383 } 1384 len = sizeof(*ip) + sizeof(vh); 1385 m->m_pkthdr.len = len; 1386 m->m_pkthdr.rcvif = NULL; 1387 m->m_len = len; 1388 M_ALIGN(m, m->m_len); 1389 m->m_flags |= M_MCAST; 1390 ip = mtod(m, struct ip *); 1391 ip->ip_v = IPVERSION; 1392 ip->ip_hl = sizeof(*ip) >> 2; 1393 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1394 ip->ip_off = htons(IP_DF); 1395 ip->ip_ttl = CARP_DFLTTL; 1396 ip->ip_p = IPPROTO_CARP; 1397 ip->ip_sum = 0; 1398 ip_fillid(ip); 1399 1400 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1401 if (ifa != NULL) { 1402 ip->ip_src.s_addr = 1403 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1404 ifa_free(ifa); 1405 } else 1406 ip->ip_src.s_addr = 0; 1407 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 1408 1409 /* Include the IP addresses in the announcement. */ 1410 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1411 struct sockaddr_in *in; 1412 1413 MPASS(sc->sc_ifas[i] != NULL); 1414 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) 1415 continue; 1416 1417 in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; 1418 1419 if (m_append(m, sizeof(in->sin_addr), 1420 (caddr_t)&in->sin_addr) != 1) { 1421 m_freem(m); 1422 goto resched; 1423 } 1424 1425 vh.vrrp_count_addr++; 1426 len += sizeof(in->sin_addr); 1427 } 1428 ip->ip_len = htons(len); 1429 1430 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); 1431 bcopy(&vh, vh_ptr, sizeof(vh)); 1432 1433 vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, 1434 ip->ip_dst.s_addr, 1435 htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); 1436 vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); 1437 1438 if (carp_tag(sc, m)) 1439 goto resched; 1440 1441 CARPSTATS_INC(carps_opackets); 1442 1443 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1444 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1445 } 1446 #endif 1447 #ifdef INET6 1448 if (sc->sc_naddrs6) { 1449 struct ip6_hdr *ip6; 1450 1451 m = m_gethdr(M_NOWAIT, MT_DATA); 1452 if (m == NULL) { 1453 CARPSTATS_INC(carps_onomem); 1454 goto resched; 1455 } 1456 len = sizeof(*ip6) + sizeof(vh); 1457 m->m_pkthdr.len = len; 1458 m->m_pkthdr.rcvif = NULL; 1459 m->m_len = len; 1460 M_ALIGN(m, m->m_len); 1461 m->m_flags |= M_MCAST; 1462 ip6 = mtod(m, struct ip6_hdr *); 1463 bzero(ip6, sizeof(*ip6)); 1464 ip6->ip6_vfc |= IPV6_VERSION; 1465 /* Traffic class isn't defined in ip6 struct instead 1466 * it gets offset into flowid field */ 1467 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1468 IPTOS_DSCP_OFFSET)); 1469 ip6->ip6_hlim = CARP_DFLTTL; 1470 ip6->ip6_nxt = IPPROTO_CARP; 1471 1472 /* set the source address */ 1473 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1474 if (ifa != NULL) { 1475 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1476 sizeof(struct in6_addr)); 1477 ifa_free(ifa); 1478 } else 1479 /* This should never happen with IPv6. */ 1480 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1481 1482 /* Set the multicast destination. */ 1483 bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); 1484 ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 1485 ip6->ip6_dst.s6_addr8[15] = 0x12; 1486 1487 /* Include the IP addresses in the announcement. */ 1488 len = sizeof(vh); 1489 for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { 1490 struct sockaddr_in6 *in6; 1491 1492 MPASS(sc->sc_ifas[i] != NULL); 1493 if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) 1494 continue; 1495 1496 in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; 1497 1498 if (m_append(m, sizeof(in6->sin6_addr), 1499 (char *)&in6->sin6_addr) != 1) { 1500 m_freem(m); 1501 goto resched; 1502 } 1503 1504 vh.vrrp_count_addr++; 1505 len += sizeof(in6->sin6_addr); 1506 } 1507 ip6->ip6_plen = htonl(len); 1508 1509 vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); 1510 bcopy(&vh, vh_ptr, sizeof(vh)); 1511 1512 vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); 1513 vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); 1514 1515 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1516 m_freem(m); 1517 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1518 goto resched; 1519 } 1520 1521 if (carp_tag(sc, m)) 1522 goto resched; 1523 CARPSTATS_INC(carps_opackets6); 1524 1525 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1526 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1527 } 1528 #endif 1529 1530 resched: 1531 callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, 1532 carp_callout, sc); 1533 } 1534 1535 static void 1536 carp_addroute(struct carp_softc *sc) 1537 { 1538 struct ifaddr *ifa; 1539 1540 CARP_FOREACH_IFA(sc, ifa) 1541 carp_ifa_addroute(ifa); 1542 } 1543 1544 static void 1545 carp_ifa_addroute(struct ifaddr *ifa) 1546 { 1547 1548 switch (ifa->ifa_addr->sa_family) { 1549 #ifdef INET 1550 case AF_INET: 1551 in_addprefix(ifatoia(ifa)); 1552 ifa_add_loopback_route(ifa, 1553 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1554 break; 1555 #endif 1556 #ifdef INET6 1557 case AF_INET6: 1558 ifa_add_loopback_route(ifa, 1559 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1560 nd6_add_ifa_lle(ifatoia6(ifa)); 1561 break; 1562 #endif 1563 } 1564 } 1565 1566 static void 1567 carp_delroute(struct carp_softc *sc) 1568 { 1569 struct ifaddr *ifa; 1570 1571 CARP_FOREACH_IFA(sc, ifa) 1572 carp_ifa_delroute(ifa); 1573 } 1574 1575 static void 1576 carp_ifa_delroute(struct ifaddr *ifa) 1577 { 1578 1579 switch (ifa->ifa_addr->sa_family) { 1580 #ifdef INET 1581 case AF_INET: 1582 ifa_del_loopback_route(ifa, 1583 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1584 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1585 break; 1586 #endif 1587 #ifdef INET6 1588 case AF_INET6: 1589 ifa_del_loopback_route(ifa, 1590 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1591 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1592 break; 1593 #endif 1594 } 1595 } 1596 1597 int 1598 carp_master(struct ifaddr *ifa) 1599 { 1600 struct carp_softc *sc = ifa->ifa_carp; 1601 1602 return (sc->sc_state == MASTER); 1603 } 1604 1605 #ifdef INET 1606 /* 1607 * Broadcast a gratuitous ARP request containing 1608 * the virtual router MAC address for each IP address 1609 * associated with the virtual router. 1610 */ 1611 static void 1612 carp_send_arp(struct carp_softc *sc) 1613 { 1614 struct ifaddr *ifa; 1615 struct in_addr addr; 1616 1617 NET_EPOCH_ASSERT(); 1618 1619 CARP_FOREACH_IFA(sc, ifa) { 1620 if (ifa->ifa_addr->sa_family != AF_INET) 1621 continue; 1622 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1623 arp_announce_ifaddr(sc->sc_carpdev, addr, sc->sc_addr); 1624 } 1625 } 1626 1627 int 1628 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1629 { 1630 struct carp_softc *sc = ifa->ifa_carp; 1631 1632 if (sc->sc_state == MASTER) { 1633 *enaddr = sc->sc_addr; 1634 return (1); 1635 } 1636 1637 return (0); 1638 } 1639 #endif 1640 1641 #ifdef INET6 1642 static void 1643 carp_send_na(struct carp_softc *sc) 1644 { 1645 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1646 struct ifaddr *ifa; 1647 struct in6_addr *in6; 1648 1649 CARP_FOREACH_IFA(sc, ifa) { 1650 if (ifa->ifa_addr->sa_family != AF_INET6) 1651 continue; 1652 1653 in6 = IFA_IN6(ifa); 1654 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1655 ND_NA_FLAG_OVERRIDE, 1, NULL); 1656 DELAY(1000); /* XXX */ 1657 } 1658 } 1659 1660 /* 1661 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1662 * matches and is not a carp address. Returns NULL otherwise. 1663 */ 1664 struct ifaddr * 1665 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1666 { 1667 struct ifaddr *ifa; 1668 1669 NET_EPOCH_ASSERT(); 1670 1671 ifa = NULL; 1672 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1673 if (ifa->ifa_addr->sa_family != AF_INET6) 1674 continue; 1675 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1676 continue; 1677 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1678 ifa = NULL; 1679 else 1680 ifa_ref(ifa); 1681 break; 1682 } 1683 1684 return (ifa); 1685 } 1686 1687 char * 1688 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1689 { 1690 struct ifaddr *ifa; 1691 1692 NET_EPOCH_ASSERT(); 1693 1694 IFNET_FOREACH_IFA(ifp, ifa) 1695 if (ifa->ifa_addr->sa_family == AF_INET6 && 1696 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1697 struct carp_softc *sc = ifa->ifa_carp; 1698 struct m_tag *mtag; 1699 1700 mtag = m_tag_get(PACKET_TAG_CARP, 1701 sizeof(struct carp_softc *), M_NOWAIT); 1702 if (mtag == NULL) 1703 /* Better a bit than nothing. */ 1704 return (sc->sc_addr); 1705 1706 bcopy(&sc, mtag + 1, sizeof(sc)); 1707 m_tag_prepend(m, mtag); 1708 1709 return (sc->sc_addr); 1710 } 1711 1712 return (NULL); 1713 } 1714 #endif /* INET6 */ 1715 1716 int 1717 carp_forus(struct ifnet *ifp, u_char *dhost) 1718 { 1719 struct carp_softc *sc; 1720 uint8_t *ena = dhost; 1721 1722 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1723 return (0); 1724 1725 CIF_LOCK(ifp->if_carp); 1726 IFNET_FOREACH_CARP(ifp, sc) { 1727 /* 1728 * CARP_LOCK() is not here, since would protect nothing, but 1729 * cause deadlock with if_bridge, calling this under its lock. 1730 */ 1731 if (sc->sc_state == MASTER && !bcmp(dhost, sc->sc_addr, 1732 ETHER_ADDR_LEN)) { 1733 CIF_UNLOCK(ifp->if_carp); 1734 return (1); 1735 } 1736 } 1737 CIF_UNLOCK(ifp->if_carp); 1738 1739 return (0); 1740 } 1741 1742 /* Master down timeout event, executed in callout context. */ 1743 static void 1744 carp_master_down(void *v) 1745 { 1746 struct carp_softc *sc = v; 1747 struct epoch_tracker et; 1748 1749 NET_EPOCH_ENTER(et); 1750 CARP_LOCK_ASSERT(sc); 1751 1752 CURVNET_SET(sc->sc_carpdev->if_vnet); 1753 if (sc->sc_state == BACKUP) { 1754 carp_master_down_locked(sc, "master timed out"); 1755 } 1756 CURVNET_RESTORE(); 1757 1758 CARP_UNLOCK(sc); 1759 NET_EPOCH_EXIT(et); 1760 } 1761 1762 static void 1763 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1764 { 1765 1766 NET_EPOCH_ASSERT(); 1767 CARP_LOCK_ASSERT(sc); 1768 1769 switch (sc->sc_state) { 1770 case BACKUP: 1771 carp_set_state(sc, MASTER, reason); 1772 send_ad_locked(sc); 1773 #ifdef INET 1774 carp_send_arp(sc); 1775 #endif 1776 #ifdef INET6 1777 carp_send_na(sc); 1778 #endif 1779 carp_setrun(sc, 0); 1780 carp_addroute(sc); 1781 break; 1782 case INIT: 1783 case MASTER: 1784 #ifdef INVARIANTS 1785 panic("carp: VHID %u@%s: master_down event in %s state\n", 1786 sc->sc_vhid, 1787 if_name(sc->sc_carpdev), 1788 sc->sc_state ? "MASTER" : "INIT"); 1789 #endif 1790 break; 1791 } 1792 } 1793 1794 /* 1795 * When in backup state, af indicates whether to reset the master down timer 1796 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1797 */ 1798 static void 1799 carp_setrun(struct carp_softc *sc, sa_family_t af) 1800 { 1801 struct timeval tv; 1802 int timeout; 1803 1804 CARP_LOCK_ASSERT(sc); 1805 1806 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1807 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1808 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1809 !V_carp_allow) 1810 return; 1811 1812 switch (sc->sc_state) { 1813 case INIT: 1814 carp_set_state(sc, BACKUP, "initialization complete"); 1815 carp_setrun(sc, 0); 1816 break; 1817 case BACKUP: 1818 callout_stop(&sc->sc_ad_tmo); 1819 1820 switch (sc->sc_version) { 1821 case CARP_VERSION_CARP: 1822 tv.tv_sec = 3 * sc->sc_advbase; 1823 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1824 timeout = tvtohz(&tv); 1825 break; 1826 case CARP_VERSION_VRRPv3: 1827 /* skew time */ 1828 timeout = (256 - sc->sc_vrrp_prio) * 1829 sc->sc_vrrp_master_inter / 256; 1830 timeout += (3 * sc->sc_vrrp_master_inter); 1831 timeout *= hz; 1832 timeout /= 100; /* master interval is in centiseconds */ 1833 break; 1834 } 1835 switch (af) { 1836 #ifdef INET 1837 case AF_INET: 1838 callout_reset(&sc->sc_md_tmo, timeout, 1839 carp_master_down, sc); 1840 break; 1841 #endif 1842 #ifdef INET6 1843 case AF_INET6: 1844 callout_reset(&sc->sc_md6_tmo, timeout, 1845 carp_master_down, sc); 1846 break; 1847 #endif 1848 default: 1849 #ifdef INET 1850 if (sc->sc_naddrs) 1851 callout_reset(&sc->sc_md_tmo, timeout, 1852 carp_master_down, sc); 1853 #endif 1854 #ifdef INET6 1855 if (sc->sc_naddrs6) 1856 callout_reset(&sc->sc_md6_tmo, timeout, 1857 carp_master_down, sc); 1858 #endif 1859 break; 1860 } 1861 break; 1862 case MASTER: 1863 switch (sc->sc_version) { 1864 case CARP_VERSION_CARP: 1865 tv.tv_sec = sc->sc_advbase; 1866 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1867 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1868 carp_callout, sc); 1869 break; 1870 case CARP_VERSION_VRRPv3: 1871 callout_reset(&sc->sc_ad_tmo, 1872 sc->sc_vrrp_adv_inter * hz / 100, 1873 carp_callout, sc); 1874 break; 1875 } 1876 break; 1877 } 1878 } 1879 1880 /* 1881 * Setup multicast structures. 1882 */ 1883 static int 1884 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1885 { 1886 struct ifnet *ifp = cif->cif_ifp; 1887 int error = 0; 1888 1889 switch (sa) { 1890 #ifdef INET 1891 case AF_INET: 1892 { 1893 struct ip_moptions *imo = &cif->cif_imo; 1894 struct in_mfilter *imf; 1895 struct in_addr addr; 1896 1897 if (ip_mfilter_first(&imo->imo_head) != NULL) 1898 return (0); 1899 1900 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1901 ip_mfilter_init(&imo->imo_head); 1902 imo->imo_multicast_vif = -1; 1903 1904 addr.s_addr = htonl(INADDR_CARP_GROUP); 1905 if ((error = in_joingroup(ifp, &addr, NULL, 1906 &imf->imf_inm)) != 0) { 1907 ip_mfilter_free(imf); 1908 break; 1909 } 1910 1911 ip_mfilter_insert(&imo->imo_head, imf); 1912 imo->imo_multicast_ifp = ifp; 1913 imo->imo_multicast_ttl = CARP_DFLTTL; 1914 imo->imo_multicast_loop = 0; 1915 break; 1916 } 1917 #endif 1918 #ifdef INET6 1919 case AF_INET6: 1920 { 1921 struct ip6_moptions *im6o = &cif->cif_im6o; 1922 struct in6_mfilter *im6f[2]; 1923 struct in6_addr in6; 1924 1925 if (ip6_mfilter_first(&im6o->im6o_head)) 1926 return (0); 1927 1928 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1929 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1930 1931 ip6_mfilter_init(&im6o->im6o_head); 1932 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1933 im6o->im6o_multicast_ifp = ifp; 1934 1935 /* Join IPv6 CARP multicast group. */ 1936 bzero(&in6, sizeof(in6)); 1937 in6.s6_addr16[0] = htons(0xff02); 1938 in6.s6_addr8[15] = 0x12; 1939 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1940 ip6_mfilter_free(im6f[0]); 1941 ip6_mfilter_free(im6f[1]); 1942 break; 1943 } 1944 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1945 ip6_mfilter_free(im6f[0]); 1946 ip6_mfilter_free(im6f[1]); 1947 break; 1948 } 1949 1950 /* Join solicited multicast address. */ 1951 bzero(&in6, sizeof(in6)); 1952 in6.s6_addr16[0] = htons(0xff02); 1953 in6.s6_addr32[1] = 0; 1954 in6.s6_addr32[2] = htonl(1); 1955 in6.s6_addr32[3] = 0; 1956 in6.s6_addr8[12] = 0xff; 1957 1958 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1959 ip6_mfilter_free(im6f[0]); 1960 ip6_mfilter_free(im6f[1]); 1961 break; 1962 } 1963 1964 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1965 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1966 ip6_mfilter_free(im6f[0]); 1967 ip6_mfilter_free(im6f[1]); 1968 break; 1969 } 1970 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1971 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1972 break; 1973 } 1974 #endif 1975 } 1976 1977 return (error); 1978 } 1979 1980 /* 1981 * Free multicast structures. 1982 */ 1983 static void 1984 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1985 { 1986 #ifdef INET 1987 struct ip_moptions *imo = &cif->cif_imo; 1988 struct in_mfilter *imf; 1989 #endif 1990 #ifdef INET6 1991 struct ip6_moptions *im6o = &cif->cif_im6o; 1992 struct in6_mfilter *im6f; 1993 #endif 1994 sx_assert(&carp_sx, SA_XLOCKED); 1995 1996 switch (sa) { 1997 #ifdef INET 1998 case AF_INET: 1999 if (cif->cif_naddrs != 0) 2000 break; 2001 2002 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2003 ip_mfilter_remove(&imo->imo_head, imf); 2004 in_leavegroup(imf->imf_inm, NULL); 2005 ip_mfilter_free(imf); 2006 } 2007 break; 2008 #endif 2009 #ifdef INET6 2010 case AF_INET6: 2011 if (cif->cif_naddrs6 != 0) 2012 break; 2013 2014 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 2015 ip6_mfilter_remove(&im6o->im6o_head, im6f); 2016 in6_leavegroup(im6f->im6f_in6m, NULL); 2017 ip6_mfilter_free(im6f); 2018 } 2019 break; 2020 #endif 2021 } 2022 } 2023 2024 int 2025 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 2026 { 2027 struct m_tag *mtag; 2028 int vhid; 2029 2030 if (!sa) 2031 return (0); 2032 2033 switch (sa->sa_family) { 2034 #ifdef INET 2035 case AF_INET: 2036 break; 2037 #endif 2038 #ifdef INET6 2039 case AF_INET6: 2040 break; 2041 #endif 2042 default: 2043 return (0); 2044 } 2045 2046 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2047 if (mtag == NULL) 2048 return (0); 2049 2050 bcopy(mtag + 1, &vhid, sizeof(vhid)); 2051 2052 /* Set the source MAC address to the Virtual Router MAC Address. */ 2053 switch (ifp->if_type) { 2054 case IFT_ETHER: 2055 case IFT_BRIDGE: 2056 case IFT_L2VLAN: { 2057 struct ether_header *eh; 2058 2059 eh = mtod(m, struct ether_header *); 2060 eh->ether_shost[0] = 0; 2061 eh->ether_shost[1] = 0; 2062 eh->ether_shost[2] = 0x5e; 2063 eh->ether_shost[3] = 0; 2064 eh->ether_shost[4] = 1; 2065 eh->ether_shost[5] = vhid; 2066 } 2067 break; 2068 default: 2069 printf("%s: carp is not supported for the %d interface type\n", 2070 if_name(ifp), ifp->if_type); 2071 return (EOPNOTSUPP); 2072 } 2073 2074 return (0); 2075 } 2076 2077 static struct carp_softc* 2078 carp_alloc(struct ifnet *ifp, carp_version_t version, int vhid) 2079 { 2080 struct carp_softc *sc; 2081 struct carp_if *cif; 2082 2083 sx_assert(&carp_sx, SA_XLOCKED); 2084 2085 if ((cif = ifp->if_carp) == NULL) 2086 cif = carp_alloc_if(ifp); 2087 2088 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK); 2089 *sc = (struct carp_softc ){ 2090 .sc_vhid = vhid, 2091 .sc_version = version, 2092 .sc_state = INIT, 2093 .sc_carpdev = ifp, 2094 .sc_ifasiz = sizeof(struct ifaddr *), 2095 .sc_addr = { 0, 0, 0x5e, 0, 1, vhid }, 2096 }; 2097 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 2098 2099 switch (version) { 2100 case CARP_VERSION_CARP: 2101 sc->sc_advbase = CARP_DFLTINTV; 2102 sc->sc_init_counter = true; 2103 sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); 2104 sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 2105 sc->sc_carpaddr6.s6_addr8[15] = 0x12; 2106 break; 2107 case CARP_VERSION_VRRPv3: 2108 sc->sc_vrrp_adv_inter = 100; 2109 sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; 2110 sc->sc_vrrp_prio = 100; 2111 break; 2112 } 2113 2114 CARP_LOCK_INIT(sc); 2115 #ifdef INET 2116 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2117 #endif 2118 #ifdef INET6 2119 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2120 #endif 2121 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 2122 2123 CIF_LOCK(cif); 2124 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 2125 CIF_UNLOCK(cif); 2126 2127 mtx_lock(&carp_mtx); 2128 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 2129 mtx_unlock(&carp_mtx); 2130 2131 return (sc); 2132 } 2133 2134 static void 2135 carp_grow_ifas(struct carp_softc *sc) 2136 { 2137 struct ifaddr **new; 2138 2139 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 2140 CARP_LOCK(sc); 2141 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 2142 free(sc->sc_ifas, M_CARP); 2143 sc->sc_ifas = new; 2144 sc->sc_ifasiz *= 2; 2145 CARP_UNLOCK(sc); 2146 } 2147 2148 static void 2149 carp_destroy(struct carp_softc *sc) 2150 { 2151 struct ifnet *ifp = sc->sc_carpdev; 2152 struct carp_if *cif = ifp->if_carp; 2153 2154 sx_assert(&carp_sx, SA_XLOCKED); 2155 2156 if (sc->sc_suppress) 2157 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 2158 CARP_UNLOCK(sc); 2159 2160 CIF_LOCK(cif); 2161 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 2162 CIF_UNLOCK(cif); 2163 2164 mtx_lock(&carp_mtx); 2165 LIST_REMOVE(sc, sc_next); 2166 mtx_unlock(&carp_mtx); 2167 2168 callout_drain(&sc->sc_ad_tmo); 2169 #ifdef INET 2170 callout_drain(&sc->sc_md_tmo); 2171 #endif 2172 #ifdef INET6 2173 callout_drain(&sc->sc_md6_tmo); 2174 #endif 2175 CARP_LOCK_DESTROY(sc); 2176 2177 free(sc->sc_ifas, M_CARP); 2178 free(sc, M_CARP); 2179 } 2180 2181 static struct carp_if* 2182 carp_alloc_if(struct ifnet *ifp) 2183 { 2184 struct carp_if *cif; 2185 int error; 2186 2187 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 2188 2189 if ((error = ifpromisc(ifp, 1)) != 0) 2190 printf("%s: ifpromisc(%s) failed: %d\n", 2191 __func__, if_name(ifp), error); 2192 else 2193 cif->cif_flags |= CIF_PROMISC; 2194 2195 CIF_LOCK_INIT(cif); 2196 cif->cif_ifp = ifp; 2197 TAILQ_INIT(&cif->cif_vrs); 2198 2199 IF_ADDR_WLOCK(ifp); 2200 ifp->if_carp = cif; 2201 if_ref(ifp); 2202 IF_ADDR_WUNLOCK(ifp); 2203 2204 return (cif); 2205 } 2206 2207 static void 2208 carp_free_if(struct carp_if *cif) 2209 { 2210 struct ifnet *ifp = cif->cif_ifp; 2211 2212 CIF_LOCK_ASSERT(cif); 2213 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 2214 __func__)); 2215 2216 IF_ADDR_WLOCK(ifp); 2217 ifp->if_carp = NULL; 2218 IF_ADDR_WUNLOCK(ifp); 2219 2220 CIF_LOCK_DESTROY(cif); 2221 2222 if (cif->cif_flags & CIF_PROMISC) 2223 ifpromisc(ifp, 0); 2224 if_rele(ifp); 2225 2226 free(cif, M_CARP); 2227 } 2228 2229 static bool 2230 carp_carprcp(void *arg, struct carp_softc *sc, int priv) 2231 { 2232 struct carpreq *carpr = arg; 2233 2234 CARP_LOCK(sc); 2235 carpr->carpr_state = sc->sc_state; 2236 carpr->carpr_vhid = sc->sc_vhid; 2237 switch (sc->sc_version) { 2238 case CARP_VERSION_CARP: 2239 carpr->carpr_advbase = sc->sc_advbase; 2240 carpr->carpr_advskew = sc->sc_advskew; 2241 if (priv) 2242 bcopy(sc->sc_key, carpr->carpr_key, 2243 sizeof(carpr->carpr_key)); 2244 else 2245 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 2246 break; 2247 case CARP_VERSION_VRRPv3: 2248 break; 2249 } 2250 CARP_UNLOCK(sc); 2251 2252 return (true); 2253 } 2254 2255 static int 2256 carp_ioctl_set(if_t ifp, struct carpkreq *carpr) 2257 { 2258 struct epoch_tracker et; 2259 struct carp_softc *sc = NULL; 2260 int error = 0; 2261 2262 if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID) 2263 return (EINVAL); 2264 2265 switch (carpr->carpr_version) { 2266 case CARP_VERSION_CARP: 2267 if (carpr->carpr_advbase != 0 && (carpr->carpr_advbase > 255 || 2268 carpr->carpr_advbase < CARP_DFLTINTV)) 2269 return (EINVAL); 2270 if (carpr->carpr_advskew < 0 || carpr->carpr_advskew >= 255) 2271 return (EINVAL); 2272 break; 2273 case CARP_VERSION_VRRPv3: 2274 /* XXXGL: shouldn't we check anything? */ 2275 break; 2276 default: 2277 return (EINVAL); 2278 } 2279 2280 if (ifp->if_carp) { 2281 IFNET_FOREACH_CARP(ifp, sc) 2282 if (sc->sc_vhid == carpr->carpr_vhid) 2283 break; 2284 } 2285 2286 if (sc == NULL) 2287 sc = carp_alloc(ifp, carpr->carpr_version, carpr->carpr_vhid); 2288 else if (sc->sc_version != carpr->carpr_version) 2289 return (EINVAL); 2290 2291 CARP_LOCK(sc); 2292 switch (sc->sc_version) { 2293 case CARP_VERSION_CARP: 2294 if (carpr->carpr_advbase != 0) 2295 sc->sc_advbase = carpr->carpr_advbase; 2296 sc->sc_advskew = carpr->carpr_advskew; 2297 if (carpr->carpr_addr.s_addr != INADDR_ANY) 2298 sc->sc_carpaddr = carpr->carpr_addr; 2299 if (!IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { 2300 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, 2301 sizeof(sc->sc_carpaddr6)); 2302 } 2303 if (carpr->carpr_key[0] != '\0') { 2304 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2305 carp_hmac_prepare(sc); 2306 } 2307 break; 2308 case CARP_VERSION_VRRPv3: 2309 if (carpr->carpr_vrrp_priority != 0) 2310 sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; 2311 if (carpr->carpr_vrrp_adv_inter) 2312 sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; 2313 break; 2314 } 2315 2316 if (sc->sc_state != INIT && 2317 carpr->carpr_state != sc->sc_state) { 2318 switch (carpr->carpr_state) { 2319 case BACKUP: 2320 callout_stop(&sc->sc_ad_tmo); 2321 carp_set_state(sc, BACKUP, 2322 "user requested via ifconfig"); 2323 carp_setrun(sc, 0); 2324 carp_delroute(sc); 2325 break; 2326 case MASTER: 2327 NET_EPOCH_ENTER(et); 2328 carp_master_down_locked(sc, 2329 "user requested via ifconfig"); 2330 NET_EPOCH_EXIT(et); 2331 break; 2332 default: 2333 break; 2334 } 2335 } 2336 CARP_UNLOCK(sc); 2337 2338 return (error); 2339 } 2340 2341 static int 2342 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr, 2343 bool (*outfn)(void *, struct carp_softc *, int), void *arg) 2344 { 2345 int priveleged; 2346 struct carp_softc *sc; 2347 2348 if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID) 2349 return (EINVAL); 2350 if (carpr->carpr_count < 1) 2351 return (EMSGSIZE); 2352 if (ifp->if_carp == NULL) 2353 return (ENOENT); 2354 2355 priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0); 2356 if (carpr->carpr_vhid != 0) { 2357 IFNET_FOREACH_CARP(ifp, sc) 2358 if (sc->sc_vhid == carpr->carpr_vhid) 2359 break; 2360 if (sc == NULL) 2361 return (ENOENT); 2362 2363 if (! outfn(arg, sc, priveleged)) 2364 return (ENOMEM); 2365 carpr->carpr_count = 1; 2366 } else { 2367 int count; 2368 2369 count = 0; 2370 IFNET_FOREACH_CARP(ifp, sc) 2371 count++; 2372 2373 if (count > carpr->carpr_count) 2374 return (EMSGSIZE); 2375 2376 IFNET_FOREACH_CARP(ifp, sc) { 2377 if (! outfn(arg, sc, priveleged)) 2378 return (ENOMEM); 2379 carpr->carpr_count = count; 2380 } 2381 } 2382 2383 return (0); 2384 } 2385 2386 int 2387 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 2388 { 2389 struct carpreq carpr; 2390 struct carpkreq carprk = { 2391 .carpr_version = CARP_VERSION_CARP, 2392 }; 2393 struct ifnet *ifp; 2394 int error = 0; 2395 2396 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 2397 return (error); 2398 2399 ifp = ifunit_ref(ifr->ifr_name); 2400 if ((error = carp_is_supported_if(ifp)) != 0) 2401 goto out; 2402 2403 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2404 error = EADDRNOTAVAIL; 2405 goto out; 2406 } 2407 2408 sx_xlock(&carp_sx); 2409 switch (cmd) { 2410 case SIOCSVH: 2411 if ((error = priv_check(td, PRIV_NETINET_CARP))) 2412 break; 2413 2414 memcpy(&carprk, &carpr, sizeof(carpr)); 2415 error = carp_ioctl_set(ifp, &carprk); 2416 break; 2417 2418 case SIOCGVH: 2419 error = carp_ioctl_get(ifp, td->td_ucred, &carpr, 2420 carp_carprcp, &carpr); 2421 if (error == 0) { 2422 error = copyout(&carpr, 2423 (char *)ifr_data_get_ptr(ifr), 2424 carpr.carpr_count * sizeof(carpr)); 2425 } 2426 break; 2427 default: 2428 error = EINVAL; 2429 } 2430 sx_xunlock(&carp_sx); 2431 2432 out: 2433 if (ifp != NULL) 2434 if_rele(ifp); 2435 2436 return (error); 2437 } 2438 2439 static int 2440 carp_get_vhid(struct ifaddr *ifa) 2441 { 2442 2443 if (ifa == NULL || ifa->ifa_carp == NULL) 2444 return (0); 2445 2446 return (ifa->ifa_carp->sc_vhid); 2447 } 2448 2449 int 2450 carp_attach(struct ifaddr *ifa, int vhid) 2451 { 2452 struct ifnet *ifp = ifa->ifa_ifp; 2453 struct carp_if *cif = ifp->if_carp; 2454 struct carp_softc *sc; 2455 int index, error; 2456 2457 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 2458 2459 switch (ifa->ifa_addr->sa_family) { 2460 #ifdef INET 2461 case AF_INET: 2462 #endif 2463 #ifdef INET6 2464 case AF_INET6: 2465 #endif 2466 break; 2467 default: 2468 return (EPROTOTYPE); 2469 } 2470 2471 sx_xlock(&carp_sx); 2472 if (ifp->if_carp == NULL) { 2473 sx_xunlock(&carp_sx); 2474 return (ENOPROTOOPT); 2475 } 2476 2477 IFNET_FOREACH_CARP(ifp, sc) 2478 if (sc->sc_vhid == vhid) 2479 break; 2480 if (sc == NULL) { 2481 sx_xunlock(&carp_sx); 2482 return (ENOENT); 2483 } 2484 2485 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 2486 if (error) { 2487 CIF_FREE(cif); 2488 sx_xunlock(&carp_sx); 2489 return (error); 2490 } 2491 2492 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 2493 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 2494 carp_grow_ifas(sc); 2495 2496 switch (ifa->ifa_addr->sa_family) { 2497 #ifdef INET 2498 case AF_INET: 2499 cif->cif_naddrs++; 2500 sc->sc_naddrs++; 2501 break; 2502 #endif 2503 #ifdef INET6 2504 case AF_INET6: 2505 cif->cif_naddrs6++; 2506 sc->sc_naddrs6++; 2507 break; 2508 #endif 2509 } 2510 2511 ifa_ref(ifa); 2512 2513 CARP_LOCK(sc); 2514 sc->sc_ifas[index - 1] = ifa; 2515 ifa->ifa_carp = sc; 2516 if (sc->sc_version == CARP_VERSION_CARP) 2517 carp_hmac_prepare(sc); 2518 carp_sc_state(sc); 2519 CARP_UNLOCK(sc); 2520 2521 sx_xunlock(&carp_sx); 2522 2523 return (0); 2524 } 2525 2526 void 2527 carp_detach(struct ifaddr *ifa, bool keep_cif) 2528 { 2529 struct ifnet *ifp = ifa->ifa_ifp; 2530 struct carp_if *cif = ifp->if_carp; 2531 struct carp_softc *sc = ifa->ifa_carp; 2532 int i, index; 2533 2534 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 2535 2536 sx_xlock(&carp_sx); 2537 2538 CARP_LOCK(sc); 2539 /* Shift array. */ 2540 index = sc->sc_naddrs + sc->sc_naddrs6; 2541 for (i = 0; i < index; i++) 2542 if (sc->sc_ifas[i] == ifa) 2543 break; 2544 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 2545 for (; i < index - 1; i++) 2546 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 2547 sc->sc_ifas[index - 1] = NULL; 2548 2549 switch (ifa->ifa_addr->sa_family) { 2550 #ifdef INET 2551 case AF_INET: 2552 cif->cif_naddrs--; 2553 sc->sc_naddrs--; 2554 break; 2555 #endif 2556 #ifdef INET6 2557 case AF_INET6: 2558 cif->cif_naddrs6--; 2559 sc->sc_naddrs6--; 2560 break; 2561 #endif 2562 } 2563 2564 carp_ifa_delroute(ifa); 2565 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2566 2567 ifa->ifa_carp = NULL; 2568 ifa_free(ifa); 2569 2570 if (sc->sc_version == CARP_VERSION_CARP) 2571 carp_hmac_prepare(sc); 2572 carp_sc_state(sc); 2573 2574 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2575 carp_destroy(sc); 2576 else 2577 CARP_UNLOCK(sc); 2578 2579 if (!keep_cif) 2580 CIF_FREE(cif); 2581 2582 sx_xunlock(&carp_sx); 2583 } 2584 2585 static void 2586 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2587 { 2588 2589 CARP_LOCK_ASSERT(sc); 2590 2591 if (sc->sc_state != state) { 2592 const char *carp_states[] = { CARP_STATES }; 2593 char subsys[IFNAMSIZ+5]; 2594 2595 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2596 if_name(sc->sc_carpdev)); 2597 2598 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2599 carp_states[sc->sc_state], carp_states[state], reason); 2600 2601 sc->sc_state = state; 2602 2603 devctl_notify("CARP", subsys, carp_states[state], NULL); 2604 } 2605 } 2606 2607 static void 2608 carp_linkstate(struct ifnet *ifp) 2609 { 2610 struct carp_softc *sc; 2611 2612 CIF_LOCK(ifp->if_carp); 2613 IFNET_FOREACH_CARP(ifp, sc) { 2614 CARP_LOCK(sc); 2615 carp_sc_state(sc); 2616 CARP_UNLOCK(sc); 2617 } 2618 CIF_UNLOCK(ifp->if_carp); 2619 } 2620 2621 static void 2622 carp_sc_state(struct carp_softc *sc) 2623 { 2624 2625 CARP_LOCK_ASSERT(sc); 2626 2627 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2628 !(sc->sc_carpdev->if_flags & IFF_UP) || 2629 !V_carp_allow) { 2630 callout_stop(&sc->sc_ad_tmo); 2631 #ifdef INET 2632 callout_stop(&sc->sc_md_tmo); 2633 #endif 2634 #ifdef INET6 2635 callout_stop(&sc->sc_md6_tmo); 2636 #endif 2637 carp_set_state(sc, INIT, "hardware interface down"); 2638 carp_setrun(sc, 0); 2639 carp_delroute(sc); 2640 if (!sc->sc_suppress) 2641 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2642 sc->sc_suppress = 1; 2643 } else { 2644 carp_set_state(sc, INIT, "hardware interface up"); 2645 carp_setrun(sc, 0); 2646 if (sc->sc_suppress) 2647 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2648 sc->sc_suppress = 0; 2649 } 2650 } 2651 2652 static void 2653 carp_demote_adj(int adj, char *reason) 2654 { 2655 atomic_add_int(&V_carp_demotion, adj); 2656 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2657 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2658 } 2659 2660 static int 2661 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2662 { 2663 int new, error; 2664 struct carp_softc *sc; 2665 2666 new = V_carp_allow; 2667 error = sysctl_handle_int(oidp, &new, 0, req); 2668 if (error || !req->newptr) 2669 return (error); 2670 2671 if (V_carp_allow != new) { 2672 V_carp_allow = new; 2673 2674 mtx_lock(&carp_mtx); 2675 LIST_FOREACH(sc, &carp_list, sc_next) { 2676 CARP_LOCK(sc); 2677 if (curvnet == sc->sc_carpdev->if_vnet) 2678 carp_sc_state(sc); 2679 CARP_UNLOCK(sc); 2680 } 2681 mtx_unlock(&carp_mtx); 2682 } 2683 2684 return (0); 2685 } 2686 2687 static int 2688 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2689 { 2690 int new, error; 2691 2692 new = V_carp_dscp; 2693 error = sysctl_handle_int(oidp, &new, 0, req); 2694 if (error || !req->newptr) 2695 return (error); 2696 2697 if (new < 0 || new > 63) 2698 return (EINVAL); 2699 2700 V_carp_dscp = new; 2701 2702 return (0); 2703 } 2704 2705 static int 2706 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2707 { 2708 int new, error; 2709 2710 new = V_carp_demotion; 2711 error = sysctl_handle_int(oidp, &new, 0, req); 2712 if (error || !req->newptr) 2713 return (error); 2714 2715 carp_demote_adj(new, "sysctl"); 2716 2717 return (0); 2718 } 2719 2720 static int 2721 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 2722 { 2723 if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN)) 2724 return (EINVAL); 2725 2726 memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla)); 2727 return (0); 2728 } 2729 2730 struct carp_nl_send_args { 2731 struct nlmsghdr *hdr; 2732 struct nl_pstate *npt; 2733 }; 2734 2735 static bool 2736 carp_nl_send(void *arg, struct carp_softc *sc, int priv) 2737 { 2738 struct carp_nl_send_args *nlsa = arg; 2739 struct nlmsghdr *hdr = nlsa->hdr; 2740 struct nl_pstate *npt = nlsa->npt; 2741 struct nl_writer *nw = npt->nw; 2742 struct genlmsghdr *ghdr_new; 2743 2744 if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { 2745 nlmsg_abort(nw); 2746 return (false); 2747 } 2748 2749 ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); 2750 if (ghdr_new == NULL) { 2751 nlmsg_abort(nw); 2752 return (false); 2753 } 2754 2755 ghdr_new->cmd = CARP_NL_CMD_GET; 2756 ghdr_new->version = 0; 2757 ghdr_new->reserved = 0; 2758 2759 CARP_LOCK(sc); 2760 2761 nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); 2762 nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); 2763 nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version); 2764 switch (sc->sc_version) { 2765 case CARP_VERSION_CARP: 2766 nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); 2767 nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); 2768 nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); 2769 nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); 2770 if (priv) 2771 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), 2772 sc->sc_key); 2773 break; 2774 case CARP_VERSION_VRRPv3: 2775 nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); 2776 nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, 2777 sc->sc_vrrp_adv_inter); 2778 break; 2779 } 2780 2781 CARP_UNLOCK(sc); 2782 2783 if (! nlmsg_end(nw)) { 2784 nlmsg_abort(nw); 2785 return (false); 2786 } 2787 2788 return (true); 2789 } 2790 2791 struct nl_carp_parsed { 2792 unsigned int ifindex; 2793 char *ifname; 2794 uint32_t state; 2795 uint32_t vhid; 2796 int32_t advbase; 2797 int32_t advskew; 2798 char key[CARP_KEY_LEN]; 2799 struct in_addr addr; 2800 struct in6_addr addr6; 2801 carp_version_t version; 2802 uint8_t vrrp_prio; 2803 uint16_t vrrp_adv_inter; 2804 }; 2805 2806 #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) 2807 static const struct nlattr_parser nla_p_set[] = { 2808 { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, 2809 { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, 2810 { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 }, 2811 { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 }, 2812 { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key }, 2813 { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 }, 2814 { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, 2815 { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, 2816 { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, 2817 { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, 2818 { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, 2819 { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, 2820 }; 2821 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_empty, nla_p_set); 2822 #undef _OUT 2823 2824 2825 static int 2826 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt) 2827 { 2828 struct nl_carp_parsed attrs = { }; 2829 struct carp_nl_send_args args; 2830 struct carpreq carpr = { }; 2831 struct epoch_tracker et; 2832 if_t ifp = NULL; 2833 int error; 2834 2835 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2836 if (error != 0) 2837 return (error); 2838 2839 NET_EPOCH_ENTER(et); 2840 if (attrs.ifname != NULL) 2841 ifp = ifunit_ref(attrs.ifname); 2842 else if (attrs.ifindex != 0) 2843 ifp = ifnet_byindex_ref(attrs.ifindex); 2844 NET_EPOCH_EXIT(et); 2845 2846 if ((error = carp_is_supported_if(ifp)) != 0) 2847 goto out; 2848 2849 hdr->nlmsg_flags |= NLM_F_MULTI; 2850 args.hdr = hdr; 2851 args.npt = npt; 2852 2853 carpr.carpr_vhid = attrs.vhid; 2854 carpr.carpr_count = CARP_MAXVHID; 2855 2856 sx_xlock(&carp_sx); 2857 error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr, 2858 carp_nl_send, &args); 2859 sx_xunlock(&carp_sx); 2860 2861 if (! nlmsg_end_dump(npt->nw, error, hdr)) 2862 error = ENOMEM; 2863 2864 out: 2865 if (ifp != NULL) 2866 if_rele(ifp); 2867 2868 return (error); 2869 } 2870 2871 static int 2872 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) 2873 { 2874 struct nl_carp_parsed attrs = { }; 2875 struct carpkreq carpr; 2876 struct epoch_tracker et; 2877 if_t ifp = NULL; 2878 int error; 2879 2880 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2881 if (error != 0) 2882 return (error); 2883 2884 if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID) 2885 return (EINVAL); 2886 if (attrs.state > CARP_MAXSTATE) 2887 return (EINVAL); 2888 if (attrs.version == 0) /* compat with pre-VRRPv3 */ 2889 attrs.version = CARP_VERSION_CARP; 2890 switch (attrs.version) { 2891 case CARP_VERSION_CARP: 2892 if (attrs.advbase < 0 || attrs.advskew < 0) 2893 return (EINVAL); 2894 if (attrs.advbase > 255) 2895 return (EINVAL); 2896 if (attrs.advskew >= 255) 2897 return (EINVAL); 2898 break; 2899 case CARP_VERSION_VRRPv3: 2900 if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) 2901 return (EINVAL); 2902 break; 2903 default: 2904 return (EINVAL); 2905 } 2906 2907 NET_EPOCH_ENTER(et); 2908 if (attrs.ifname != NULL) 2909 ifp = ifunit_ref(attrs.ifname); 2910 else if (attrs.ifindex != 0) 2911 ifp = ifnet_byindex_ref(attrs.ifindex); 2912 NET_EPOCH_EXIT(et); 2913 2914 if ((error = carp_is_supported_if(ifp)) != 0) 2915 goto out; 2916 2917 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2918 error = EADDRNOTAVAIL; 2919 goto out; 2920 } 2921 2922 carpr.carpr_count = 1; 2923 carpr.carpr_vhid = attrs.vhid; 2924 carpr.carpr_state = attrs.state; 2925 carpr.carpr_version = attrs.version; 2926 switch (attrs.version) { 2927 case CARP_VERSION_CARP: 2928 carpr.carpr_advbase = attrs.advbase; 2929 carpr.carpr_advskew = attrs.advskew; 2930 carpr.carpr_addr = attrs.addr; 2931 carpr.carpr_addr6 = attrs.addr6; 2932 memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); 2933 break; 2934 case CARP_VERSION_VRRPv3: 2935 carpr.carpr_vrrp_priority = attrs.vrrp_prio; 2936 carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; 2937 break; 2938 } 2939 2940 sx_xlock(&carp_sx); 2941 error = carp_ioctl_set(ifp, &carpr); 2942 sx_xunlock(&carp_sx); 2943 2944 out: 2945 if (ifp != NULL) 2946 if_rele(ifp); 2947 2948 return (error); 2949 } 2950 2951 static const struct nlhdr_parser *all_parsers[] = { 2952 &carp_parser 2953 }; 2954 2955 static const struct genl_cmd carp_cmds[] = { 2956 { 2957 .cmd_num = CARP_NL_CMD_GET, 2958 .cmd_name = "SIOCGVH", 2959 .cmd_cb = carp_nl_get, 2960 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | 2961 GENL_CMD_CAP_HASPOL, 2962 }, 2963 { 2964 .cmd_num = CARP_NL_CMD_SET, 2965 .cmd_name = "SIOCSVH", 2966 .cmd_cb = carp_nl_set, 2967 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, 2968 .cmd_priv = PRIV_NETINET_CARP, 2969 }, 2970 }; 2971 2972 static void 2973 carp_nl_register(void) 2974 { 2975 bool ret __diagused; 2976 int family_id __diagused; 2977 2978 NL_VERIFY_PARSERS(all_parsers); 2979 family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, 2980 CARP_NL_CMD_MAX); 2981 MPASS(family_id != 0); 2982 2983 ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds, 2984 nitems(carp_cmds)); 2985 MPASS(ret); 2986 } 2987 2988 static void 2989 carp_nl_unregister(void) 2990 { 2991 genl_unregister_family(CARP_NL_FAMILY_NAME); 2992 } 2993 2994 static void 2995 carp_mod_cleanup(void) 2996 { 2997 2998 carp_nl_unregister(); 2999 3000 #ifdef INET 3001 (void)ipproto_unregister(IPPROTO_CARP); 3002 carp_iamatch_p = NULL; 3003 #endif 3004 #ifdef INET6 3005 (void)ip6proto_unregister(IPPROTO_CARP); 3006 carp_iamatch6_p = NULL; 3007 carp_macmatch6_p = NULL; 3008 #endif 3009 carp_ioctl_p = NULL; 3010 carp_attach_p = NULL; 3011 carp_detach_p = NULL; 3012 carp_get_vhid_p = NULL; 3013 carp_linkstate_p = NULL; 3014 carp_forus_p = NULL; 3015 carp_output_p = NULL; 3016 carp_demote_adj_p = NULL; 3017 carp_master_p = NULL; 3018 mtx_unlock(&carp_mtx); 3019 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 3020 mtx_destroy(&carp_mtx); 3021 sx_destroy(&carp_sx); 3022 } 3023 3024 static void 3025 ipcarp_sysinit(void) 3026 { 3027 3028 /* Load allow as tunable so to postpone carp start after module load */ 3029 TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow); 3030 } 3031 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL); 3032 3033 static int 3034 carp_mod_load(void) 3035 { 3036 int err; 3037 3038 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 3039 sx_init(&carp_sx, "carp_sx"); 3040 LIST_INIT(&carp_list); 3041 carp_get_vhid_p = carp_get_vhid; 3042 carp_forus_p = carp_forus; 3043 carp_output_p = carp_output; 3044 carp_linkstate_p = carp_linkstate; 3045 carp_ioctl_p = carp_ioctl; 3046 carp_attach_p = carp_attach; 3047 carp_detach_p = carp_detach; 3048 carp_demote_adj_p = carp_demote_adj; 3049 carp_master_p = carp_master; 3050 #ifdef INET6 3051 carp_iamatch6_p = carp_iamatch6; 3052 carp_macmatch6_p = carp_macmatch6; 3053 err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); 3054 if (err) { 3055 printf("carp: error %d registering with INET6\n", err); 3056 carp_mod_cleanup(); 3057 return (err); 3058 } 3059 #endif 3060 #ifdef INET 3061 carp_iamatch_p = carp_iamatch; 3062 err = ipproto_register(IPPROTO_CARP, carp_input, NULL); 3063 if (err) { 3064 printf("carp: error %d registering with INET\n", err); 3065 carp_mod_cleanup(); 3066 return (err); 3067 } 3068 #endif 3069 3070 carp_nl_register(); 3071 3072 return (0); 3073 } 3074 3075 static int 3076 carp_modevent(module_t mod, int type, void *data) 3077 { 3078 switch (type) { 3079 case MOD_LOAD: 3080 return carp_mod_load(); 3081 /* NOTREACHED */ 3082 case MOD_UNLOAD: 3083 mtx_lock(&carp_mtx); 3084 if (LIST_EMPTY(&carp_list)) 3085 carp_mod_cleanup(); 3086 else { 3087 mtx_unlock(&carp_mtx); 3088 return (EBUSY); 3089 } 3090 break; 3091 3092 default: 3093 return (EINVAL); 3094 } 3095 3096 return (0); 3097 } 3098 3099 static moduledata_t carp_mod = { 3100 "carp", 3101 carp_modevent, 3102 0 3103 }; 3104 3105 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 3106