1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_bpf.h" 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/bus.h> 41 #include <sys/jail.h> 42 #include <sys/kernel.h> 43 #include <sys/limits.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/module.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/sockio.h> 52 #include <sys/sysctl.h> 53 #include <sys/syslog.h> 54 #include <sys/taskqueue.h> 55 #include <sys/counter.h> 56 57 #include <net/ethernet.h> 58 #include <net/if.h> 59 #include <net/if_var.h> 60 #include <net/if_dl.h> 61 #include <net/if_llatbl.h> 62 #include <net/if_types.h> 63 #include <net/route.h> 64 #include <net/vnet.h> 65 66 #if defined(INET) || defined(INET6) 67 #include <netinet/in.h> 68 #include <netinet/in_var.h> 69 #include <netinet/ip_carp.h> 70 #include <netinet/ip.h> 71 #include <machine/in_cksum.h> 72 #endif 73 #ifdef INET 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #endif 77 78 #ifdef INET6 79 #include <netinet/icmp6.h> 80 #include <netinet/ip6.h> 81 #include <netinet6/in6_var.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/scope6_var.h> 84 #include <netinet6/nd6.h> 85 #endif 86 87 #include <crypto/sha1.h> 88 89 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); 90 91 struct carp_softc { 92 struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ 93 struct ifaddr **sc_ifas; /* Our ifaddrs. */ 94 struct sockaddr_dl sc_addr; /* Our link level address. */ 95 struct callout sc_ad_tmo; /* Advertising timeout. */ 96 #ifdef INET 97 struct callout sc_md_tmo; /* Master down timeout. */ 98 #endif 99 #ifdef INET6 100 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ 101 #endif 102 struct mtx sc_mtx; 103 104 int sc_vhid; 105 int sc_advskew; 106 int sc_advbase; 107 108 int sc_naddrs; 109 int sc_naddrs6; 110 int sc_ifasiz; 111 enum { INIT = 0, BACKUP, MASTER } sc_state; 112 int sc_suppress; 113 int sc_sendad_errors; 114 #define CARP_SENDAD_MAX_ERRORS 3 115 int sc_sendad_success; 116 #define CARP_SENDAD_MIN_SUCCESS 3 117 118 int sc_init_counter; 119 uint64_t sc_counter; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char sc_key[CARP_KEY_LEN]; 124 unsigned char sc_pad[CARP_HMAC_PAD]; 125 SHA1_CTX sc_sha1; 126 127 TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ 128 LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ 129 }; 130 131 struct carp_if { 132 #ifdef INET 133 int cif_naddrs; 134 #endif 135 #ifdef INET6 136 int cif_naddrs6; 137 #endif 138 TAILQ_HEAD(, carp_softc) cif_vrs; 139 #ifdef INET 140 struct ip_moptions cif_imo; 141 #endif 142 #ifdef INET6 143 struct ip6_moptions cif_im6o; 144 #endif 145 struct ifnet *cif_ifp; 146 struct mtx cif_mtx; 147 uint32_t cif_flags; 148 #define CIF_PROMISC 0x00000001 149 }; 150 151 #define CARP_INET 0 152 #define CARP_INET6 1 153 static int proto_reg[] = {-1, -1}; 154 155 /* 156 * Brief design of carp(4). 157 * 158 * Any carp-capable ifnet may have a list of carp softcs hanging off 159 * its ifp->if_carp pointer. Each softc represents one unique virtual 160 * host id, or vhid. The softc has a back pointer to the ifnet. All 161 * softcs are joined in a global list, which has quite limited use. 162 * 163 * Any interface address that takes part in CARP negotiation has a 164 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either 165 * AF_INET or AF_INET6 address. 166 * 167 * Although, one can get the softc's backpointer to ifnet and traverse 168 * through its ifp->if_addrhead queue to find all interface addresses 169 * involved in CARP, we keep a growable array of ifaddr pointers. This 170 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that 171 * do calls into the network stack, thus avoiding LORs. 172 * 173 * Locking: 174 * 175 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), 176 * callout-driven events and ioctl()s. 177 * 178 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. 179 * To traverse the global list we use the mutex carp_mtx. 180 * 181 * Known issues with locking: 182 * 183 * - Sending ad, we put the pointer to the softc in an mtag, and no reference 184 * counting is done on the softc. 185 * - On module unload we may race (?) with packet processing thread 186 * dereferencing our function pointers. 187 */ 188 189 /* Accept incoming CARP packets. */ 190 VNET_DEFINE_STATIC(int, carp_allow) = 1; 191 #define V_carp_allow VNET(carp_allow) 192 193 /* Set DSCP in outgoing CARP packets. */ 194 VNET_DEFINE_STATIC(int, carp_dscp) = 56; 195 #define V_carp_dscp VNET(carp_dscp) 196 197 /* Preempt slower nodes. */ 198 VNET_DEFINE_STATIC(int, carp_preempt) = 0; 199 #define V_carp_preempt VNET(carp_preempt) 200 201 /* Log level. */ 202 VNET_DEFINE_STATIC(int, carp_log) = 1; 203 #define V_carp_log VNET(carp_log) 204 205 /* Global advskew demotion. */ 206 VNET_DEFINE_STATIC(int, carp_demotion) = 0; 207 #define V_carp_demotion VNET(carp_demotion) 208 209 /* Send error demotion factor. */ 210 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; 211 #define V_carp_senderr_adj VNET(carp_senderr_adj) 212 213 /* Iface down demotion factor. */ 214 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 215 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 216 217 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 218 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 219 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 220 221 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); 222 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 223 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_allow_sysctl, "I", 224 "Accept incoming CARP packets"); 225 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 226 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_dscp_sysctl, "I", 227 "DSCP value for carp packets"); 228 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 229 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 230 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 231 &VNET_NAME(carp_log), 0, "CARP log level"); 232 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 233 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 234 0, 0, carp_demote_adj_sysctl, "I", 235 "Adjust demotion factor (skew of advskew)"); 236 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 237 CTLFLAG_VNET | CTLFLAG_RW, 238 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 239 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 240 CTLFLAG_VNET | CTLFLAG_RW, 241 &VNET_NAME(carp_ifdown_adj), 0, 242 "Interface down demotion factor adjustment"); 243 244 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 245 VNET_PCPUSTAT_SYSINIT(carpstats); 246 VNET_PCPUSTAT_SYSUNINIT(carpstats); 247 248 #define CARPSTATS_ADD(name, val) \ 249 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 250 sizeof(uint64_t)], (val)) 251 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 252 253 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 254 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 255 256 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 257 NULL, MTX_DEF) 258 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 259 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 260 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 261 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 262 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 263 NULL, MTX_DEF) 264 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 265 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 266 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 267 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 268 #define CIF_FREE(cif) do { \ 269 CIF_LOCK(cif); \ 270 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 271 carp_free_if(cif); \ 272 else \ 273 CIF_UNLOCK(cif); \ 274 } while (0) 275 276 #define CARP_LOG(...) do { \ 277 if (V_carp_log > 0) \ 278 log(LOG_INFO, "carp: " __VA_ARGS__); \ 279 } while (0) 280 281 #define CARP_DEBUG(...) do { \ 282 if (V_carp_log > 1) \ 283 log(LOG_DEBUG, __VA_ARGS__); \ 284 } while (0) 285 286 #define IFNET_FOREACH_IFA(ifp, ifa) \ 287 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 288 if ((ifa)->ifa_carp != NULL) 289 290 #define CARP_FOREACH_IFA(sc, ifa) \ 291 CARP_LOCK_ASSERT(sc); \ 292 for (int _i = 0; \ 293 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 294 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 295 ++_i) 296 297 #define IFNET_FOREACH_CARP(ifp, sc) \ 298 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 299 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 300 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 301 302 #define DEMOTE_ADVSKEW(sc) \ 303 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 304 CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion)) 305 306 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 307 static struct carp_softc 308 *carp_alloc(struct ifnet *); 309 static void carp_destroy(struct carp_softc *); 310 static struct carp_if 311 *carp_alloc_if(struct ifnet *); 312 static void carp_free_if(struct carp_if *); 313 static void carp_set_state(struct carp_softc *, int, const char* reason); 314 static void carp_sc_state(struct carp_softc *); 315 static void carp_setrun(struct carp_softc *, sa_family_t); 316 static void carp_master_down(void *); 317 static void carp_master_down_locked(struct carp_softc *, 318 const char* reason); 319 static void carp_send_ad(void *); 320 static void carp_send_ad_locked(struct carp_softc *); 321 static void carp_addroute(struct carp_softc *); 322 static void carp_ifa_addroute(struct ifaddr *); 323 static void carp_delroute(struct carp_softc *); 324 static void carp_ifa_delroute(struct ifaddr *); 325 static void carp_send_ad_all(void *, int); 326 static void carp_demote_adj(int, char *); 327 328 static LIST_HEAD(, carp_softc) carp_list; 329 static struct mtx carp_mtx; 330 static struct sx carp_sx; 331 static struct task carp_sendall_task = 332 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 333 334 static void 335 carp_hmac_prepare(struct carp_softc *sc) 336 { 337 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 338 uint8_t vhid = sc->sc_vhid & 0xff; 339 struct ifaddr *ifa; 340 int i, found; 341 #ifdef INET 342 struct in_addr last, cur, in; 343 #endif 344 #ifdef INET6 345 struct in6_addr last6, cur6, in6; 346 #endif 347 348 CARP_LOCK_ASSERT(sc); 349 350 /* Compute ipad from key. */ 351 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 352 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 353 for (i = 0; i < sizeof(sc->sc_pad); i++) 354 sc->sc_pad[i] ^= 0x36; 355 356 /* Precompute first part of inner hash. */ 357 SHA1Init(&sc->sc_sha1); 358 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 359 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 360 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 361 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 362 #ifdef INET 363 cur.s_addr = 0; 364 do { 365 found = 0; 366 last = cur; 367 cur.s_addr = 0xffffffff; 368 CARP_FOREACH_IFA(sc, ifa) { 369 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 370 if (ifa->ifa_addr->sa_family == AF_INET && 371 ntohl(in.s_addr) > ntohl(last.s_addr) && 372 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 373 cur.s_addr = in.s_addr; 374 found++; 375 } 376 } 377 if (found) 378 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 379 } while (found); 380 #endif /* INET */ 381 #ifdef INET6 382 memset(&cur6, 0, sizeof(cur6)); 383 do { 384 found = 0; 385 last6 = cur6; 386 memset(&cur6, 0xff, sizeof(cur6)); 387 CARP_FOREACH_IFA(sc, ifa) { 388 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 389 if (IN6_IS_SCOPE_EMBED(&in6)) 390 in6.s6_addr16[1] = 0; 391 if (ifa->ifa_addr->sa_family == AF_INET6 && 392 memcmp(&in6, &last6, sizeof(in6)) > 0 && 393 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 394 cur6 = in6; 395 found++; 396 } 397 } 398 if (found) 399 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 400 } while (found); 401 #endif /* INET6 */ 402 403 /* convert ipad to opad */ 404 for (i = 0; i < sizeof(sc->sc_pad); i++) 405 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 406 } 407 408 static void 409 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 410 unsigned char md[20]) 411 { 412 SHA1_CTX sha1ctx; 413 414 CARP_LOCK_ASSERT(sc); 415 416 /* fetch first half of inner hash */ 417 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 418 419 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 420 SHA1Final(md, &sha1ctx); 421 422 /* outer hash */ 423 SHA1Init(&sha1ctx); 424 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 425 SHA1Update(&sha1ctx, md, 20); 426 SHA1Final(md, &sha1ctx); 427 } 428 429 static int 430 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 431 unsigned char md[20]) 432 { 433 unsigned char md2[20]; 434 435 CARP_LOCK_ASSERT(sc); 436 437 carp_hmac_generate(sc, counter, md2); 438 439 return (bcmp(md, md2, sizeof(md2))); 440 } 441 442 /* 443 * process input packet. 444 * we have rearranged checks order compared to the rfc, 445 * but it seems more efficient this way or not possible otherwise. 446 */ 447 #ifdef INET 448 int 449 carp_input(struct mbuf **mp, int *offp, int proto) 450 { 451 struct mbuf *m = *mp; 452 struct ip *ip = mtod(m, struct ip *); 453 struct carp_header *ch; 454 int iplen, len; 455 456 iplen = *offp; 457 *mp = NULL; 458 459 CARPSTATS_INC(carps_ipackets); 460 461 if (!V_carp_allow) { 462 m_freem(m); 463 return (IPPROTO_DONE); 464 } 465 466 /* verify that the IP TTL is 255. */ 467 if (ip->ip_ttl != CARP_DFLTTL) { 468 CARPSTATS_INC(carps_badttl); 469 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 470 ip->ip_ttl, 471 m->m_pkthdr.rcvif->if_xname); 472 m_freem(m); 473 return (IPPROTO_DONE); 474 } 475 476 iplen = ip->ip_hl << 2; 477 478 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 479 CARPSTATS_INC(carps_badlen); 480 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 481 "on %s\n", __func__, m->m_len - sizeof(struct ip), 482 m->m_pkthdr.rcvif->if_xname); 483 m_freem(m); 484 return (IPPROTO_DONE); 485 } 486 487 if (iplen + sizeof(*ch) < m->m_len) { 488 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 489 CARPSTATS_INC(carps_hdrops); 490 CARP_DEBUG("%s: pullup failed\n", __func__); 491 return (IPPROTO_DONE); 492 } 493 ip = mtod(m, struct ip *); 494 } 495 ch = (struct carp_header *)((char *)ip + iplen); 496 497 /* 498 * verify that the received packet length is 499 * equal to the CARP header 500 */ 501 len = iplen + sizeof(*ch); 502 if (len > m->m_pkthdr.len) { 503 CARPSTATS_INC(carps_badlen); 504 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 505 m->m_pkthdr.len, 506 m->m_pkthdr.rcvif->if_xname); 507 m_freem(m); 508 return (IPPROTO_DONE); 509 } 510 511 if ((m = m_pullup(m, len)) == NULL) { 512 CARPSTATS_INC(carps_hdrops); 513 return (IPPROTO_DONE); 514 } 515 ip = mtod(m, struct ip *); 516 ch = (struct carp_header *)((char *)ip + iplen); 517 518 /* verify the CARP checksum */ 519 m->m_data += iplen; 520 if (in_cksum(m, len - iplen)) { 521 CARPSTATS_INC(carps_badsum); 522 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 523 m->m_pkthdr.rcvif->if_xname); 524 m_freem(m); 525 return (IPPROTO_DONE); 526 } 527 m->m_data -= iplen; 528 529 carp_input_c(m, ch, AF_INET); 530 return (IPPROTO_DONE); 531 } 532 #endif 533 534 #ifdef INET6 535 int 536 carp6_input(struct mbuf **mp, int *offp, int proto) 537 { 538 struct mbuf *m = *mp; 539 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 540 struct carp_header *ch; 541 u_int len; 542 543 CARPSTATS_INC(carps_ipackets6); 544 545 if (!V_carp_allow) { 546 m_freem(m); 547 return (IPPROTO_DONE); 548 } 549 550 /* check if received on a valid carp interface */ 551 if (m->m_pkthdr.rcvif->if_carp == NULL) { 552 CARPSTATS_INC(carps_badif); 553 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 554 __func__, m->m_pkthdr.rcvif->if_xname); 555 m_freem(m); 556 return (IPPROTO_DONE); 557 } 558 559 /* verify that the IP TTL is 255 */ 560 if (ip6->ip6_hlim != CARP_DFLTTL) { 561 CARPSTATS_INC(carps_badttl); 562 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 563 ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); 564 m_freem(m); 565 return (IPPROTO_DONE); 566 } 567 568 /* verify that we have a complete carp packet */ 569 len = m->m_len; 570 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 571 if (ch == NULL) { 572 CARPSTATS_INC(carps_badlen); 573 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 574 return (IPPROTO_DONE); 575 } 576 577 578 /* verify the CARP checksum */ 579 m->m_data += *offp; 580 if (in_cksum(m, sizeof(*ch))) { 581 CARPSTATS_INC(carps_badsum); 582 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 583 m->m_pkthdr.rcvif->if_xname); 584 m_freem(m); 585 return (IPPROTO_DONE); 586 } 587 m->m_data -= *offp; 588 589 carp_input_c(m, ch, AF_INET6); 590 return (IPPROTO_DONE); 591 } 592 #endif /* INET6 */ 593 594 /* 595 * This routine should not be necessary at all, but some switches 596 * (VMWare ESX vswitches) can echo our own packets back at us, 597 * and we must ignore them or they will cause us to drop out of 598 * MASTER mode. 599 * 600 * We cannot catch all cases of network loops. Instead, what we 601 * do here is catch any packet that arrives with a carp header 602 * with a VHID of 0, that comes from an address that is our own. 603 * These packets are by definition "from us" (even if they are from 604 * a misconfigured host that is pretending to be us). 605 * 606 * The VHID test is outside this mini-function. 607 */ 608 static int 609 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 610 { 611 #ifdef INET 612 struct ip *ip4; 613 struct in_addr in4; 614 #endif 615 #ifdef INET6 616 struct ip6_hdr *ip6; 617 struct in6_addr in6; 618 #endif 619 620 switch (af) { 621 #ifdef INET 622 case AF_INET: 623 ip4 = mtod(m, struct ip *); 624 in4 = ifatoia(ifa)->ia_addr.sin_addr; 625 return (in4.s_addr == ip4->ip_src.s_addr); 626 #endif 627 #ifdef INET6 628 case AF_INET6: 629 ip6 = mtod(m, struct ip6_hdr *); 630 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 631 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 632 #endif 633 default: 634 break; 635 } 636 return (0); 637 } 638 639 static void 640 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 641 { 642 struct ifnet *ifp = m->m_pkthdr.rcvif; 643 struct ifaddr *ifa, *match; 644 struct carp_softc *sc; 645 uint64_t tmp_counter; 646 struct timeval sc_tv, ch_tv; 647 struct epoch_tracker et; 648 int error; 649 650 /* 651 * Verify that the VHID is valid on the receiving interface. 652 * 653 * There should be just one match. If there are none 654 * the VHID is not valid and we drop the packet. If 655 * there are multiple VHID matches, take just the first 656 * one, for compatibility with previous code. While we're 657 * scanning, check for obvious loops in the network topology 658 * (these should never happen, and as noted above, we may 659 * miss real loops; this is just a double-check). 660 */ 661 NET_EPOCH_ENTER(et); 662 error = 0; 663 match = NULL; 664 IFNET_FOREACH_IFA(ifp, ifa) { 665 if (match == NULL && ifa->ifa_carp != NULL && 666 ifa->ifa_addr->sa_family == af && 667 ifa->ifa_carp->sc_vhid == ch->carp_vhid) 668 match = ifa; 669 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) 670 error = ELOOP; 671 } 672 ifa = error ? NULL : match; 673 if (ifa != NULL) 674 ifa_ref(ifa); 675 NET_EPOCH_EXIT(et); 676 677 if (ifa == NULL) { 678 if (error == ELOOP) { 679 CARP_DEBUG("dropping looped packet on interface %s\n", 680 ifp->if_xname); 681 CARPSTATS_INC(carps_badif); /* ??? */ 682 } else { 683 CARPSTATS_INC(carps_badvhid); 684 } 685 m_freem(m); 686 return; 687 } 688 689 /* verify the CARP version. */ 690 if (ch->carp_version != CARP_VERSION) { 691 CARPSTATS_INC(carps_badver); 692 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname, 693 ch->carp_version); 694 ifa_free(ifa); 695 m_freem(m); 696 return; 697 } 698 699 sc = ifa->ifa_carp; 700 CARP_LOCK(sc); 701 ifa_free(ifa); 702 703 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 704 CARPSTATS_INC(carps_badauth); 705 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 706 sc->sc_vhid, ifp->if_xname); 707 goto out; 708 } 709 710 tmp_counter = ntohl(ch->carp_counter[0]); 711 tmp_counter = tmp_counter<<32; 712 tmp_counter += ntohl(ch->carp_counter[1]); 713 714 /* XXX Replay protection goes here */ 715 716 sc->sc_init_counter = 0; 717 sc->sc_counter = tmp_counter; 718 719 sc_tv.tv_sec = sc->sc_advbase; 720 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 721 ch_tv.tv_sec = ch->carp_advbase; 722 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 723 724 switch (sc->sc_state) { 725 case INIT: 726 break; 727 case MASTER: 728 /* 729 * If we receive an advertisement from a master who's going to 730 * be more frequent than us, go into BACKUP state. 731 */ 732 if (timevalcmp(&sc_tv, &ch_tv, >) || 733 timevalcmp(&sc_tv, &ch_tv, ==)) { 734 callout_stop(&sc->sc_ad_tmo); 735 carp_set_state(sc, BACKUP, 736 "more frequent advertisement received"); 737 carp_setrun(sc, 0); 738 carp_delroute(sc); 739 } 740 break; 741 case BACKUP: 742 /* 743 * If we're pre-empting masters who advertise slower than us, 744 * and this one claims to be slower, treat him as down. 745 */ 746 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 747 carp_master_down_locked(sc, 748 "preempting a slower master"); 749 break; 750 } 751 752 /* 753 * If the master is going to advertise at such a low frequency 754 * that he's guaranteed to time out, we'd might as well just 755 * treat him as timed out now. 756 */ 757 sc_tv.tv_sec = sc->sc_advbase * 3; 758 if (timevalcmp(&sc_tv, &ch_tv, <)) { 759 carp_master_down_locked(sc, "master will time out"); 760 break; 761 } 762 763 /* 764 * Otherwise, we reset the counter and wait for the next 765 * advertisement. 766 */ 767 carp_setrun(sc, af); 768 break; 769 } 770 771 out: 772 CARP_UNLOCK(sc); 773 m_freem(m); 774 } 775 776 static int 777 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 778 { 779 struct m_tag *mtag; 780 781 if (sc->sc_init_counter) { 782 /* this could also be seconds since unix epoch */ 783 sc->sc_counter = arc4random(); 784 sc->sc_counter = sc->sc_counter << 32; 785 sc->sc_counter += arc4random(); 786 } else 787 sc->sc_counter++; 788 789 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 790 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 791 792 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 793 794 /* Tag packet for carp_output */ 795 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), 796 M_NOWAIT)) == NULL) { 797 m_freem(m); 798 CARPSTATS_INC(carps_onomem); 799 return (ENOMEM); 800 } 801 bcopy(&sc, mtag + 1, sizeof(sc)); 802 m_tag_prepend(m, mtag); 803 804 return (0); 805 } 806 807 /* 808 * To avoid LORs and possible recursions this function shouldn't 809 * be called directly, but scheduled via taskqueue. 810 */ 811 static void 812 carp_send_ad_all(void *ctx __unused, int pending __unused) 813 { 814 struct carp_softc *sc; 815 816 mtx_lock(&carp_mtx); 817 LIST_FOREACH(sc, &carp_list, sc_next) 818 if (sc->sc_state == MASTER) { 819 CARP_LOCK(sc); 820 CURVNET_SET(sc->sc_carpdev->if_vnet); 821 carp_send_ad_locked(sc); 822 CURVNET_RESTORE(); 823 CARP_UNLOCK(sc); 824 } 825 mtx_unlock(&carp_mtx); 826 } 827 828 /* Send a periodic advertisement, executed in callout context. */ 829 static void 830 carp_send_ad(void *v) 831 { 832 struct carp_softc *sc = v; 833 834 CARP_LOCK_ASSERT(sc); 835 CURVNET_SET(sc->sc_carpdev->if_vnet); 836 carp_send_ad_locked(sc); 837 CURVNET_RESTORE(); 838 CARP_UNLOCK(sc); 839 } 840 841 static void 842 carp_send_ad_error(struct carp_softc *sc, int error) 843 { 844 845 if (error) { 846 if (sc->sc_sendad_errors < INT_MAX) 847 sc->sc_sendad_errors++; 848 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 849 static const char fmt[] = "send error %d on %s"; 850 char msg[sizeof(fmt) + IFNAMSIZ]; 851 852 sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); 853 carp_demote_adj(V_carp_senderr_adj, msg); 854 } 855 sc->sc_sendad_success = 0; 856 } else { 857 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS && 858 ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 859 static const char fmt[] = "send ok on %s"; 860 char msg[sizeof(fmt) + IFNAMSIZ]; 861 862 sprintf(msg, fmt, sc->sc_carpdev->if_xname); 863 carp_demote_adj(-V_carp_senderr_adj, msg); 864 sc->sc_sendad_errors = 0; 865 } else 866 sc->sc_sendad_errors = 0; 867 } 868 } 869 870 /* 871 * Pick the best ifaddr on the given ifp for sending CARP 872 * advertisements. 873 * 874 * "Best" here is defined by ifa_preferred(). This function is much 875 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 876 * 877 * (This could be simplified to return the actual address, except that 878 * it has a different format in AF_INET and AF_INET6.) 879 */ 880 static struct ifaddr * 881 carp_best_ifa(int af, struct ifnet *ifp) 882 { 883 struct epoch_tracker et; 884 struct ifaddr *ifa, *best; 885 886 if (af >= AF_MAX) 887 return (NULL); 888 best = NULL; 889 NET_EPOCH_ENTER(et); 890 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 891 if (ifa->ifa_addr->sa_family == af && 892 (best == NULL || ifa_preferred(best, ifa))) 893 best = ifa; 894 } 895 NET_EPOCH_EXIT(et); 896 if (best != NULL) 897 ifa_ref(best); 898 return (best); 899 } 900 901 static void 902 carp_send_ad_locked(struct carp_softc *sc) 903 { 904 struct carp_header ch; 905 struct timeval tv; 906 struct ifaddr *ifa; 907 struct carp_header *ch_ptr; 908 struct mbuf *m; 909 int len, advskew; 910 911 CARP_LOCK_ASSERT(sc); 912 913 advskew = DEMOTE_ADVSKEW(sc); 914 tv.tv_sec = sc->sc_advbase; 915 tv.tv_usec = advskew * 1000000 / 256; 916 917 ch.carp_version = CARP_VERSION; 918 ch.carp_type = CARP_ADVERTISEMENT; 919 ch.carp_vhid = sc->sc_vhid; 920 ch.carp_advbase = sc->sc_advbase; 921 ch.carp_advskew = advskew; 922 ch.carp_authlen = 7; /* XXX DEFINE */ 923 ch.carp_pad1 = 0; /* must be zero */ 924 ch.carp_cksum = 0; 925 926 /* XXXGL: OpenBSD picks first ifaddr with needed family. */ 927 928 #ifdef INET 929 if (sc->sc_naddrs) { 930 struct ip *ip; 931 932 m = m_gethdr(M_NOWAIT, MT_DATA); 933 if (m == NULL) { 934 CARPSTATS_INC(carps_onomem); 935 goto resched; 936 } 937 len = sizeof(*ip) + sizeof(ch); 938 m->m_pkthdr.len = len; 939 m->m_pkthdr.rcvif = NULL; 940 m->m_len = len; 941 M_ALIGN(m, m->m_len); 942 m->m_flags |= M_MCAST; 943 ip = mtod(m, struct ip *); 944 ip->ip_v = IPVERSION; 945 ip->ip_hl = sizeof(*ip) >> 2; 946 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 947 ip->ip_len = htons(len); 948 ip->ip_off = htons(IP_DF); 949 ip->ip_ttl = CARP_DFLTTL; 950 ip->ip_p = IPPROTO_CARP; 951 ip->ip_sum = 0; 952 ip_fillid(ip); 953 954 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 955 if (ifa != NULL) { 956 ip->ip_src.s_addr = 957 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 958 ifa_free(ifa); 959 } else 960 ip->ip_src.s_addr = 0; 961 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 962 963 ch_ptr = (struct carp_header *)(&ip[1]); 964 bcopy(&ch, ch_ptr, sizeof(ch)); 965 if (carp_prepare_ad(m, sc, ch_ptr)) 966 goto resched; 967 968 m->m_data += sizeof(*ip); 969 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 970 m->m_data -= sizeof(*ip); 971 972 CARPSTATS_INC(carps_opackets); 973 974 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 975 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 976 } 977 #endif /* INET */ 978 #ifdef INET6 979 if (sc->sc_naddrs6) { 980 struct ip6_hdr *ip6; 981 982 m = m_gethdr(M_NOWAIT, MT_DATA); 983 if (m == NULL) { 984 CARPSTATS_INC(carps_onomem); 985 goto resched; 986 } 987 len = sizeof(*ip6) + sizeof(ch); 988 m->m_pkthdr.len = len; 989 m->m_pkthdr.rcvif = NULL; 990 m->m_len = len; 991 M_ALIGN(m, m->m_len); 992 m->m_flags |= M_MCAST; 993 ip6 = mtod(m, struct ip6_hdr *); 994 bzero(ip6, sizeof(*ip6)); 995 ip6->ip6_vfc |= IPV6_VERSION; 996 /* Traffic class isn't defined in ip6 struct instead 997 * it gets offset into flowid field */ 998 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 999 IPTOS_DSCP_OFFSET)); 1000 ip6->ip6_hlim = CARP_DFLTTL; 1001 ip6->ip6_nxt = IPPROTO_CARP; 1002 1003 /* set the source address */ 1004 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1005 if (ifa != NULL) { 1006 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1007 sizeof(struct in6_addr)); 1008 ifa_free(ifa); 1009 } else 1010 /* This should never happen with IPv6. */ 1011 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1012 1013 /* Set the multicast destination. */ 1014 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1015 ip6->ip6_dst.s6_addr8[15] = 0x12; 1016 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1017 m_freem(m); 1018 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1019 goto resched; 1020 } 1021 1022 ch_ptr = (struct carp_header *)(&ip6[1]); 1023 bcopy(&ch, ch_ptr, sizeof(ch)); 1024 if (carp_prepare_ad(m, sc, ch_ptr)) 1025 goto resched; 1026 1027 m->m_data += sizeof(*ip6); 1028 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1029 m->m_data -= sizeof(*ip6); 1030 1031 CARPSTATS_INC(carps_opackets6); 1032 1033 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1034 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1035 } 1036 #endif /* INET6 */ 1037 1038 resched: 1039 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); 1040 } 1041 1042 static void 1043 carp_addroute(struct carp_softc *sc) 1044 { 1045 struct ifaddr *ifa; 1046 1047 CARP_FOREACH_IFA(sc, ifa) 1048 carp_ifa_addroute(ifa); 1049 } 1050 1051 static void 1052 carp_ifa_addroute(struct ifaddr *ifa) 1053 { 1054 1055 switch (ifa->ifa_addr->sa_family) { 1056 #ifdef INET 1057 case AF_INET: 1058 in_addprefix(ifatoia(ifa), RTF_UP); 1059 ifa_add_loopback_route(ifa, 1060 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1061 break; 1062 #endif 1063 #ifdef INET6 1064 case AF_INET6: 1065 ifa_add_loopback_route(ifa, 1066 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1067 nd6_add_ifa_lle(ifatoia6(ifa)); 1068 break; 1069 #endif 1070 } 1071 } 1072 1073 static void 1074 carp_delroute(struct carp_softc *sc) 1075 { 1076 struct ifaddr *ifa; 1077 1078 CARP_FOREACH_IFA(sc, ifa) 1079 carp_ifa_delroute(ifa); 1080 } 1081 1082 static void 1083 carp_ifa_delroute(struct ifaddr *ifa) 1084 { 1085 1086 switch (ifa->ifa_addr->sa_family) { 1087 #ifdef INET 1088 case AF_INET: 1089 ifa_del_loopback_route(ifa, 1090 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1091 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1092 break; 1093 #endif 1094 #ifdef INET6 1095 case AF_INET6: 1096 ifa_del_loopback_route(ifa, 1097 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1098 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1099 break; 1100 #endif 1101 } 1102 } 1103 1104 int 1105 carp_master(struct ifaddr *ifa) 1106 { 1107 struct carp_softc *sc = ifa->ifa_carp; 1108 1109 return (sc->sc_state == MASTER); 1110 } 1111 1112 #ifdef INET 1113 /* 1114 * Broadcast a gratuitous ARP request containing 1115 * the virtual router MAC address for each IP address 1116 * associated with the virtual router. 1117 */ 1118 static void 1119 carp_send_arp(struct carp_softc *sc) 1120 { 1121 struct ifaddr *ifa; 1122 struct in_addr addr; 1123 1124 CARP_FOREACH_IFA(sc, ifa) { 1125 if (ifa->ifa_addr->sa_family != AF_INET) 1126 continue; 1127 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1128 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); 1129 } 1130 } 1131 1132 int 1133 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1134 { 1135 struct carp_softc *sc = ifa->ifa_carp; 1136 1137 if (sc->sc_state == MASTER) { 1138 *enaddr = LLADDR(&sc->sc_addr); 1139 return (1); 1140 } 1141 1142 return (0); 1143 } 1144 #endif 1145 1146 #ifdef INET6 1147 static void 1148 carp_send_na(struct carp_softc *sc) 1149 { 1150 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1151 struct ifaddr *ifa; 1152 struct in6_addr *in6; 1153 1154 CARP_FOREACH_IFA(sc, ifa) { 1155 if (ifa->ifa_addr->sa_family != AF_INET6) 1156 continue; 1157 1158 in6 = IFA_IN6(ifa); 1159 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1160 ND_NA_FLAG_OVERRIDE, 1, NULL); 1161 DELAY(1000); /* XXX */ 1162 } 1163 } 1164 1165 /* 1166 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1167 * matches and is not a carp address. Returns NULL otherwise. 1168 */ 1169 struct ifaddr * 1170 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1171 { 1172 struct epoch_tracker et; 1173 struct ifaddr *ifa; 1174 1175 ifa = NULL; 1176 NET_EPOCH_ENTER(et); 1177 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1178 if (ifa->ifa_addr->sa_family != AF_INET6) 1179 continue; 1180 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1181 continue; 1182 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1183 ifa = NULL; 1184 else 1185 ifa_ref(ifa); 1186 break; 1187 } 1188 NET_EPOCH_EXIT(et); 1189 1190 return (ifa); 1191 } 1192 1193 caddr_t 1194 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1195 { 1196 struct epoch_tracker et; 1197 struct ifaddr *ifa; 1198 1199 NET_EPOCH_ENTER(et); 1200 IFNET_FOREACH_IFA(ifp, ifa) 1201 if (ifa->ifa_addr->sa_family == AF_INET6 && 1202 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1203 struct carp_softc *sc = ifa->ifa_carp; 1204 struct m_tag *mtag; 1205 1206 NET_EPOCH_EXIT(et); 1207 1208 mtag = m_tag_get(PACKET_TAG_CARP, 1209 sizeof(struct carp_softc *), M_NOWAIT); 1210 if (mtag == NULL) 1211 /* Better a bit than nothing. */ 1212 return (LLADDR(&sc->sc_addr)); 1213 1214 bcopy(&sc, mtag + 1, sizeof(sc)); 1215 m_tag_prepend(m, mtag); 1216 1217 return (LLADDR(&sc->sc_addr)); 1218 } 1219 NET_EPOCH_EXIT(et); 1220 1221 return (NULL); 1222 } 1223 #endif /* INET6 */ 1224 1225 int 1226 carp_forus(struct ifnet *ifp, u_char *dhost) 1227 { 1228 struct carp_softc *sc; 1229 uint8_t *ena = dhost; 1230 1231 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1232 return (0); 1233 1234 CIF_LOCK(ifp->if_carp); 1235 IFNET_FOREACH_CARP(ifp, sc) { 1236 CARP_LOCK(sc); 1237 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1238 ETHER_ADDR_LEN)) { 1239 CARP_UNLOCK(sc); 1240 CIF_UNLOCK(ifp->if_carp); 1241 return (1); 1242 } 1243 CARP_UNLOCK(sc); 1244 } 1245 CIF_UNLOCK(ifp->if_carp); 1246 1247 return (0); 1248 } 1249 1250 /* Master down timeout event, executed in callout context. */ 1251 static void 1252 carp_master_down(void *v) 1253 { 1254 struct carp_softc *sc = v; 1255 1256 CARP_LOCK_ASSERT(sc); 1257 1258 CURVNET_SET(sc->sc_carpdev->if_vnet); 1259 if (sc->sc_state == BACKUP) { 1260 carp_master_down_locked(sc, "master timed out"); 1261 } 1262 CURVNET_RESTORE(); 1263 1264 CARP_UNLOCK(sc); 1265 } 1266 1267 static void 1268 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1269 { 1270 1271 CARP_LOCK_ASSERT(sc); 1272 1273 switch (sc->sc_state) { 1274 case BACKUP: 1275 carp_set_state(sc, MASTER, reason); 1276 carp_send_ad_locked(sc); 1277 #ifdef INET 1278 carp_send_arp(sc); 1279 #endif 1280 #ifdef INET6 1281 carp_send_na(sc); 1282 #endif 1283 carp_setrun(sc, 0); 1284 carp_addroute(sc); 1285 break; 1286 case INIT: 1287 case MASTER: 1288 #ifdef INVARIANTS 1289 panic("carp: VHID %u@%s: master_down event in %s state\n", 1290 sc->sc_vhid, 1291 sc->sc_carpdev->if_xname, 1292 sc->sc_state ? "MASTER" : "INIT"); 1293 #endif 1294 break; 1295 } 1296 } 1297 1298 /* 1299 * When in backup state, af indicates whether to reset the master down timer 1300 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1301 */ 1302 static void 1303 carp_setrun(struct carp_softc *sc, sa_family_t af) 1304 { 1305 struct timeval tv; 1306 1307 CARP_LOCK_ASSERT(sc); 1308 1309 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1310 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1311 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1312 !V_carp_allow) 1313 return; 1314 1315 switch (sc->sc_state) { 1316 case INIT: 1317 carp_set_state(sc, BACKUP, "initialization complete"); 1318 carp_setrun(sc, 0); 1319 break; 1320 case BACKUP: 1321 callout_stop(&sc->sc_ad_tmo); 1322 tv.tv_sec = 3 * sc->sc_advbase; 1323 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1324 switch (af) { 1325 #ifdef INET 1326 case AF_INET: 1327 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1328 carp_master_down, sc); 1329 break; 1330 #endif 1331 #ifdef INET6 1332 case AF_INET6: 1333 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1334 carp_master_down, sc); 1335 break; 1336 #endif 1337 default: 1338 #ifdef INET 1339 if (sc->sc_naddrs) 1340 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1341 carp_master_down, sc); 1342 #endif 1343 #ifdef INET6 1344 if (sc->sc_naddrs6) 1345 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1346 carp_master_down, sc); 1347 #endif 1348 break; 1349 } 1350 break; 1351 case MASTER: 1352 tv.tv_sec = sc->sc_advbase; 1353 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1354 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1355 carp_send_ad, sc); 1356 break; 1357 } 1358 } 1359 1360 /* 1361 * Setup multicast structures. 1362 */ 1363 static int 1364 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1365 { 1366 struct ifnet *ifp = cif->cif_ifp; 1367 int error = 0; 1368 1369 switch (sa) { 1370 #ifdef INET 1371 case AF_INET: 1372 { 1373 struct ip_moptions *imo = &cif->cif_imo; 1374 struct in_addr addr; 1375 1376 if (imo->imo_membership) 1377 return (0); 1378 1379 imo->imo_membership = (struct in_multi **)malloc( 1380 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 1381 M_WAITOK); 1382 imo->imo_mfilters = NULL; 1383 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1384 imo->imo_multicast_vif = -1; 1385 1386 addr.s_addr = htonl(INADDR_CARP_GROUP); 1387 if ((error = in_joingroup(ifp, &addr, NULL, 1388 &imo->imo_membership[0])) != 0) { 1389 free(imo->imo_membership, M_CARP); 1390 break; 1391 } 1392 imo->imo_num_memberships++; 1393 imo->imo_multicast_ifp = ifp; 1394 imo->imo_multicast_ttl = CARP_DFLTTL; 1395 imo->imo_multicast_loop = 0; 1396 break; 1397 } 1398 #endif 1399 #ifdef INET6 1400 case AF_INET6: 1401 { 1402 struct ip6_moptions *im6o = &cif->cif_im6o; 1403 struct in6_addr in6; 1404 struct in6_multi *in6m; 1405 1406 if (im6o->im6o_membership) 1407 return (0); 1408 1409 im6o->im6o_membership = (struct in6_multi **)malloc( 1410 (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, 1411 M_ZERO | M_WAITOK); 1412 im6o->im6o_mfilters = NULL; 1413 im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; 1414 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1415 im6o->im6o_multicast_ifp = ifp; 1416 1417 /* Join IPv6 CARP multicast group. */ 1418 bzero(&in6, sizeof(in6)); 1419 in6.s6_addr16[0] = htons(0xff02); 1420 in6.s6_addr8[15] = 0x12; 1421 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1422 free(im6o->im6o_membership, M_CARP); 1423 break; 1424 } 1425 in6m = NULL; 1426 if ((error = in6_joingroup(ifp, &in6, NULL, &in6m, 0)) != 0) { 1427 free(im6o->im6o_membership, M_CARP); 1428 break; 1429 } 1430 in6m_acquire(in6m); 1431 im6o->im6o_membership[0] = in6m; 1432 im6o->im6o_num_memberships++; 1433 1434 /* Join solicited multicast address. */ 1435 bzero(&in6, sizeof(in6)); 1436 in6.s6_addr16[0] = htons(0xff02); 1437 in6.s6_addr32[1] = 0; 1438 in6.s6_addr32[2] = htonl(1); 1439 in6.s6_addr32[3] = 0; 1440 in6.s6_addr8[12] = 0xff; 1441 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1442 in6_leavegroup(im6o->im6o_membership[0], NULL); 1443 free(im6o->im6o_membership, M_CARP); 1444 break; 1445 } 1446 in6m = NULL; 1447 if ((error = in6_joingroup(ifp, &in6, NULL, &in6m, 0)) != 0) { 1448 in6_leavegroup(im6o->im6o_membership[0], NULL); 1449 free(im6o->im6o_membership, M_CARP); 1450 break; 1451 } 1452 in6m_acquire(in6m); 1453 im6o->im6o_membership[1] = in6m; 1454 im6o->im6o_num_memberships++; 1455 break; 1456 } 1457 #endif 1458 } 1459 1460 return (error); 1461 } 1462 1463 /* 1464 * Free multicast structures. 1465 */ 1466 static void 1467 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1468 { 1469 1470 sx_assert(&carp_sx, SA_XLOCKED); 1471 1472 switch (sa) { 1473 #ifdef INET 1474 case AF_INET: 1475 if (cif->cif_naddrs == 0) { 1476 struct ip_moptions *imo = &cif->cif_imo; 1477 1478 in_leavegroup(imo->imo_membership[0], NULL); 1479 KASSERT(imo->imo_mfilters == NULL, 1480 ("%s: imo_mfilters != NULL", __func__)); 1481 free(imo->imo_membership, M_CARP); 1482 imo->imo_membership = NULL; 1483 1484 } 1485 break; 1486 #endif 1487 #ifdef INET6 1488 case AF_INET6: 1489 if (cif->cif_naddrs6 == 0) { 1490 struct ip6_moptions *im6o = &cif->cif_im6o; 1491 1492 in6_leavegroup(im6o->im6o_membership[0], NULL); 1493 in6_leavegroup(im6o->im6o_membership[1], NULL); 1494 KASSERT(im6o->im6o_mfilters == NULL, 1495 ("%s: im6o_mfilters != NULL", __func__)); 1496 free(im6o->im6o_membership, M_CARP); 1497 im6o->im6o_membership = NULL; 1498 } 1499 break; 1500 #endif 1501 } 1502 } 1503 1504 int 1505 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1506 { 1507 struct m_tag *mtag; 1508 struct carp_softc *sc; 1509 1510 if (!sa) 1511 return (0); 1512 1513 switch (sa->sa_family) { 1514 #ifdef INET 1515 case AF_INET: 1516 break; 1517 #endif 1518 #ifdef INET6 1519 case AF_INET6: 1520 break; 1521 #endif 1522 default: 1523 return (0); 1524 } 1525 1526 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1527 if (mtag == NULL) 1528 return (0); 1529 1530 bcopy(mtag + 1, &sc, sizeof(sc)); 1531 1532 /* Set the source MAC address to the Virtual Router MAC Address. */ 1533 switch (ifp->if_type) { 1534 case IFT_ETHER: 1535 case IFT_BRIDGE: 1536 case IFT_L2VLAN: { 1537 struct ether_header *eh; 1538 1539 eh = mtod(m, struct ether_header *); 1540 eh->ether_shost[0] = 0; 1541 eh->ether_shost[1] = 0; 1542 eh->ether_shost[2] = 0x5e; 1543 eh->ether_shost[3] = 0; 1544 eh->ether_shost[4] = 1; 1545 eh->ether_shost[5] = sc->sc_vhid; 1546 } 1547 break; 1548 default: 1549 printf("%s: carp is not supported for the %d interface type\n", 1550 ifp->if_xname, ifp->if_type); 1551 return (EOPNOTSUPP); 1552 } 1553 1554 return (0); 1555 } 1556 1557 static struct carp_softc* 1558 carp_alloc(struct ifnet *ifp) 1559 { 1560 struct carp_softc *sc; 1561 struct carp_if *cif; 1562 1563 sx_assert(&carp_sx, SA_XLOCKED); 1564 1565 if ((cif = ifp->if_carp) == NULL) 1566 cif = carp_alloc_if(ifp); 1567 1568 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1569 1570 sc->sc_advbase = CARP_DFLTINTV; 1571 sc->sc_vhid = -1; /* required setting */ 1572 sc->sc_init_counter = 1; 1573 sc->sc_state = INIT; 1574 1575 sc->sc_ifasiz = sizeof(struct ifaddr *); 1576 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1577 sc->sc_carpdev = ifp; 1578 1579 CARP_LOCK_INIT(sc); 1580 #ifdef INET 1581 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1582 #endif 1583 #ifdef INET6 1584 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1585 #endif 1586 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1587 1588 CIF_LOCK(cif); 1589 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1590 CIF_UNLOCK(cif); 1591 1592 mtx_lock(&carp_mtx); 1593 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1594 mtx_unlock(&carp_mtx); 1595 1596 return (sc); 1597 } 1598 1599 static void 1600 carp_grow_ifas(struct carp_softc *sc) 1601 { 1602 struct ifaddr **new; 1603 1604 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 1605 CARP_LOCK(sc); 1606 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1607 free(sc->sc_ifas, M_CARP); 1608 sc->sc_ifas = new; 1609 sc->sc_ifasiz *= 2; 1610 CARP_UNLOCK(sc); 1611 } 1612 1613 static void 1614 carp_destroy(struct carp_softc *sc) 1615 { 1616 struct ifnet *ifp = sc->sc_carpdev; 1617 struct carp_if *cif = ifp->if_carp; 1618 1619 sx_assert(&carp_sx, SA_XLOCKED); 1620 1621 if (sc->sc_suppress) 1622 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 1623 CARP_UNLOCK(sc); 1624 1625 CIF_LOCK(cif); 1626 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1627 CIF_UNLOCK(cif); 1628 1629 mtx_lock(&carp_mtx); 1630 LIST_REMOVE(sc, sc_next); 1631 mtx_unlock(&carp_mtx); 1632 1633 callout_drain(&sc->sc_ad_tmo); 1634 #ifdef INET 1635 callout_drain(&sc->sc_md_tmo); 1636 #endif 1637 #ifdef INET6 1638 callout_drain(&sc->sc_md6_tmo); 1639 #endif 1640 CARP_LOCK_DESTROY(sc); 1641 1642 free(sc->sc_ifas, M_CARP); 1643 free(sc, M_CARP); 1644 } 1645 1646 static struct carp_if* 1647 carp_alloc_if(struct ifnet *ifp) 1648 { 1649 struct carp_if *cif; 1650 int error; 1651 1652 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1653 1654 if ((error = ifpromisc(ifp, 1)) != 0) 1655 printf("%s: ifpromisc(%s) failed: %d\n", 1656 __func__, ifp->if_xname, error); 1657 else 1658 cif->cif_flags |= CIF_PROMISC; 1659 1660 CIF_LOCK_INIT(cif); 1661 cif->cif_ifp = ifp; 1662 TAILQ_INIT(&cif->cif_vrs); 1663 1664 IF_ADDR_WLOCK(ifp); 1665 ifp->if_carp = cif; 1666 if_ref(ifp); 1667 IF_ADDR_WUNLOCK(ifp); 1668 1669 return (cif); 1670 } 1671 1672 static void 1673 carp_free_if(struct carp_if *cif) 1674 { 1675 struct ifnet *ifp = cif->cif_ifp; 1676 1677 CIF_LOCK_ASSERT(cif); 1678 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1679 __func__)); 1680 1681 IF_ADDR_WLOCK(ifp); 1682 ifp->if_carp = NULL; 1683 IF_ADDR_WUNLOCK(ifp); 1684 1685 CIF_LOCK_DESTROY(cif); 1686 1687 if (cif->cif_flags & CIF_PROMISC) 1688 ifpromisc(ifp, 0); 1689 if_rele(ifp); 1690 1691 free(cif, M_CARP); 1692 } 1693 1694 static void 1695 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) 1696 { 1697 1698 CARP_LOCK(sc); 1699 carpr->carpr_state = sc->sc_state; 1700 carpr->carpr_vhid = sc->sc_vhid; 1701 carpr->carpr_advbase = sc->sc_advbase; 1702 carpr->carpr_advskew = sc->sc_advskew; 1703 if (priv) 1704 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1705 else 1706 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1707 CARP_UNLOCK(sc); 1708 } 1709 1710 int 1711 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1712 { 1713 struct carpreq carpr; 1714 struct ifnet *ifp; 1715 struct carp_softc *sc = NULL; 1716 int error = 0, locked = 0; 1717 1718 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 1719 return (error); 1720 1721 ifp = ifunit_ref(ifr->ifr_name); 1722 if (ifp == NULL) 1723 return (ENXIO); 1724 1725 switch (ifp->if_type) { 1726 case IFT_ETHER: 1727 case IFT_L2VLAN: 1728 case IFT_BRIDGE: 1729 break; 1730 default: 1731 error = EOPNOTSUPP; 1732 goto out; 1733 } 1734 1735 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1736 error = EADDRNOTAVAIL; 1737 goto out; 1738 } 1739 1740 sx_xlock(&carp_sx); 1741 switch (cmd) { 1742 case SIOCSVH: 1743 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1744 break; 1745 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || 1746 carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { 1747 error = EINVAL; 1748 break; 1749 } 1750 1751 if (ifp->if_carp) { 1752 IFNET_FOREACH_CARP(ifp, sc) 1753 if (sc->sc_vhid == carpr.carpr_vhid) 1754 break; 1755 } 1756 if (sc == NULL) { 1757 sc = carp_alloc(ifp); 1758 CARP_LOCK(sc); 1759 sc->sc_vhid = carpr.carpr_vhid; 1760 LLADDR(&sc->sc_addr)[0] = 0; 1761 LLADDR(&sc->sc_addr)[1] = 0; 1762 LLADDR(&sc->sc_addr)[2] = 0x5e; 1763 LLADDR(&sc->sc_addr)[3] = 0; 1764 LLADDR(&sc->sc_addr)[4] = 1; 1765 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1766 } else 1767 CARP_LOCK(sc); 1768 locked = 1; 1769 if (carpr.carpr_advbase > 0) { 1770 if (carpr.carpr_advbase > 255 || 1771 carpr.carpr_advbase < CARP_DFLTINTV) { 1772 error = EINVAL; 1773 break; 1774 } 1775 sc->sc_advbase = carpr.carpr_advbase; 1776 } 1777 if (carpr.carpr_advskew >= 255) { 1778 error = EINVAL; 1779 break; 1780 } 1781 sc->sc_advskew = carpr.carpr_advskew; 1782 if (carpr.carpr_key[0] != '\0') { 1783 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1784 carp_hmac_prepare(sc); 1785 } 1786 if (sc->sc_state != INIT && 1787 carpr.carpr_state != sc->sc_state) { 1788 switch (carpr.carpr_state) { 1789 case BACKUP: 1790 callout_stop(&sc->sc_ad_tmo); 1791 carp_set_state(sc, BACKUP, 1792 "user requested via ifconfig"); 1793 carp_setrun(sc, 0); 1794 carp_delroute(sc); 1795 break; 1796 case MASTER: 1797 carp_master_down_locked(sc, 1798 "user requested via ifconfig"); 1799 break; 1800 default: 1801 break; 1802 } 1803 } 1804 break; 1805 1806 case SIOCGVH: 1807 { 1808 int priveleged; 1809 1810 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { 1811 error = EINVAL; 1812 break; 1813 } 1814 if (carpr.carpr_count < 1) { 1815 error = EMSGSIZE; 1816 break; 1817 } 1818 if (ifp->if_carp == NULL) { 1819 error = ENOENT; 1820 break; 1821 } 1822 1823 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); 1824 if (carpr.carpr_vhid != 0) { 1825 IFNET_FOREACH_CARP(ifp, sc) 1826 if (sc->sc_vhid == carpr.carpr_vhid) 1827 break; 1828 if (sc == NULL) { 1829 error = ENOENT; 1830 break; 1831 } 1832 carp_carprcp(&carpr, sc, priveleged); 1833 error = copyout(&carpr, ifr_data_get_ptr(ifr), 1834 sizeof(carpr)); 1835 } else { 1836 int i, count; 1837 1838 count = 0; 1839 IFNET_FOREACH_CARP(ifp, sc) 1840 count++; 1841 1842 if (count > carpr.carpr_count) { 1843 CIF_UNLOCK(ifp->if_carp); 1844 error = EMSGSIZE; 1845 break; 1846 } 1847 1848 i = 0; 1849 IFNET_FOREACH_CARP(ifp, sc) { 1850 carp_carprcp(&carpr, sc, priveleged); 1851 carpr.carpr_count = count; 1852 error = copyout(&carpr, 1853 (caddr_t)ifr_data_get_ptr(ifr) + 1854 (i * sizeof(carpr)), sizeof(carpr)); 1855 if (error) { 1856 CIF_UNLOCK(ifp->if_carp); 1857 break; 1858 } 1859 i++; 1860 } 1861 } 1862 break; 1863 } 1864 default: 1865 error = EINVAL; 1866 } 1867 sx_xunlock(&carp_sx); 1868 1869 out: 1870 if (locked) 1871 CARP_UNLOCK(sc); 1872 if_rele(ifp); 1873 1874 return (error); 1875 } 1876 1877 static int 1878 carp_get_vhid(struct ifaddr *ifa) 1879 { 1880 1881 if (ifa == NULL || ifa->ifa_carp == NULL) 1882 return (0); 1883 1884 return (ifa->ifa_carp->sc_vhid); 1885 } 1886 1887 int 1888 carp_attach(struct ifaddr *ifa, int vhid) 1889 { 1890 struct ifnet *ifp = ifa->ifa_ifp; 1891 struct carp_if *cif = ifp->if_carp; 1892 struct carp_softc *sc; 1893 int index, error; 1894 1895 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 1896 1897 switch (ifa->ifa_addr->sa_family) { 1898 #ifdef INET 1899 case AF_INET: 1900 #endif 1901 #ifdef INET6 1902 case AF_INET6: 1903 #endif 1904 break; 1905 default: 1906 return (EPROTOTYPE); 1907 } 1908 1909 sx_xlock(&carp_sx); 1910 if (ifp->if_carp == NULL) { 1911 sx_xunlock(&carp_sx); 1912 return (ENOPROTOOPT); 1913 } 1914 1915 IFNET_FOREACH_CARP(ifp, sc) 1916 if (sc->sc_vhid == vhid) 1917 break; 1918 if (sc == NULL) { 1919 sx_xunlock(&carp_sx); 1920 return (ENOENT); 1921 } 1922 1923 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 1924 if (error) { 1925 CIF_FREE(cif); 1926 sx_xunlock(&carp_sx); 1927 return (error); 1928 } 1929 1930 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 1931 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 1932 carp_grow_ifas(sc); 1933 1934 switch (ifa->ifa_addr->sa_family) { 1935 #ifdef INET 1936 case AF_INET: 1937 cif->cif_naddrs++; 1938 sc->sc_naddrs++; 1939 break; 1940 #endif 1941 #ifdef INET6 1942 case AF_INET6: 1943 cif->cif_naddrs6++; 1944 sc->sc_naddrs6++; 1945 break; 1946 #endif 1947 } 1948 1949 ifa_ref(ifa); 1950 1951 CARP_LOCK(sc); 1952 sc->sc_ifas[index - 1] = ifa; 1953 ifa->ifa_carp = sc; 1954 carp_hmac_prepare(sc); 1955 carp_sc_state(sc); 1956 CARP_UNLOCK(sc); 1957 1958 sx_xunlock(&carp_sx); 1959 1960 return (0); 1961 } 1962 1963 void 1964 carp_detach(struct ifaddr *ifa, bool keep_cif) 1965 { 1966 struct ifnet *ifp = ifa->ifa_ifp; 1967 struct carp_if *cif = ifp->if_carp; 1968 struct carp_softc *sc = ifa->ifa_carp; 1969 int i, index; 1970 1971 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 1972 1973 sx_xlock(&carp_sx); 1974 1975 CARP_LOCK(sc); 1976 /* Shift array. */ 1977 index = sc->sc_naddrs + sc->sc_naddrs6; 1978 for (i = 0; i < index; i++) 1979 if (sc->sc_ifas[i] == ifa) 1980 break; 1981 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 1982 for (; i < index - 1; i++) 1983 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 1984 sc->sc_ifas[index - 1] = NULL; 1985 1986 switch (ifa->ifa_addr->sa_family) { 1987 #ifdef INET 1988 case AF_INET: 1989 cif->cif_naddrs--; 1990 sc->sc_naddrs--; 1991 break; 1992 #endif 1993 #ifdef INET6 1994 case AF_INET6: 1995 cif->cif_naddrs6--; 1996 sc->sc_naddrs6--; 1997 break; 1998 #endif 1999 } 2000 2001 carp_ifa_delroute(ifa); 2002 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2003 2004 ifa->ifa_carp = NULL; 2005 ifa_free(ifa); 2006 2007 carp_hmac_prepare(sc); 2008 carp_sc_state(sc); 2009 2010 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2011 carp_destroy(sc); 2012 else 2013 CARP_UNLOCK(sc); 2014 2015 if (!keep_cif) 2016 CIF_FREE(cif); 2017 2018 sx_xunlock(&carp_sx); 2019 } 2020 2021 static void 2022 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2023 { 2024 2025 CARP_LOCK_ASSERT(sc); 2026 2027 if (sc->sc_state != state) { 2028 const char *carp_states[] = { CARP_STATES }; 2029 char subsys[IFNAMSIZ+5]; 2030 2031 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2032 sc->sc_carpdev->if_xname); 2033 2034 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2035 carp_states[sc->sc_state], carp_states[state], reason); 2036 2037 sc->sc_state = state; 2038 2039 devctl_notify("CARP", subsys, carp_states[state], NULL); 2040 } 2041 } 2042 2043 static void 2044 carp_linkstate(struct ifnet *ifp) 2045 { 2046 struct carp_softc *sc; 2047 2048 CIF_LOCK(ifp->if_carp); 2049 IFNET_FOREACH_CARP(ifp, sc) { 2050 CARP_LOCK(sc); 2051 carp_sc_state(sc); 2052 CARP_UNLOCK(sc); 2053 } 2054 CIF_UNLOCK(ifp->if_carp); 2055 } 2056 2057 static void 2058 carp_sc_state(struct carp_softc *sc) 2059 { 2060 2061 CARP_LOCK_ASSERT(sc); 2062 2063 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2064 !(sc->sc_carpdev->if_flags & IFF_UP) || 2065 !V_carp_allow) { 2066 callout_stop(&sc->sc_ad_tmo); 2067 #ifdef INET 2068 callout_stop(&sc->sc_md_tmo); 2069 #endif 2070 #ifdef INET6 2071 callout_stop(&sc->sc_md6_tmo); 2072 #endif 2073 carp_set_state(sc, INIT, "hardware interface down"); 2074 carp_setrun(sc, 0); 2075 if (!sc->sc_suppress) 2076 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2077 sc->sc_suppress = 1; 2078 } else { 2079 carp_set_state(sc, INIT, "hardware interface up"); 2080 carp_setrun(sc, 0); 2081 if (sc->sc_suppress) 2082 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2083 sc->sc_suppress = 0; 2084 } 2085 } 2086 2087 static void 2088 carp_demote_adj(int adj, char *reason) 2089 { 2090 atomic_add_int(&V_carp_demotion, adj); 2091 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2092 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2093 } 2094 2095 static int 2096 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2097 { 2098 int new, error; 2099 struct carp_softc *sc; 2100 2101 new = V_carp_allow; 2102 error = sysctl_handle_int(oidp, &new, 0, req); 2103 if (error || !req->newptr) 2104 return (error); 2105 2106 if (V_carp_allow != new) { 2107 V_carp_allow = new; 2108 2109 mtx_lock(&carp_mtx); 2110 LIST_FOREACH(sc, &carp_list, sc_next) { 2111 CARP_LOCK(sc); 2112 if (curvnet == sc->sc_carpdev->if_vnet) 2113 carp_sc_state(sc); 2114 CARP_UNLOCK(sc); 2115 } 2116 mtx_unlock(&carp_mtx); 2117 } 2118 2119 return (0); 2120 } 2121 2122 static int 2123 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2124 { 2125 int new, error; 2126 2127 new = V_carp_dscp; 2128 error = sysctl_handle_int(oidp, &new, 0, req); 2129 if (error || !req->newptr) 2130 return (error); 2131 2132 if (new < 0 || new > 63) 2133 return (EINVAL); 2134 2135 V_carp_dscp = new; 2136 2137 return (0); 2138 } 2139 2140 static int 2141 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2142 { 2143 int new, error; 2144 2145 new = V_carp_demotion; 2146 error = sysctl_handle_int(oidp, &new, 0, req); 2147 if (error || !req->newptr) 2148 return (error); 2149 2150 carp_demote_adj(new, "sysctl"); 2151 2152 return (0); 2153 } 2154 2155 #ifdef INET 2156 extern struct domain inetdomain; 2157 static struct protosw in_carp_protosw = { 2158 .pr_type = SOCK_RAW, 2159 .pr_domain = &inetdomain, 2160 .pr_protocol = IPPROTO_CARP, 2161 .pr_flags = PR_ATOMIC|PR_ADDR, 2162 .pr_input = carp_input, 2163 .pr_output = rip_output, 2164 .pr_ctloutput = rip_ctloutput, 2165 .pr_usrreqs = &rip_usrreqs 2166 }; 2167 #endif 2168 2169 #ifdef INET6 2170 extern struct domain inet6domain; 2171 static struct protosw in6_carp_protosw = { 2172 .pr_type = SOCK_RAW, 2173 .pr_domain = &inet6domain, 2174 .pr_protocol = IPPROTO_CARP, 2175 .pr_flags = PR_ATOMIC|PR_ADDR, 2176 .pr_input = carp6_input, 2177 .pr_output = rip6_output, 2178 .pr_ctloutput = rip6_ctloutput, 2179 .pr_usrreqs = &rip6_usrreqs 2180 }; 2181 #endif 2182 2183 #ifdef VIMAGE 2184 #if defined(__i386__) 2185 /* 2186 * XXX This is a hack to work around an absolute relocation outside 2187 * set_vnet by one (on the stop symbol) for carpstats. Add a dummy variable 2188 * to the end of the file in the hope that the linker will just keep the 2189 * order (as it seems to do at the moment). It is understood to be fragile. 2190 * See PR 230857 for a longer discussion of the problem and the referenced 2191 * review for possible alternate solutions. Each is a hack; we just need 2192 * the least intrusive one for the next release. 2193 */ 2194 VNET_DEFINE(char, carp_zzz) = 0xde; 2195 #endif 2196 #endif 2197 2198 static void 2199 carp_mod_cleanup(void) 2200 { 2201 2202 #ifdef INET 2203 if (proto_reg[CARP_INET] == 0) { 2204 (void)ipproto_unregister(IPPROTO_CARP); 2205 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2206 proto_reg[CARP_INET] = -1; 2207 } 2208 carp_iamatch_p = NULL; 2209 #endif 2210 #ifdef INET6 2211 if (proto_reg[CARP_INET6] == 0) { 2212 (void)ip6proto_unregister(IPPROTO_CARP); 2213 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2214 proto_reg[CARP_INET6] = -1; 2215 } 2216 carp_iamatch6_p = NULL; 2217 carp_macmatch6_p = NULL; 2218 #endif 2219 carp_ioctl_p = NULL; 2220 carp_attach_p = NULL; 2221 carp_detach_p = NULL; 2222 carp_get_vhid_p = NULL; 2223 carp_linkstate_p = NULL; 2224 carp_forus_p = NULL; 2225 carp_output_p = NULL; 2226 carp_demote_adj_p = NULL; 2227 carp_master_p = NULL; 2228 mtx_unlock(&carp_mtx); 2229 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2230 mtx_destroy(&carp_mtx); 2231 sx_destroy(&carp_sx); 2232 } 2233 2234 static int 2235 carp_mod_load(void) 2236 { 2237 int err; 2238 2239 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2240 sx_init(&carp_sx, "carp_sx"); 2241 LIST_INIT(&carp_list); 2242 carp_get_vhid_p = carp_get_vhid; 2243 carp_forus_p = carp_forus; 2244 carp_output_p = carp_output; 2245 carp_linkstate_p = carp_linkstate; 2246 carp_ioctl_p = carp_ioctl; 2247 carp_attach_p = carp_attach; 2248 carp_detach_p = carp_detach; 2249 carp_demote_adj_p = carp_demote_adj; 2250 carp_master_p = carp_master; 2251 #ifdef INET6 2252 carp_iamatch6_p = carp_iamatch6; 2253 carp_macmatch6_p = carp_macmatch6; 2254 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2255 (struct protosw *)&in6_carp_protosw); 2256 if (proto_reg[CARP_INET6]) { 2257 printf("carp: error %d attaching to PF_INET6\n", 2258 proto_reg[CARP_INET6]); 2259 carp_mod_cleanup(); 2260 return (proto_reg[CARP_INET6]); 2261 } 2262 err = ip6proto_register(IPPROTO_CARP); 2263 if (err) { 2264 printf("carp: error %d registering with INET6\n", err); 2265 carp_mod_cleanup(); 2266 return (err); 2267 } 2268 #endif 2269 #ifdef INET 2270 carp_iamatch_p = carp_iamatch; 2271 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2272 if (proto_reg[CARP_INET]) { 2273 printf("carp: error %d attaching to PF_INET\n", 2274 proto_reg[CARP_INET]); 2275 carp_mod_cleanup(); 2276 return (proto_reg[CARP_INET]); 2277 } 2278 err = ipproto_register(IPPROTO_CARP); 2279 if (err) { 2280 printf("carp: error %d registering with INET\n", err); 2281 carp_mod_cleanup(); 2282 return (err); 2283 } 2284 #endif 2285 return (0); 2286 } 2287 2288 static int 2289 carp_modevent(module_t mod, int type, void *data) 2290 { 2291 switch (type) { 2292 case MOD_LOAD: 2293 return carp_mod_load(); 2294 /* NOTREACHED */ 2295 case MOD_UNLOAD: 2296 mtx_lock(&carp_mtx); 2297 if (LIST_EMPTY(&carp_list)) 2298 carp_mod_cleanup(); 2299 else { 2300 mtx_unlock(&carp_mtx); 2301 return (EBUSY); 2302 } 2303 break; 2304 2305 default: 2306 return (EINVAL); 2307 } 2308 2309 return (0); 2310 } 2311 2312 static moduledata_t carp_mod = { 2313 "carp", 2314 carp_modevent, 2315 0 2316 }; 2317 2318 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2319