1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/devctl.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/counter.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip.h>
#include <machine/in_cksum.h>
#endif
#ifdef INET
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <crypto/sha1.h>

static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");

/*
 * Per-vhid state.  One softc exists for every virtual host id configured
 * on a parent interface; it is linked both on that interface's carp_if
 * (sc_list) and on the global carp_list (sc_next).
 */
struct carp_softc {
	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
	struct sockaddr_dl	sc_addr;	/* Our link level address. */
	struct callout		sc_ad_tmo;	/* Advertising timeout. */
#ifdef INET
	struct callout		sc_md_tmo;	/* Master down timeout. */
#endif
#ifdef INET6
	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
#endif
	struct mtx		sc_mtx;		/* See CARP_LOCK(). */

	int			sc_vhid;	/* Virtual host id. */
	int			sc_advskew;	/* Sub-second ad skew, in 1/256 s. */
	int			sc_advbase;	/* Ad interval base, in seconds. */

	int			sc_naddrs;	/* Nonzero: send IPv4 ads. */
	int			sc_naddrs6;	/* Nonzero: send IPv6 ads. */
	int			sc_ifasiz;	/* NOTE(review): presumably the
						 * allocated size of sc_ifas;
						 * confirm where it is grown. */
	enum { INIT = 0, BACKUP, MASTER }	sc_state;
	int			sc_suppress;
	int			sc_sendad_errors;  /* See carp_send_ad_error(). */
#define	CARP_SENDAD_MAX_ERRORS	3
	int			sc_sendad_success; /* See carp_send_ad_error(). */
#define	CARP_SENDAD_MIN_SUCCESS 3

	int			sc_init_counter; /* Nonzero: next ad picks a
						  * random sc_counter. */
	uint64_t		sc_counter;	/* Counter carried in ads. */

	/* authentication */
#define	CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];	/* Shared secret. */
	unsigned char sc_pad[CARP_HMAC_PAD];	/* HMAC outer pad (opad). */
	SHA1_CTX sc_sha1;			/* Precomputed inner hash. */

	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
};

/*
 * Per-ifnet CARP state, reached through ifp->if_carp.  Holds the list of
 * softcs (vhids) configured on the interface and the multicast options
 * handed to ip_output()/ip6_output() when sending advertisements.
 */
struct carp_if {
#ifdef INET
	int	cif_naddrs;
#endif
#ifdef INET6
	int	cif_naddrs6;
#endif
	TAILQ_HEAD(, carp_softc) cif_vrs;	/* Softcs on this ifnet. */
#ifdef INET
	struct ip_moptions 	 cif_imo;
#endif
#ifdef INET6
	struct ip6_moptions 	 cif_im6o;
#endif
	struct ifnet	*cif_ifp;
	struct mtx	cif_mtx;		/* See CIF_LOCK(). */
	uint32_t	cif_flags;
#define	CIF_PROMISC	0x00000001
};

/*
 * NOTE(review): proto_reg is indexed by CARP_INET / CARP_INET6; its users
 * are outside this view, -1 presumably means "not registered" -- confirm.
 */
#define	CARP_INET	0
#define	CARP_INET6	1
static int proto_reg[] = {-1, -1};

/*
 * Brief design of carp(4).
 *
 * Any carp-capable ifnet may have a list of carp softcs hanging off
 * its ifp->if_carp pointer. Each softc represents one unique virtual
 * host id, or vhid. The softc has a back pointer to the ifnet. All
 * softcs are joined in a global list, which has quite limited use.
 *
 * Any interface address that takes part in CARP negotiation has a
 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
 * AF_INET or AF_INET6 address.
 *
 * Although, one can get the softc's backpointer to ifnet and traverse
 * through its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers.
This
 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
 * do calls into the network stack, thus avoiding LORs.
 *
 * Locking:
 *
 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
 * callout-driven events and ioctl()s.
 *
 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
 * To traverse the global list we use the mutex carp_mtx.
 *
 * Known issues with locking:
 *
 * - Sending ad, we put the pointer to the softc in an mtag, and no reference
 *   counting is done on the softc.
 * - On module unload we may race (?) with packet processing thread
 *   dereferencing our function pointers.
 */

/* Accept incoming CARP packets. */
VNET_DEFINE_STATIC(int, carp_allow) = 1;
#define	V_carp_allow	VNET(carp_allow)

/* Set DSCP in outgoing CARP packets. */
VNET_DEFINE_STATIC(int, carp_dscp) = 56;
#define	V_carp_dscp	VNET(carp_dscp)

/* Preempt slower nodes. */
VNET_DEFINE_STATIC(int, carp_preempt) = 0;
#define	V_carp_preempt	VNET(carp_preempt)

/* Log level. */
VNET_DEFINE_STATIC(int, carp_log) = 1;
#define	V_carp_log	VNET(carp_log)

/* Global advskew demotion. */
VNET_DEFINE_STATIC(int, carp_demotion) = 0;
#define	V_carp_demotion	VNET(carp_demotion)

/* Send error demotion factor. */
VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
#define	V_carp_senderr_adj	VNET(carp_senderr_adj)

/* Iface down demotion factor. */
VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
#define	V_carp_ifdown_adj	VNET(carp_ifdown_adj)

/* Handlers behind the "allow", "dscp" and "demotion" sysctl nodes below. */
static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);

/*
 * net.inet.carp.* tree.  "allow", "dscp" and "demotion" go through handler
 * functions; the remaining knobs are plain per-vnet integers.
 */
SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "CARP");
SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, carp_allow_sysctl, "I",
    "Accept incoming CARP packets");
SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, carp_dscp_sysctl, "I",
    "DSCP value for carp packets");
SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_log), 0, "CARP log level");
SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, carp_demote_adj_sysctl, "I",
    "Adjust demotion factor (skew of advskew)");
SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
    CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
    CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_ifdown_adj), 0,
    "Interface down demotion factor adjustment");

/* Per-vnet, per-CPU statistics counters laid out as struct carpstats. */
VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
VNET_PCPUSTAT_SYSINIT(carpstats);
VNET_PCPUSTAT_SYSUNINIT(carpstats);

/*
 * Add 'val' to the counter backing field 'name' of struct carpstats; the
 * field's byte offset is divided by sizeof(uint64_t) to index the array.
 */
#define	CARPSTATS_ADD(name, val)	\
    counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
	sizeof(uint64_t)], (val))
#define	CARPSTATS_INC(name)		CARPSTATS_ADD(name, 1)

SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 258 259 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 260 NULL, MTX_DEF) 261 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 262 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 263 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 264 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 265 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 266 NULL, MTX_DEF) 267 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 268 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 269 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 270 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 271 #define CIF_FREE(cif) do { \ 272 CIF_LOCK(cif); \ 273 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 274 carp_free_if(cif); \ 275 else \ 276 CIF_UNLOCK(cif); \ 277 } while (0) 278 279 #define CARP_LOG(...) do { \ 280 if (V_carp_log > 0) \ 281 log(LOG_INFO, "carp: " __VA_ARGS__); \ 282 } while (0) 283 284 #define CARP_DEBUG(...) do { \ 285 if (V_carp_log > 1) \ 286 log(LOG_DEBUG, __VA_ARGS__); \ 287 } while (0) 288 289 #define IFNET_FOREACH_IFA(ifp, ifa) \ 290 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 291 if ((ifa)->ifa_carp != NULL) 292 293 #define CARP_FOREACH_IFA(sc, ifa) \ 294 CARP_LOCK_ASSERT(sc); \ 295 for (int _i = 0; \ 296 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 297 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 298 ++_i) 299 300 #define IFNET_FOREACH_CARP(ifp, sc) \ 301 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 302 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 303 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 304 305 #define DEMOTE_ADVSKEW(sc) \ 306 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? 
\ 307 CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion)) 308 309 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 310 static struct carp_softc 311 *carp_alloc(struct ifnet *); 312 static void carp_destroy(struct carp_softc *); 313 static struct carp_if 314 *carp_alloc_if(struct ifnet *); 315 static void carp_free_if(struct carp_if *); 316 static void carp_set_state(struct carp_softc *, int, const char* reason); 317 static void carp_sc_state(struct carp_softc *); 318 static void carp_setrun(struct carp_softc *, sa_family_t); 319 static void carp_master_down(void *); 320 static void carp_master_down_locked(struct carp_softc *, 321 const char* reason); 322 static void carp_send_ad(void *); 323 static void carp_send_ad_locked(struct carp_softc *); 324 static void carp_addroute(struct carp_softc *); 325 static void carp_ifa_addroute(struct ifaddr *); 326 static void carp_delroute(struct carp_softc *); 327 static void carp_ifa_delroute(struct ifaddr *); 328 static void carp_send_ad_all(void *, int); 329 static void carp_demote_adj(int, char *); 330 331 static LIST_HEAD(, carp_softc) carp_list; 332 static struct mtx carp_mtx; 333 static struct sx carp_sx; 334 static struct task carp_sendall_task = 335 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 336 337 static void 338 carp_hmac_prepare(struct carp_softc *sc) 339 { 340 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 341 uint8_t vhid = sc->sc_vhid & 0xff; 342 struct ifaddr *ifa; 343 int i, found; 344 #ifdef INET 345 struct in_addr last, cur, in; 346 #endif 347 #ifdef INET6 348 struct in6_addr last6, cur6, in6; 349 #endif 350 351 CARP_LOCK_ASSERT(sc); 352 353 /* Compute ipad from key. */ 354 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 355 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 356 for (i = 0; i < sizeof(sc->sc_pad); i++) 357 sc->sc_pad[i] ^= 0x36; 358 359 /* Precompute first part of inner hash. 
*/ 360 SHA1Init(&sc->sc_sha1); 361 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 362 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 363 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 364 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 365 #ifdef INET 366 cur.s_addr = 0; 367 do { 368 found = 0; 369 last = cur; 370 cur.s_addr = 0xffffffff; 371 CARP_FOREACH_IFA(sc, ifa) { 372 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 373 if (ifa->ifa_addr->sa_family == AF_INET && 374 ntohl(in.s_addr) > ntohl(last.s_addr) && 375 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 376 cur.s_addr = in.s_addr; 377 found++; 378 } 379 } 380 if (found) 381 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 382 } while (found); 383 #endif /* INET */ 384 #ifdef INET6 385 memset(&cur6, 0, sizeof(cur6)); 386 do { 387 found = 0; 388 last6 = cur6; 389 memset(&cur6, 0xff, sizeof(cur6)); 390 CARP_FOREACH_IFA(sc, ifa) { 391 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 392 if (IN6_IS_SCOPE_EMBED(&in6)) 393 in6.s6_addr16[1] = 0; 394 if (ifa->ifa_addr->sa_family == AF_INET6 && 395 memcmp(&in6, &last6, sizeof(in6)) > 0 && 396 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 397 cur6 = in6; 398 found++; 399 } 400 } 401 if (found) 402 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 403 } while (found); 404 #endif /* INET6 */ 405 406 /* convert ipad to opad */ 407 for (i = 0; i < sizeof(sc->sc_pad); i++) 408 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 409 } 410 411 static void 412 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 413 unsigned char md[20]) 414 { 415 SHA1_CTX sha1ctx; 416 417 CARP_LOCK_ASSERT(sc); 418 419 /* fetch first half of inner hash */ 420 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 421 422 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 423 SHA1Final(md, &sha1ctx); 424 425 /* outer hash */ 426 SHA1Init(&sha1ctx); 427 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 428 SHA1Update(&sha1ctx, md, 20); 429 SHA1Final(md, &sha1ctx); 430 
} 431 432 static int 433 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 434 unsigned char md[20]) 435 { 436 unsigned char md2[20]; 437 438 CARP_LOCK_ASSERT(sc); 439 440 carp_hmac_generate(sc, counter, md2); 441 442 return (bcmp(md, md2, sizeof(md2))); 443 } 444 445 /* 446 * process input packet. 447 * we have rearranged checks order compared to the rfc, 448 * but it seems more efficient this way or not possible otherwise. 449 */ 450 #ifdef INET 451 int 452 carp_input(struct mbuf **mp, int *offp, int proto) 453 { 454 struct mbuf *m = *mp; 455 struct ip *ip = mtod(m, struct ip *); 456 struct carp_header *ch; 457 int iplen, len; 458 459 iplen = *offp; 460 *mp = NULL; 461 462 CARPSTATS_INC(carps_ipackets); 463 464 if (!V_carp_allow) { 465 m_freem(m); 466 return (IPPROTO_DONE); 467 } 468 469 /* verify that the IP TTL is 255. */ 470 if (ip->ip_ttl != CARP_DFLTTL) { 471 CARPSTATS_INC(carps_badttl); 472 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 473 ip->ip_ttl, 474 m->m_pkthdr.rcvif->if_xname); 475 m_freem(m); 476 return (IPPROTO_DONE); 477 } 478 479 iplen = ip->ip_hl << 2; 480 481 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 482 CARPSTATS_INC(carps_badlen); 483 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 484 "on %s\n", __func__, m->m_len - sizeof(struct ip), 485 m->m_pkthdr.rcvif->if_xname); 486 m_freem(m); 487 return (IPPROTO_DONE); 488 } 489 490 if (iplen + sizeof(*ch) < m->m_len) { 491 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 492 CARPSTATS_INC(carps_hdrops); 493 CARP_DEBUG("%s: pullup failed\n", __func__); 494 return (IPPROTO_DONE); 495 } 496 ip = mtod(m, struct ip *); 497 } 498 ch = (struct carp_header *)((char *)ip + iplen); 499 500 /* 501 * verify that the received packet length is 502 * equal to the CARP header 503 */ 504 len = iplen + sizeof(*ch); 505 if (len > m->m_pkthdr.len) { 506 CARPSTATS_INC(carps_badlen); 507 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 508 m->m_pkthdr.len, 509 
m->m_pkthdr.rcvif->if_xname); 510 m_freem(m); 511 return (IPPROTO_DONE); 512 } 513 514 if ((m = m_pullup(m, len)) == NULL) { 515 CARPSTATS_INC(carps_hdrops); 516 return (IPPROTO_DONE); 517 } 518 ip = mtod(m, struct ip *); 519 ch = (struct carp_header *)((char *)ip + iplen); 520 521 /* verify the CARP checksum */ 522 m->m_data += iplen; 523 if (in_cksum(m, len - iplen)) { 524 CARPSTATS_INC(carps_badsum); 525 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 526 m->m_pkthdr.rcvif->if_xname); 527 m_freem(m); 528 return (IPPROTO_DONE); 529 } 530 m->m_data -= iplen; 531 532 carp_input_c(m, ch, AF_INET); 533 return (IPPROTO_DONE); 534 } 535 #endif 536 537 #ifdef INET6 538 int 539 carp6_input(struct mbuf **mp, int *offp, int proto) 540 { 541 struct mbuf *m = *mp; 542 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 543 struct carp_header *ch; 544 u_int len; 545 546 CARPSTATS_INC(carps_ipackets6); 547 548 if (!V_carp_allow) { 549 m_freem(m); 550 return (IPPROTO_DONE); 551 } 552 553 /* check if received on a valid carp interface */ 554 if (m->m_pkthdr.rcvif->if_carp == NULL) { 555 CARPSTATS_INC(carps_badif); 556 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 557 __func__, m->m_pkthdr.rcvif->if_xname); 558 m_freem(m); 559 return (IPPROTO_DONE); 560 } 561 562 /* verify that the IP TTL is 255 */ 563 if (ip6->ip6_hlim != CARP_DFLTTL) { 564 CARPSTATS_INC(carps_badttl); 565 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 566 ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); 567 m_freem(m); 568 return (IPPROTO_DONE); 569 } 570 571 /* verify that we have a complete carp packet */ 572 if (m->m_len < *offp + sizeof(*ch)) { 573 len = m->m_len; 574 m = m_pullup(m, *offp + sizeof(*ch)); 575 if (m == NULL) { 576 CARPSTATS_INC(carps_badlen); 577 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 578 return (IPPROTO_DONE); 579 } 580 } 581 ch = (struct carp_header *)(mtod(m, char *) + *offp); 582 583 584 /* verify the CARP checksum */ 585 m->m_data += *offp; 
if (in_cksum(m, sizeof(*ch))) {
		CARPSTATS_INC(carps_badsum);
		CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
		    m->m_pkthdr.rcvif->if_xname);
		m_freem(m);
		return (IPPROTO_DONE);
	}
	/* Undo the temporary data-pointer shift used for the checksum. */
	m->m_data -= *offp;

	carp_input_c(m, ch, AF_INET6);
	return (IPPROTO_DONE);
}
#endif /* INET6 */

/*
 * This routine should not be necessary at all, but some switches
 * (VMWare ESX vswitches) can echo our own packets back at us,
 * and we must ignore them or they will cause us to drop out of
 * MASTER mode.
 *
 * We cannot catch all cases of network loops.  Instead, what we
 * do here is catch any packet that arrives with a carp header
 * with a VHID of 0, that comes from an address that is our own.
 * These packets are by definition "from us" (even if they are from
 * a misconfigured host that is pretending to be us).
 *
 * The VHID test is outside this mini-function.
 *
 * Returns non-zero when the packet's IP source address equals the address
 * of 'ifa' for the given family, 0 otherwise (including unknown families).
 */
static int
carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
{
#ifdef INET
	struct ip *ip4;
	struct in_addr in4;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
	struct in6_addr in6;
#endif

	switch (af) {
#ifdef INET
	case AF_INET:
		ip4 = mtod(m, struct ip *);
		in4 = ifatoia(ifa)->ia_addr.sin_addr;
		return (in4.s_addr == ip4->ip_src.s_addr);
#endif
#ifdef INET6
	case AF_INET6:
		ip6 = mtod(m, struct ip6_hdr *);
		in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
		return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0);
#endif
	default:
		break;
	}
	return (0);
}

/*
 * Address-family independent part of advertisement processing.
 *
 * Looks up the softc for the advertised vhid on the receiving interface,
 * checks the protocol version and the HMAC, records the sender's counter
 * and then runs the MASTER/BACKUP state machine against the sender's
 * advertisement interval.  The mbuf is freed on every path.  Must be
 * called inside the network epoch.
 */
static void
carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct ifaddr *ifa, *match;
	struct carp_softc *sc;
	uint64_t tmp_counter;
	struct timeval sc_tv, ch_tv;
	int error;

	NET_EPOCH_ASSERT();

	/*
	 * Verify that the VHID is valid on the receiving interface.
	 *
	 * There should be just one match.  If there are none
	 * the VHID is not valid and we drop the packet.  If
	 * there are multiple VHID matches, take just the first
	 * one, for compatibility with previous code.  While we're
	 * scanning, check for obvious loops in the network topology
	 * (these should never happen, and as noted above, we may
	 * miss real loops; this is just a double-check).
	 */
	error = 0;
	match = NULL;
	IFNET_FOREACH_IFA(ifp, ifa) {
		if (match == NULL && ifa->ifa_carp != NULL &&
		    ifa->ifa_addr->sa_family == af &&
		    ifa->ifa_carp->sc_vhid == ch->carp_vhid)
			match = ifa;
		if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
			error = ELOOP;
	}
	/* A detected loop overrides any vhid match. */
	ifa = error ? NULL : match;
	if (ifa != NULL)
		ifa_ref(ifa);

	if (ifa == NULL) {
		if (error == ELOOP) {
			CARP_DEBUG("dropping looped packet on interface %s\n",
			    ifp->if_xname);
			CARPSTATS_INC(carps_badif);	/* ??? */
		} else {
			CARPSTATS_INC(carps_badvhid);
		}
		m_freem(m);
		return;
	}

	/* verify the CARP version. */
	if (ch->carp_version != CARP_VERSION) {
		CARPSTATS_INC(carps_badver);
		CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
		    ch->carp_version);
		ifa_free(ifa);
		m_freem(m);
		return;
	}

	/* The ifa reference is dropped once the softc lock is held. */
	sc = ifa->ifa_carp;
	CARP_LOCK(sc);
	ifa_free(ifa);

	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
		CARPSTATS_INC(carps_badauth);
		CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
		    sc->sc_vhid, ifp->if_xname);
		goto out;
	}

	/* Reassemble the 64-bit counter from its two network-order halves. */
	tmp_counter = ntohl(ch->carp_counter[0]);
	tmp_counter = tmp_counter<<32;
	tmp_counter += ntohl(ch->carp_counter[1]);

	/* XXX Replay protection goes here */

	sc->sc_init_counter = 0;
	sc->sc_counter = tmp_counter;

	/* Our advertisement interval vs. the sender's. */
	sc_tv.tv_sec = sc->sc_advbase;
	sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
	ch_tv.tv_sec = ch->carp_advbase;
	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;

	switch (sc->sc_state) {
	case INIT:
		break;
	case MASTER:
		/*
		 * If we receive an advertisement from a master who's going to
		 * be more frequent than us, go into BACKUP state.
		 */
		if (timevalcmp(&sc_tv, &ch_tv, >) ||
		    timevalcmp(&sc_tv, &ch_tv, ==)) {
			callout_stop(&sc->sc_ad_tmo);
			carp_set_state(sc, BACKUP,
			    "more frequent advertisement received");
			carp_setrun(sc, 0);
			carp_delroute(sc);
		}
		break;
	case BACKUP:
		/*
		 * If we're pre-empting masters who advertise slower than us,
		 * and this one claims to be slower, treat him as down.
		 */
		if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
			carp_master_down_locked(sc,
			    "preempting a slower master");
			break;
		}

		/*
		 *  If the master is going to advertise at such a low frequency
		 *  that he's guaranteed to time out, we'd might as well just
		 *  treat him as timed out now.
		 */
		sc_tv.tv_sec = sc->sc_advbase * 3;
		if (timevalcmp(&sc_tv, &ch_tv, <)) {
			carp_master_down_locked(sc, "master will time out");
			break;
		}

		/*
		 * Otherwise, we reset the counter and wait for the next
		 * advertisement.
		 */
		carp_setrun(sc, af);
		break;
	}

out:
	CARP_UNLOCK(sc);
	m_freem(m);
}

/*
 * Fill in the per-packet authentication fields of an outgoing
 * advertisement and tag the mbuf with the sending softc.
 *
 * The counter is either seeded from arc4random() (when sc_init_counter is
 * set) or incremented, written to 'ch' in network order, and covered by
 * the HMAC stored in ch->carp_md.
 *
 * Returns 0 on success.  If the tag cannot be allocated the mbuf is freed
 * and ENOMEM is returned, so the caller must not touch 'm' afterwards.
 */
static int
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
{
	struct m_tag *mtag;

	if (sc->sc_init_counter) {
		/* this could also be seconds since unix epoch */
		sc->sc_counter = arc4random();
		sc->sc_counter = sc->sc_counter << 32;
		sc->sc_counter += arc4random();
	} else
		sc->sc_counter++;

	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);

	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);

	/* Tag packet for carp_output */
	if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
	    M_NOWAIT)) == NULL) {
		m_freem(m);
		CARPSTATS_INC(carps_onomem);
		return (ENOMEM);
	}
	/* The tag payload is the softc pointer itself (not refcounted). */
	bcopy(&sc, mtag + 1, sizeof(sc));
	m_tag_prepend(m, mtag);

	return (0);
}

/*
 * To avoid LORs and possible recursions this function shouldn't
 * be called directly, but scheduled via taskqueue.
 *
 * Sends one advertisement for every softc on the global list that is
 * currently MASTER, switching to the softc's vnet around each send.
 */
static void
carp_send_ad_all(void *ctx __unused, int pending __unused)
{
	struct carp_softc *sc;
	struct epoch_tracker et;

	NET_EPOCH_ENTER(et);
	mtx_lock(&carp_mtx);
	LIST_FOREACH(sc, &carp_list, sc_next)
		if (sc->sc_state == MASTER) {
			CARP_LOCK(sc);
			CURVNET_SET(sc->sc_carpdev->if_vnet);
			carp_send_ad_locked(sc);
			CURVNET_RESTORE();
			CARP_UNLOCK(sc);
		}
	mtx_unlock(&carp_mtx);
	NET_EPOCH_EXIT(et);
}

/* Send a periodic advertisement, executed in callout context.
*/ 837 static void 838 carp_send_ad(void *v) 839 { 840 struct carp_softc *sc = v; 841 struct epoch_tracker et; 842 843 NET_EPOCH_ENTER(et); 844 CARP_LOCK_ASSERT(sc); 845 CURVNET_SET(sc->sc_carpdev->if_vnet); 846 carp_send_ad_locked(sc); 847 CURVNET_RESTORE(); 848 CARP_UNLOCK(sc); 849 NET_EPOCH_EXIT(et); 850 } 851 852 static void 853 carp_send_ad_error(struct carp_softc *sc, int error) 854 { 855 856 if (error) { 857 if (sc->sc_sendad_errors < INT_MAX) 858 sc->sc_sendad_errors++; 859 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 860 static const char fmt[] = "send error %d on %s"; 861 char msg[sizeof(fmt) + IFNAMSIZ]; 862 863 sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); 864 carp_demote_adj(V_carp_senderr_adj, msg); 865 } 866 sc->sc_sendad_success = 0; 867 } else { 868 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS && 869 ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 870 static const char fmt[] = "send ok on %s"; 871 char msg[sizeof(fmt) + IFNAMSIZ]; 872 873 sprintf(msg, fmt, sc->sc_carpdev->if_xname); 874 carp_demote_adj(-V_carp_senderr_adj, msg); 875 sc->sc_sendad_errors = 0; 876 } else 877 sc->sc_sendad_errors = 0; 878 } 879 } 880 881 /* 882 * Pick the best ifaddr on the given ifp for sending CARP 883 * advertisements. 884 * 885 * "Best" here is defined by ifa_preferred(). This function is much 886 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 887 * 888 * (This could be simplified to return the actual address, except that 889 * it has a different format in AF_INET and AF_INET6.) 
*/
/*
 * Returns the chosen ifaddr with a reference held (the caller must
 * ifa_free() it), or NULL when 'af' is out of range or the interface has
 * no address of that family.  Must be called inside the network epoch.
 */
static struct ifaddr *
carp_best_ifa(int af, struct ifnet *ifp)
{
	struct ifaddr *ifa, *best;

	NET_EPOCH_ASSERT();

	if (af >= AF_MAX)
		return (NULL);
	best = NULL;
	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == af &&
		    (best == NULL || ifa_preferred(best, ifa)))
			best = ifa;
	}
	if (best != NULL)
		ifa_ref(best);
	return (best);
}

/*
 * Build and transmit one advertisement per configured address family and
 * re-arm the advertisement callout.
 *
 * For each family with addresses (sc_naddrs / sc_naddrs6) a fresh mbuf is
 * filled with the IP/IPv6 header and the CARP header, authenticated via
 * carp_prepare_ad(), checksummed and passed to ip_output()/ip6_output();
 * the result feeds carp_send_ad_error().  Any failure jumps straight to
 * rescheduling.  Requires the network epoch and the softc lock.
 */
static void
carp_send_ad_locked(struct carp_softc *sc)
{
	struct carp_header ch;
	struct timeval tv;
	struct ifaddr *ifa;
	struct carp_header *ch_ptr;
	struct mbuf *m;
	int len, advskew;

	NET_EPOCH_ASSERT();
	CARP_LOCK_ASSERT(sc);

	/* Interval until the next advertisement: advbase + advskew/256 s. */
	advskew = DEMOTE_ADVSKEW(sc);
	tv.tv_sec = sc->sc_advbase;
	tv.tv_usec = advskew * 1000000 / 256;

	/* Header template shared by the IPv4 and IPv6 packets. */
	ch.carp_version = CARP_VERSION;
	ch.carp_type = CARP_ADVERTISEMENT;
	ch.carp_vhid = sc->sc_vhid;
	ch.carp_advbase = sc->sc_advbase;
	ch.carp_advskew = advskew;
	ch.carp_authlen = 7;	/* XXX DEFINE */
	ch.carp_pad1 = 0;	/* must be zero */
	ch.carp_cksum = 0;

	/* XXXGL: OpenBSD picks first ifaddr with needed family. */

#ifdef INET
	if (sc->sc_naddrs) {
		struct ip *ip;

		m = m_gethdr(M_NOWAIT, MT_DATA);
		if (m == NULL) {
			CARPSTATS_INC(carps_onomem);
			goto resched;
		}
		len = sizeof(*ip) + sizeof(ch);
		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = NULL;
		m->m_len = len;
		M_ALIGN(m, m->m_len);
		m->m_flags |= M_MCAST;
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
		ip->ip_len = htons(len);
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = CARP_DFLTTL;
		ip->ip_p = IPPROTO_CARP;
		ip->ip_sum = 0;
		ip_fillid(ip);

		/* Source: preferred IPv4 address of the parent, else 0. */
		ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
		if (ifa != NULL) {
			ip->ip_src.s_addr =
			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
			ifa_free(ifa);
		} else
			ip->ip_src.s_addr = 0;
		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);

		ch_ptr = (struct carp_header *)(&ip[1]);
		bcopy(&ch, ch_ptr, sizeof(ch));
		/* On failure carp_prepare_ad() has already freed m. */
		if (carp_prepare_ad(m, sc, ch_ptr))
			goto resched;

		/* Checksum covers the CARP header only; skip the IP header. */
		m->m_data += sizeof(*ip);
		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
		m->m_data -= sizeof(*ip);

		CARPSTATS_INC(carps_opackets);

		carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
		    &sc->sc_carpdev->if_carp->cif_imo, NULL));
	}
#endif /* INET */
#ifdef INET6
	if (sc->sc_naddrs6) {
		struct ip6_hdr *ip6;

		m = m_gethdr(M_NOWAIT, MT_DATA);
		if (m == NULL) {
			CARPSTATS_INC(carps_onomem);
			goto resched;
		}
		len = sizeof(*ip6) + sizeof(ch);
		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = NULL;
		m->m_len = len;
		M_ALIGN(m, m->m_len);
		m->m_flags |= M_MCAST;
		ip6 = mtod(m, struct ip6_hdr *);
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc |= IPV6_VERSION;
		/* Traffic class isn't defined in ip6 struct instead
		 * it gets offset into flowid field */
		ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
		    IPTOS_DSCP_OFFSET));
		ip6->ip6_hlim = CARP_DFLTTL;
		ip6->ip6_nxt = IPPROTO_CARP;

		/* set the source address */
		ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
		if (ifa != NULL) {
			bcopy(IFA_IN6(ifa), &ip6->ip6_src,
			    sizeof(struct in6_addr));
			ifa_free(ifa);
		} else
			/* This should never happen with IPv6. */
			bzero(&ip6->ip6_src, sizeof(struct in6_addr));

		/* Set the multicast destination. */
		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
		ip6->ip6_dst.s6_addr8[15] = 0x12;
		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
			m_freem(m);
			CARP_DEBUG("%s: in6_setscope failed\n", __func__);
			goto resched;
		}

		ch_ptr = (struct carp_header *)(&ip6[1]);
		bcopy(&ch, ch_ptr, sizeof(ch));
		/* On failure carp_prepare_ad() has already freed m. */
		if (carp_prepare_ad(m, sc, ch_ptr))
			goto resched;

		/* Checksum covers the CARP header only; skip the IPv6 header. */
		m->m_data += sizeof(*ip6);
		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
		m->m_data -= sizeof(*ip6);

		CARPSTATS_INC(carps_opackets6);

		carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
		    &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
	}
#endif /* INET6 */

resched:
	callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
}

/* Install routes for every address of the softc; softc must be locked. */
static void
carp_addroute(struct carp_softc *sc)
{
	struct ifaddr *ifa;

	CARP_FOREACH_IFA(sc, ifa)
		carp_ifa_addroute(ifa);
}

/*
 * Install the routes for one CARP address: prefix plus loopback route for
 * IPv4; loopback route plus an nd6 link-layer entry for IPv6.
 */
static void
carp_ifa_addroute(struct ifaddr *ifa)
{

	switch (ifa->ifa_addr->sa_family) {
#ifdef INET
	case AF_INET:
		in_addprefix(ifatoia(ifa), RTF_UP);
		ifa_add_loopback_route(ifa,
		    (struct sockaddr *)&ifatoia(ifa)->ia_addr);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		ifa_add_loopback_route(ifa,
		    (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
		nd6_add_ifa_lle(ifatoia6(ifa));
		break;
#endif
	}
}

/* Remove routes for every address of the softc; softc must be locked. */
static void
carp_delroute(struct carp_softc *sc)
{ 1087 struct ifaddr *ifa; 1088 1089 CARP_FOREACH_IFA(sc, ifa) 1090 carp_ifa_delroute(ifa); 1091 } 1092 1093 static void 1094 carp_ifa_delroute(struct ifaddr *ifa) 1095 { 1096 1097 switch (ifa->ifa_addr->sa_family) { 1098 #ifdef INET 1099 case AF_INET: 1100 ifa_del_loopback_route(ifa, 1101 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1102 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1103 break; 1104 #endif 1105 #ifdef INET6 1106 case AF_INET6: 1107 ifa_del_loopback_route(ifa, 1108 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1109 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1110 break; 1111 #endif 1112 } 1113 } 1114 1115 int 1116 carp_master(struct ifaddr *ifa) 1117 { 1118 struct carp_softc *sc = ifa->ifa_carp; 1119 1120 return (sc->sc_state == MASTER); 1121 } 1122 1123 #ifdef INET 1124 /* 1125 * Broadcast a gratuitous ARP request containing 1126 * the virtual router MAC address for each IP address 1127 * associated with the virtual router. 1128 */ 1129 static void 1130 carp_send_arp(struct carp_softc *sc) 1131 { 1132 struct ifaddr *ifa; 1133 struct in_addr addr; 1134 1135 NET_EPOCH_ASSERT(); 1136 1137 CARP_FOREACH_IFA(sc, ifa) { 1138 if (ifa->ifa_addr->sa_family != AF_INET) 1139 continue; 1140 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1141 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); 1142 } 1143 } 1144 1145 int 1146 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1147 { 1148 struct carp_softc *sc = ifa->ifa_carp; 1149 1150 if (sc->sc_state == MASTER) { 1151 *enaddr = LLADDR(&sc->sc_addr); 1152 return (1); 1153 } 1154 1155 return (0); 1156 } 1157 #endif 1158 1159 #ifdef INET6 1160 static void 1161 carp_send_na(struct carp_softc *sc) 1162 { 1163 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1164 struct ifaddr *ifa; 1165 struct in6_addr *in6; 1166 1167 CARP_FOREACH_IFA(sc, ifa) { 1168 if (ifa->ifa_addr->sa_family != AF_INET6) 1169 continue; 1170 1171 in6 = IFA_IN6(ifa); 1172 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1173 
ND_NA_FLAG_OVERRIDE, 1, NULL); 1174 DELAY(1000); /* XXX */ 1175 } 1176 } 1177 1178 /* 1179 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1180 * matches and is not a carp address. Returns NULL otherwise. 1181 */ 1182 struct ifaddr * 1183 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1184 { 1185 struct ifaddr *ifa; 1186 1187 NET_EPOCH_ASSERT(); 1188 1189 ifa = NULL; 1190 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1191 if (ifa->ifa_addr->sa_family != AF_INET6) 1192 continue; 1193 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1194 continue; 1195 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1196 ifa = NULL; 1197 else 1198 ifa_ref(ifa); 1199 break; 1200 } 1201 1202 return (ifa); 1203 } 1204 1205 char * 1206 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1207 { 1208 struct ifaddr *ifa; 1209 1210 NET_EPOCH_ASSERT(); 1211 1212 IFNET_FOREACH_IFA(ifp, ifa) 1213 if (ifa->ifa_addr->sa_family == AF_INET6 && 1214 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1215 struct carp_softc *sc = ifa->ifa_carp; 1216 struct m_tag *mtag; 1217 1218 mtag = m_tag_get(PACKET_TAG_CARP, 1219 sizeof(struct carp_softc *), M_NOWAIT); 1220 if (mtag == NULL) 1221 /* Better a bit than nothing. */ 1222 return (LLADDR(&sc->sc_addr)); 1223 1224 bcopy(&sc, mtag + 1, sizeof(sc)); 1225 m_tag_prepend(m, mtag); 1226 1227 return (LLADDR(&sc->sc_addr)); 1228 } 1229 1230 return (NULL); 1231 } 1232 #endif /* INET6 */ 1233 1234 int 1235 carp_forus(struct ifnet *ifp, u_char *dhost) 1236 { 1237 struct carp_softc *sc; 1238 uint8_t *ena = dhost; 1239 1240 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1241 return (0); 1242 1243 CIF_LOCK(ifp->if_carp); 1244 IFNET_FOREACH_CARP(ifp, sc) { 1245 /* 1246 * CARP_LOCK() is not here, since would protect nothing, but 1247 * cause deadlock with if_bridge, calling this under its lock. 
1248 */ 1249 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1250 ETHER_ADDR_LEN)) { 1251 CIF_UNLOCK(ifp->if_carp); 1252 return (1); 1253 } 1254 } 1255 CIF_UNLOCK(ifp->if_carp); 1256 1257 return (0); 1258 } 1259 1260 /* Master down timeout event, executed in callout context. */ 1261 static void 1262 carp_master_down(void *v) 1263 { 1264 struct carp_softc *sc = v; 1265 struct epoch_tracker et; 1266 1267 NET_EPOCH_ENTER(et); 1268 CARP_LOCK_ASSERT(sc); 1269 1270 CURVNET_SET(sc->sc_carpdev->if_vnet); 1271 if (sc->sc_state == BACKUP) { 1272 carp_master_down_locked(sc, "master timed out"); 1273 } 1274 CURVNET_RESTORE(); 1275 1276 CARP_UNLOCK(sc); 1277 NET_EPOCH_EXIT(et); 1278 } 1279 1280 static void 1281 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1282 { 1283 1284 NET_EPOCH_ASSERT(); 1285 CARP_LOCK_ASSERT(sc); 1286 1287 switch (sc->sc_state) { 1288 case BACKUP: 1289 carp_set_state(sc, MASTER, reason); 1290 carp_send_ad_locked(sc); 1291 #ifdef INET 1292 carp_send_arp(sc); 1293 #endif 1294 #ifdef INET6 1295 carp_send_na(sc); 1296 #endif 1297 carp_setrun(sc, 0); 1298 carp_addroute(sc); 1299 break; 1300 case INIT: 1301 case MASTER: 1302 #ifdef INVARIANTS 1303 panic("carp: VHID %u@%s: master_down event in %s state\n", 1304 sc->sc_vhid, 1305 sc->sc_carpdev->if_xname, 1306 sc->sc_state ? "MASTER" : "INIT"); 1307 #endif 1308 break; 1309 } 1310 } 1311 1312 /* 1313 * When in backup state, af indicates whether to reset the master down timer 1314 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1315 */ 1316 static void 1317 carp_setrun(struct carp_softc *sc, sa_family_t af) 1318 { 1319 struct timeval tv; 1320 1321 CARP_LOCK_ASSERT(sc); 1322 1323 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1324 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1325 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1326 !V_carp_allow) 1327 return; 1328 1329 switch (sc->sc_state) { 1330 case INIT: 1331 carp_set_state(sc, BACKUP, "initialization complete"); 1332 carp_setrun(sc, 0); 1333 break; 1334 case BACKUP: 1335 callout_stop(&sc->sc_ad_tmo); 1336 tv.tv_sec = 3 * sc->sc_advbase; 1337 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1338 switch (af) { 1339 #ifdef INET 1340 case AF_INET: 1341 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1342 carp_master_down, sc); 1343 break; 1344 #endif 1345 #ifdef INET6 1346 case AF_INET6: 1347 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1348 carp_master_down, sc); 1349 break; 1350 #endif 1351 default: 1352 #ifdef INET 1353 if (sc->sc_naddrs) 1354 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1355 carp_master_down, sc); 1356 #endif 1357 #ifdef INET6 1358 if (sc->sc_naddrs6) 1359 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1360 carp_master_down, sc); 1361 #endif 1362 break; 1363 } 1364 break; 1365 case MASTER: 1366 tv.tv_sec = sc->sc_advbase; 1367 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1368 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1369 carp_send_ad, sc); 1370 break; 1371 } 1372 } 1373 1374 /* 1375 * Setup multicast structures. 
1376 */ 1377 static int 1378 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1379 { 1380 struct ifnet *ifp = cif->cif_ifp; 1381 int error = 0; 1382 1383 switch (sa) { 1384 #ifdef INET 1385 case AF_INET: 1386 { 1387 struct ip_moptions *imo = &cif->cif_imo; 1388 struct in_mfilter *imf; 1389 struct in_addr addr; 1390 1391 if (ip_mfilter_first(&imo->imo_head) != NULL) 1392 return (0); 1393 1394 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1395 ip_mfilter_init(&imo->imo_head); 1396 imo->imo_multicast_vif = -1; 1397 1398 addr.s_addr = htonl(INADDR_CARP_GROUP); 1399 if ((error = in_joingroup(ifp, &addr, NULL, 1400 &imf->imf_inm)) != 0) { 1401 ip_mfilter_free(imf); 1402 break; 1403 } 1404 1405 ip_mfilter_insert(&imo->imo_head, imf); 1406 imo->imo_multicast_ifp = ifp; 1407 imo->imo_multicast_ttl = CARP_DFLTTL; 1408 imo->imo_multicast_loop = 0; 1409 break; 1410 } 1411 #endif 1412 #ifdef INET6 1413 case AF_INET6: 1414 { 1415 struct ip6_moptions *im6o = &cif->cif_im6o; 1416 struct in6_mfilter *im6f[2]; 1417 struct in6_addr in6; 1418 1419 if (ip6_mfilter_first(&im6o->im6o_head)) 1420 return (0); 1421 1422 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1423 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1424 1425 ip6_mfilter_init(&im6o->im6o_head); 1426 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1427 im6o->im6o_multicast_ifp = ifp; 1428 1429 /* Join IPv6 CARP multicast group. */ 1430 bzero(&in6, sizeof(in6)); 1431 in6.s6_addr16[0] = htons(0xff02); 1432 in6.s6_addr8[15] = 0x12; 1433 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1434 ip6_mfilter_free(im6f[0]); 1435 ip6_mfilter_free(im6f[1]); 1436 break; 1437 } 1438 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1439 ip6_mfilter_free(im6f[0]); 1440 ip6_mfilter_free(im6f[1]); 1441 break; 1442 } 1443 1444 /* Join solicited multicast address. 
*/ 1445 bzero(&in6, sizeof(in6)); 1446 in6.s6_addr16[0] = htons(0xff02); 1447 in6.s6_addr32[1] = 0; 1448 in6.s6_addr32[2] = htonl(1); 1449 in6.s6_addr32[3] = 0; 1450 in6.s6_addr8[12] = 0xff; 1451 1452 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1453 ip6_mfilter_free(im6f[0]); 1454 ip6_mfilter_free(im6f[1]); 1455 break; 1456 } 1457 1458 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1459 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1460 ip6_mfilter_free(im6f[0]); 1461 ip6_mfilter_free(im6f[1]); 1462 break; 1463 } 1464 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1465 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1466 break; 1467 } 1468 #endif 1469 } 1470 1471 return (error); 1472 } 1473 1474 /* 1475 * Free multicast structures. 1476 */ 1477 static void 1478 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1479 { 1480 #ifdef INET 1481 struct ip_moptions *imo = &cif->cif_imo; 1482 struct in_mfilter *imf; 1483 #endif 1484 #ifdef INET6 1485 struct ip6_moptions *im6o = &cif->cif_im6o; 1486 struct in6_mfilter *im6f; 1487 #endif 1488 sx_assert(&carp_sx, SA_XLOCKED); 1489 1490 switch (sa) { 1491 #ifdef INET 1492 case AF_INET: 1493 if (cif->cif_naddrs != 0) 1494 break; 1495 1496 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 1497 ip_mfilter_remove(&imo->imo_head, imf); 1498 in_leavegroup(imf->imf_inm, NULL); 1499 ip_mfilter_free(imf); 1500 } 1501 break; 1502 #endif 1503 #ifdef INET6 1504 case AF_INET6: 1505 if (cif->cif_naddrs6 != 0) 1506 break; 1507 1508 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 1509 ip6_mfilter_remove(&im6o->im6o_head, im6f); 1510 in6_leavegroup(im6f->im6f_in6m, NULL); 1511 ip6_mfilter_free(im6f); 1512 } 1513 break; 1514 #endif 1515 } 1516 } 1517 1518 int 1519 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1520 { 1521 struct m_tag *mtag; 1522 struct carp_softc *sc; 1523 1524 if (!sa) 1525 return (0); 1526 1527 switch (sa->sa_family) { 1528 #ifdef 
INET 1529 case AF_INET: 1530 break; 1531 #endif 1532 #ifdef INET6 1533 case AF_INET6: 1534 break; 1535 #endif 1536 default: 1537 return (0); 1538 } 1539 1540 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1541 if (mtag == NULL) 1542 return (0); 1543 1544 bcopy(mtag + 1, &sc, sizeof(sc)); 1545 1546 /* Set the source MAC address to the Virtual Router MAC Address. */ 1547 switch (ifp->if_type) { 1548 case IFT_ETHER: 1549 case IFT_BRIDGE: 1550 case IFT_L2VLAN: { 1551 struct ether_header *eh; 1552 1553 eh = mtod(m, struct ether_header *); 1554 eh->ether_shost[0] = 0; 1555 eh->ether_shost[1] = 0; 1556 eh->ether_shost[2] = 0x5e; 1557 eh->ether_shost[3] = 0; 1558 eh->ether_shost[4] = 1; 1559 eh->ether_shost[5] = sc->sc_vhid; 1560 } 1561 break; 1562 default: 1563 printf("%s: carp is not supported for the %d interface type\n", 1564 ifp->if_xname, ifp->if_type); 1565 return (EOPNOTSUPP); 1566 } 1567 1568 return (0); 1569 } 1570 1571 static struct carp_softc* 1572 carp_alloc(struct ifnet *ifp) 1573 { 1574 struct carp_softc *sc; 1575 struct carp_if *cif; 1576 1577 sx_assert(&carp_sx, SA_XLOCKED); 1578 1579 if ((cif = ifp->if_carp) == NULL) 1580 cif = carp_alloc_if(ifp); 1581 1582 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1583 1584 sc->sc_advbase = CARP_DFLTINTV; 1585 sc->sc_vhid = -1; /* required setting */ 1586 sc->sc_init_counter = 1; 1587 sc->sc_state = INIT; 1588 1589 sc->sc_ifasiz = sizeof(struct ifaddr *); 1590 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1591 sc->sc_carpdev = ifp; 1592 1593 CARP_LOCK_INIT(sc); 1594 #ifdef INET 1595 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1596 #endif 1597 #ifdef INET6 1598 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1599 #endif 1600 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1601 1602 CIF_LOCK(cif); 1603 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1604 CIF_UNLOCK(cif); 1605 1606 mtx_lock(&carp_mtx); 1607 
LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1608 mtx_unlock(&carp_mtx); 1609 1610 return (sc); 1611 } 1612 1613 static void 1614 carp_grow_ifas(struct carp_softc *sc) 1615 { 1616 struct ifaddr **new; 1617 1618 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 1619 CARP_LOCK(sc); 1620 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1621 free(sc->sc_ifas, M_CARP); 1622 sc->sc_ifas = new; 1623 sc->sc_ifasiz *= 2; 1624 CARP_UNLOCK(sc); 1625 } 1626 1627 static void 1628 carp_destroy(struct carp_softc *sc) 1629 { 1630 struct ifnet *ifp = sc->sc_carpdev; 1631 struct carp_if *cif = ifp->if_carp; 1632 1633 sx_assert(&carp_sx, SA_XLOCKED); 1634 1635 if (sc->sc_suppress) 1636 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 1637 CARP_UNLOCK(sc); 1638 1639 CIF_LOCK(cif); 1640 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1641 CIF_UNLOCK(cif); 1642 1643 mtx_lock(&carp_mtx); 1644 LIST_REMOVE(sc, sc_next); 1645 mtx_unlock(&carp_mtx); 1646 1647 callout_drain(&sc->sc_ad_tmo); 1648 #ifdef INET 1649 callout_drain(&sc->sc_md_tmo); 1650 #endif 1651 #ifdef INET6 1652 callout_drain(&sc->sc_md6_tmo); 1653 #endif 1654 CARP_LOCK_DESTROY(sc); 1655 1656 free(sc->sc_ifas, M_CARP); 1657 free(sc, M_CARP); 1658 } 1659 1660 static struct carp_if* 1661 carp_alloc_if(struct ifnet *ifp) 1662 { 1663 struct carp_if *cif; 1664 int error; 1665 1666 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1667 1668 if ((error = ifpromisc(ifp, 1)) != 0) 1669 printf("%s: ifpromisc(%s) failed: %d\n", 1670 __func__, ifp->if_xname, error); 1671 else 1672 cif->cif_flags |= CIF_PROMISC; 1673 1674 CIF_LOCK_INIT(cif); 1675 cif->cif_ifp = ifp; 1676 TAILQ_INIT(&cif->cif_vrs); 1677 1678 IF_ADDR_WLOCK(ifp); 1679 ifp->if_carp = cif; 1680 if_ref(ifp); 1681 IF_ADDR_WUNLOCK(ifp); 1682 1683 return (cif); 1684 } 1685 1686 static void 1687 carp_free_if(struct carp_if *cif) 1688 { 1689 struct ifnet *ifp = cif->cif_ifp; 1690 1691 CIF_LOCK_ASSERT(cif); 1692 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1693 
__func__)); 1694 1695 IF_ADDR_WLOCK(ifp); 1696 ifp->if_carp = NULL; 1697 IF_ADDR_WUNLOCK(ifp); 1698 1699 CIF_LOCK_DESTROY(cif); 1700 1701 if (cif->cif_flags & CIF_PROMISC) 1702 ifpromisc(ifp, 0); 1703 if_rele(ifp); 1704 1705 free(cif, M_CARP); 1706 } 1707 1708 static void 1709 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) 1710 { 1711 1712 CARP_LOCK(sc); 1713 carpr->carpr_state = sc->sc_state; 1714 carpr->carpr_vhid = sc->sc_vhid; 1715 carpr->carpr_advbase = sc->sc_advbase; 1716 carpr->carpr_advskew = sc->sc_advskew; 1717 if (priv) 1718 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1719 else 1720 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1721 CARP_UNLOCK(sc); 1722 } 1723 1724 int 1725 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1726 { 1727 struct carpreq carpr; 1728 struct ifnet *ifp; 1729 struct carp_softc *sc = NULL; 1730 int error = 0, locked = 0; 1731 1732 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 1733 return (error); 1734 1735 ifp = ifunit_ref(ifr->ifr_name); 1736 if (ifp == NULL) 1737 return (ENXIO); 1738 1739 switch (ifp->if_type) { 1740 case IFT_ETHER: 1741 case IFT_L2VLAN: 1742 case IFT_BRIDGE: 1743 break; 1744 default: 1745 error = EOPNOTSUPP; 1746 goto out; 1747 } 1748 1749 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1750 error = EADDRNOTAVAIL; 1751 goto out; 1752 } 1753 1754 sx_xlock(&carp_sx); 1755 switch (cmd) { 1756 case SIOCSVH: 1757 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1758 break; 1759 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || 1760 carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { 1761 error = EINVAL; 1762 break; 1763 } 1764 1765 if (ifp->if_carp) { 1766 IFNET_FOREACH_CARP(ifp, sc) 1767 if (sc->sc_vhid == carpr.carpr_vhid) 1768 break; 1769 } 1770 if (sc == NULL) { 1771 sc = carp_alloc(ifp); 1772 CARP_LOCK(sc); 1773 sc->sc_vhid = carpr.carpr_vhid; 1774 LLADDR(&sc->sc_addr)[0] = 0; 1775 LLADDR(&sc->sc_addr)[1] = 0; 1776 
LLADDR(&sc->sc_addr)[2] = 0x5e; 1777 LLADDR(&sc->sc_addr)[3] = 0; 1778 LLADDR(&sc->sc_addr)[4] = 1; 1779 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1780 } else 1781 CARP_LOCK(sc); 1782 locked = 1; 1783 if (carpr.carpr_advbase > 0) { 1784 if (carpr.carpr_advbase > 255 || 1785 carpr.carpr_advbase < CARP_DFLTINTV) { 1786 error = EINVAL; 1787 break; 1788 } 1789 sc->sc_advbase = carpr.carpr_advbase; 1790 } 1791 if (carpr.carpr_advskew >= 255) { 1792 error = EINVAL; 1793 break; 1794 } 1795 sc->sc_advskew = carpr.carpr_advskew; 1796 if (carpr.carpr_key[0] != '\0') { 1797 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1798 carp_hmac_prepare(sc); 1799 } 1800 if (sc->sc_state != INIT && 1801 carpr.carpr_state != sc->sc_state) { 1802 switch (carpr.carpr_state) { 1803 case BACKUP: 1804 callout_stop(&sc->sc_ad_tmo); 1805 carp_set_state(sc, BACKUP, 1806 "user requested via ifconfig"); 1807 carp_setrun(sc, 0); 1808 carp_delroute(sc); 1809 break; 1810 case MASTER: 1811 carp_master_down_locked(sc, 1812 "user requested via ifconfig"); 1813 break; 1814 default: 1815 break; 1816 } 1817 } 1818 break; 1819 1820 case SIOCGVH: 1821 { 1822 int priveleged; 1823 1824 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { 1825 error = EINVAL; 1826 break; 1827 } 1828 if (carpr.carpr_count < 1) { 1829 error = EMSGSIZE; 1830 break; 1831 } 1832 if (ifp->if_carp == NULL) { 1833 error = ENOENT; 1834 break; 1835 } 1836 1837 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); 1838 if (carpr.carpr_vhid != 0) { 1839 IFNET_FOREACH_CARP(ifp, sc) 1840 if (sc->sc_vhid == carpr.carpr_vhid) 1841 break; 1842 if (sc == NULL) { 1843 error = ENOENT; 1844 break; 1845 } 1846 carp_carprcp(&carpr, sc, priveleged); 1847 error = copyout(&carpr, ifr_data_get_ptr(ifr), 1848 sizeof(carpr)); 1849 } else { 1850 int i, count; 1851 1852 count = 0; 1853 IFNET_FOREACH_CARP(ifp, sc) 1854 count++; 1855 1856 if (count > carpr.carpr_count) { 1857 CIF_UNLOCK(ifp->if_carp); 1858 error = EMSGSIZE; 1859 break; 1860 } 
1861 1862 i = 0; 1863 IFNET_FOREACH_CARP(ifp, sc) { 1864 carp_carprcp(&carpr, sc, priveleged); 1865 carpr.carpr_count = count; 1866 error = copyout(&carpr, 1867 (char *)ifr_data_get_ptr(ifr) + 1868 (i * sizeof(carpr)), sizeof(carpr)); 1869 if (error) { 1870 CIF_UNLOCK(ifp->if_carp); 1871 break; 1872 } 1873 i++; 1874 } 1875 } 1876 break; 1877 } 1878 default: 1879 error = EINVAL; 1880 } 1881 sx_xunlock(&carp_sx); 1882 1883 out: 1884 if (locked) 1885 CARP_UNLOCK(sc); 1886 if_rele(ifp); 1887 1888 return (error); 1889 } 1890 1891 static int 1892 carp_get_vhid(struct ifaddr *ifa) 1893 { 1894 1895 if (ifa == NULL || ifa->ifa_carp == NULL) 1896 return (0); 1897 1898 return (ifa->ifa_carp->sc_vhid); 1899 } 1900 1901 int 1902 carp_attach(struct ifaddr *ifa, int vhid) 1903 { 1904 struct ifnet *ifp = ifa->ifa_ifp; 1905 struct carp_if *cif = ifp->if_carp; 1906 struct carp_softc *sc; 1907 int index, error; 1908 1909 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 1910 1911 switch (ifa->ifa_addr->sa_family) { 1912 #ifdef INET 1913 case AF_INET: 1914 #endif 1915 #ifdef INET6 1916 case AF_INET6: 1917 #endif 1918 break; 1919 default: 1920 return (EPROTOTYPE); 1921 } 1922 1923 sx_xlock(&carp_sx); 1924 if (ifp->if_carp == NULL) { 1925 sx_xunlock(&carp_sx); 1926 return (ENOPROTOOPT); 1927 } 1928 1929 IFNET_FOREACH_CARP(ifp, sc) 1930 if (sc->sc_vhid == vhid) 1931 break; 1932 if (sc == NULL) { 1933 sx_xunlock(&carp_sx); 1934 return (ENOENT); 1935 } 1936 1937 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 1938 if (error) { 1939 CIF_FREE(cif); 1940 sx_xunlock(&carp_sx); 1941 return (error); 1942 } 1943 1944 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 1945 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 1946 carp_grow_ifas(sc); 1947 1948 switch (ifa->ifa_addr->sa_family) { 1949 #ifdef INET 1950 case AF_INET: 1951 cif->cif_naddrs++; 1952 sc->sc_naddrs++; 1953 break; 1954 #endif 1955 #ifdef INET6 1956 case AF_INET6: 1957 cif->cif_naddrs6++; 1958 
sc->sc_naddrs6++; 1959 break; 1960 #endif 1961 } 1962 1963 ifa_ref(ifa); 1964 1965 CARP_LOCK(sc); 1966 sc->sc_ifas[index - 1] = ifa; 1967 ifa->ifa_carp = sc; 1968 carp_hmac_prepare(sc); 1969 carp_sc_state(sc); 1970 CARP_UNLOCK(sc); 1971 1972 sx_xunlock(&carp_sx); 1973 1974 return (0); 1975 } 1976 1977 void 1978 carp_detach(struct ifaddr *ifa, bool keep_cif) 1979 { 1980 struct ifnet *ifp = ifa->ifa_ifp; 1981 struct carp_if *cif = ifp->if_carp; 1982 struct carp_softc *sc = ifa->ifa_carp; 1983 int i, index; 1984 1985 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 1986 1987 sx_xlock(&carp_sx); 1988 1989 CARP_LOCK(sc); 1990 /* Shift array. */ 1991 index = sc->sc_naddrs + sc->sc_naddrs6; 1992 for (i = 0; i < index; i++) 1993 if (sc->sc_ifas[i] == ifa) 1994 break; 1995 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 1996 for (; i < index - 1; i++) 1997 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 1998 sc->sc_ifas[index - 1] = NULL; 1999 2000 switch (ifa->ifa_addr->sa_family) { 2001 #ifdef INET 2002 case AF_INET: 2003 cif->cif_naddrs--; 2004 sc->sc_naddrs--; 2005 break; 2006 #endif 2007 #ifdef INET6 2008 case AF_INET6: 2009 cif->cif_naddrs6--; 2010 sc->sc_naddrs6--; 2011 break; 2012 #endif 2013 } 2014 2015 carp_ifa_delroute(ifa); 2016 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2017 2018 ifa->ifa_carp = NULL; 2019 ifa_free(ifa); 2020 2021 carp_hmac_prepare(sc); 2022 carp_sc_state(sc); 2023 2024 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2025 carp_destroy(sc); 2026 else 2027 CARP_UNLOCK(sc); 2028 2029 if (!keep_cif) 2030 CIF_FREE(cif); 2031 2032 sx_xunlock(&carp_sx); 2033 } 2034 2035 static void 2036 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2037 { 2038 2039 CARP_LOCK_ASSERT(sc); 2040 2041 if (sc->sc_state != state) { 2042 const char *carp_states[] = { CARP_STATES }; 2043 char subsys[IFNAMSIZ+5]; 2044 2045 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2046 sc->sc_carpdev->if_xname); 2047 2048 
CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2049 carp_states[sc->sc_state], carp_states[state], reason); 2050 2051 sc->sc_state = state; 2052 2053 devctl_notify("CARP", subsys, carp_states[state], NULL); 2054 } 2055 } 2056 2057 static void 2058 carp_linkstate(struct ifnet *ifp) 2059 { 2060 struct carp_softc *sc; 2061 2062 CIF_LOCK(ifp->if_carp); 2063 IFNET_FOREACH_CARP(ifp, sc) { 2064 CARP_LOCK(sc); 2065 carp_sc_state(sc); 2066 CARP_UNLOCK(sc); 2067 } 2068 CIF_UNLOCK(ifp->if_carp); 2069 } 2070 2071 static void 2072 carp_sc_state(struct carp_softc *sc) 2073 { 2074 2075 CARP_LOCK_ASSERT(sc); 2076 2077 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2078 !(sc->sc_carpdev->if_flags & IFF_UP) || 2079 !V_carp_allow) { 2080 callout_stop(&sc->sc_ad_tmo); 2081 #ifdef INET 2082 callout_stop(&sc->sc_md_tmo); 2083 #endif 2084 #ifdef INET6 2085 callout_stop(&sc->sc_md6_tmo); 2086 #endif 2087 carp_set_state(sc, INIT, "hardware interface down"); 2088 carp_setrun(sc, 0); 2089 if (!sc->sc_suppress) 2090 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2091 sc->sc_suppress = 1; 2092 } else { 2093 carp_set_state(sc, INIT, "hardware interface up"); 2094 carp_setrun(sc, 0); 2095 if (sc->sc_suppress) 2096 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2097 sc->sc_suppress = 0; 2098 } 2099 } 2100 2101 static void 2102 carp_demote_adj(int adj, char *reason) 2103 { 2104 atomic_add_int(&V_carp_demotion, adj); 2105 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2106 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2107 } 2108 2109 static int 2110 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2111 { 2112 int new, error; 2113 struct carp_softc *sc; 2114 2115 new = V_carp_allow; 2116 error = sysctl_handle_int(oidp, &new, 0, req); 2117 if (error || !req->newptr) 2118 return (error); 2119 2120 if (V_carp_allow != new) { 2121 V_carp_allow = new; 2122 2123 mtx_lock(&carp_mtx); 2124 LIST_FOREACH(sc, &carp_list, sc_next) { 2125 CARP_LOCK(sc); 2126 if (curvnet == 
sc->sc_carpdev->if_vnet) 2127 carp_sc_state(sc); 2128 CARP_UNLOCK(sc); 2129 } 2130 mtx_unlock(&carp_mtx); 2131 } 2132 2133 return (0); 2134 } 2135 2136 static int 2137 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2138 { 2139 int new, error; 2140 2141 new = V_carp_dscp; 2142 error = sysctl_handle_int(oidp, &new, 0, req); 2143 if (error || !req->newptr) 2144 return (error); 2145 2146 if (new < 0 || new > 63) 2147 return (EINVAL); 2148 2149 V_carp_dscp = new; 2150 2151 return (0); 2152 } 2153 2154 static int 2155 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2156 { 2157 int new, error; 2158 2159 new = V_carp_demotion; 2160 error = sysctl_handle_int(oidp, &new, 0, req); 2161 if (error || !req->newptr) 2162 return (error); 2163 2164 carp_demote_adj(new, "sysctl"); 2165 2166 return (0); 2167 } 2168 2169 #ifdef INET 2170 extern struct domain inetdomain; 2171 static struct protosw in_carp_protosw = { 2172 .pr_type = SOCK_RAW, 2173 .pr_domain = &inetdomain, 2174 .pr_protocol = IPPROTO_CARP, 2175 .pr_flags = PR_ATOMIC|PR_ADDR, 2176 .pr_input = carp_input, 2177 .pr_output = rip_output, 2178 .pr_ctloutput = rip_ctloutput, 2179 .pr_usrreqs = &rip_usrreqs 2180 }; 2181 #endif 2182 2183 #ifdef INET6 2184 extern struct domain inet6domain; 2185 static struct protosw in6_carp_protosw = { 2186 .pr_type = SOCK_RAW, 2187 .pr_domain = &inet6domain, 2188 .pr_protocol = IPPROTO_CARP, 2189 .pr_flags = PR_ATOMIC|PR_ADDR, 2190 .pr_input = carp6_input, 2191 .pr_output = rip6_output, 2192 .pr_ctloutput = rip6_ctloutput, 2193 .pr_usrreqs = &rip6_usrreqs 2194 }; 2195 #endif 2196 2197 static void 2198 carp_mod_cleanup(void) 2199 { 2200 2201 #ifdef INET 2202 if (proto_reg[CARP_INET] == 0) { 2203 (void)ipproto_unregister(IPPROTO_CARP); 2204 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2205 proto_reg[CARP_INET] = -1; 2206 } 2207 carp_iamatch_p = NULL; 2208 #endif 2209 #ifdef INET6 2210 if (proto_reg[CARP_INET6] == 0) { 2211 (void)ip6proto_unregister(IPPROTO_CARP); 2212 pf_proto_unregister(PF_INET6, 
IPPROTO_CARP, SOCK_RAW); 2213 proto_reg[CARP_INET6] = -1; 2214 } 2215 carp_iamatch6_p = NULL; 2216 carp_macmatch6_p = NULL; 2217 #endif 2218 carp_ioctl_p = NULL; 2219 carp_attach_p = NULL; 2220 carp_detach_p = NULL; 2221 carp_get_vhid_p = NULL; 2222 carp_linkstate_p = NULL; 2223 carp_forus_p = NULL; 2224 carp_output_p = NULL; 2225 carp_demote_adj_p = NULL; 2226 carp_master_p = NULL; 2227 mtx_unlock(&carp_mtx); 2228 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2229 mtx_destroy(&carp_mtx); 2230 sx_destroy(&carp_sx); 2231 } 2232 2233 static int 2234 carp_mod_load(void) 2235 { 2236 int err; 2237 2238 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2239 sx_init(&carp_sx, "carp_sx"); 2240 LIST_INIT(&carp_list); 2241 carp_get_vhid_p = carp_get_vhid; 2242 carp_forus_p = carp_forus; 2243 carp_output_p = carp_output; 2244 carp_linkstate_p = carp_linkstate; 2245 carp_ioctl_p = carp_ioctl; 2246 carp_attach_p = carp_attach; 2247 carp_detach_p = carp_detach; 2248 carp_demote_adj_p = carp_demote_adj; 2249 carp_master_p = carp_master; 2250 #ifdef INET6 2251 carp_iamatch6_p = carp_iamatch6; 2252 carp_macmatch6_p = carp_macmatch6; 2253 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2254 (struct protosw *)&in6_carp_protosw); 2255 if (proto_reg[CARP_INET6]) { 2256 printf("carp: error %d attaching to PF_INET6\n", 2257 proto_reg[CARP_INET6]); 2258 carp_mod_cleanup(); 2259 return (proto_reg[CARP_INET6]); 2260 } 2261 err = ip6proto_register(IPPROTO_CARP); 2262 if (err) { 2263 printf("carp: error %d registering with INET6\n", err); 2264 carp_mod_cleanup(); 2265 return (err); 2266 } 2267 #endif 2268 #ifdef INET 2269 carp_iamatch_p = carp_iamatch; 2270 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2271 if (proto_reg[CARP_INET]) { 2272 printf("carp: error %d attaching to PF_INET\n", 2273 proto_reg[CARP_INET]); 2274 carp_mod_cleanup(); 2275 return (proto_reg[CARP_INET]); 2276 } 2277 err = ipproto_register(IPPROTO_CARP); 2278 if (err) { 2279 printf("carp: 
error %d registering with INET\n", err); 2280 carp_mod_cleanup(); 2281 return (err); 2282 } 2283 #endif 2284 return (0); 2285 } 2286 2287 static int 2288 carp_modevent(module_t mod, int type, void *data) 2289 { 2290 switch (type) { 2291 case MOD_LOAD: 2292 return carp_mod_load(); 2293 /* NOTREACHED */ 2294 case MOD_UNLOAD: 2295 mtx_lock(&carp_mtx); 2296 if (LIST_EMPTY(&carp_list)) 2297 carp_mod_cleanup(); 2298 else { 2299 mtx_unlock(&carp_mtx); 2300 return (EBUSY); 2301 } 2302 break; 2303 2304 default: 2305 return (EINVAL); 2306 } 2307 2308 return (0); 2309 } 2310 2311 static moduledata_t carp_mod = { 2312 "carp", 2313 carp_modevent, 2314 0 2315 }; 2316 2317 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2318