1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_bpf.h" 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/bus.h> 41 #include <sys/jail.h> 42 #include <sys/kernel.h> 43 #include <sys/limits.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/module.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/sockio.h> 52 #include <sys/sysctl.h> 53 #include <sys/syslog.h> 54 #include <sys/taskqueue.h> 55 #include <sys/counter.h> 56 57 #include <net/ethernet.h> 58 #include <net/if.h> 59 #include <net/if_var.h> 60 #include <net/if_dl.h> 61 #include <net/if_llatbl.h> 62 #include <net/if_types.h> 63 #include <net/route.h> 64 #include <net/vnet.h> 65 66 #if defined(INET) || defined(INET6) 67 #include <netinet/in.h> 68 #include <netinet/in_var.h> 69 #include <netinet/ip_carp.h> 70 #include <netinet/ip.h> 71 #include <machine/in_cksum.h> 72 #endif 73 #ifdef INET 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #endif 77 78 #ifdef INET6 79 #include <netinet/icmp6.h> 80 #include <netinet/ip6.h> 81 #include <netinet6/in6_var.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/scope6_var.h> 84 #include <netinet6/nd6.h> 85 #endif 86 87 #include <crypto/sha1.h> 88 89 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); 90 91 struct carp_softc { 92 struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ 93 struct ifaddr **sc_ifas; /* Our ifaddrs. */ 94 struct sockaddr_dl sc_addr; /* Our link level address. */ 95 struct callout sc_ad_tmo; /* Advertising timeout. */ 96 #ifdef INET 97 struct callout sc_md_tmo; /* Master down timeout. */ 98 #endif 99 #ifdef INET6 100 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ 101 #endif 102 struct mtx sc_mtx; 103 104 int sc_vhid; 105 int sc_advskew; 106 int sc_advbase; 107 108 int sc_naddrs; 109 int sc_naddrs6; 110 int sc_ifasiz; 111 enum { INIT = 0, BACKUP, MASTER } sc_state; 112 int sc_suppress; 113 int sc_sendad_errors; 114 #define CARP_SENDAD_MAX_ERRORS 3 115 int sc_sendad_success; 116 #define CARP_SENDAD_MIN_SUCCESS 3 117 118 int sc_init_counter; 119 uint64_t sc_counter; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char sc_key[CARP_KEY_LEN]; 124 unsigned char sc_pad[CARP_HMAC_PAD]; 125 SHA1_CTX sc_sha1; 126 127 TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ 128 LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ 129 }; 130 131 struct carp_if { 132 #ifdef INET 133 int cif_naddrs; 134 #endif 135 #ifdef INET6 136 int cif_naddrs6; 137 #endif 138 TAILQ_HEAD(, carp_softc) cif_vrs; 139 #ifdef INET 140 struct ip_moptions cif_imo; 141 #endif 142 #ifdef INET6 143 struct ip6_moptions cif_im6o; 144 #endif 145 struct ifnet *cif_ifp; 146 struct mtx cif_mtx; 147 uint32_t cif_flags; 148 #define CIF_PROMISC 0x00000001 149 }; 150 151 #define CARP_INET 0 152 #define CARP_INET6 1 153 static int proto_reg[] = {-1, -1}; 154 155 /* 156 * Brief design of carp(4). 157 * 158 * Any carp-capable ifnet may have a list of carp softcs hanging off 159 * its ifp->if_carp pointer. Each softc represents one unique virtual 160 * host id, or vhid. The softc has a back pointer to the ifnet. All 161 * softcs are joined in a global list, which has quite limited use. 162 * 163 * Any interface address that takes part in CARP negotiation has a 164 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either 165 * AF_INET or AF_INET6 address. 166 * 167 * Although, one can get the softc's backpointer to ifnet and traverse 168 * through its ifp->if_addrhead queue to find all interface addresses 169 * involved in CARP, we keep a growable array of ifaddr pointers. This 170 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that 171 * do calls into the network stack, thus avoiding LORs. 172 * 173 * Locking: 174 * 175 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), 176 * callout-driven events and ioctl()s. 177 * 178 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. 179 * To traverse the global list we use the mutex carp_mtx. 180 * 181 * Known issues with locking: 182 * 183 * - Sending ad, we put the pointer to the softc in an mtag, and no reference 184 * counting is done on the softc. 185 * - On module unload we may race (?) with packet processing thread 186 * dereferencing our function pointers. 187 */ 188 189 /* Accept incoming CARP packets. */ 190 VNET_DEFINE_STATIC(int, carp_allow) = 1; 191 #define V_carp_allow VNET(carp_allow) 192 193 /* Set DSCP in outgoing CARP packets. */ 194 VNET_DEFINE_STATIC(int, carp_dscp) = 56; 195 #define V_carp_dscp VNET(carp_dscp) 196 197 /* Preempt slower nodes. */ 198 VNET_DEFINE_STATIC(int, carp_preempt) = 0; 199 #define V_carp_preempt VNET(carp_preempt) 200 201 /* Log level. */ 202 VNET_DEFINE_STATIC(int, carp_log) = 1; 203 #define V_carp_log VNET(carp_log) 204 205 /* Global advskew demotion. */ 206 VNET_DEFINE_STATIC(int, carp_demotion) = 0; 207 #define V_carp_demotion VNET(carp_demotion) 208 209 /* Send error demotion factor. */ 210 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; 211 #define V_carp_senderr_adj VNET(carp_senderr_adj) 212 213 /* Iface down demotion factor. */ 214 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 215 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 216 217 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 218 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 219 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 220 221 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 222 "CARP"); 223 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 224 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 225 0, 0, carp_allow_sysctl, "I", 226 "Accept incoming CARP packets"); 227 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 228 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 229 0, 0, carp_dscp_sysctl, "I", 230 "DSCP value for carp packets"); 231 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 232 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 233 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 234 &VNET_NAME(carp_log), 0, "CARP log level"); 235 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 236 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 237 0, 0, carp_demote_adj_sysctl, "I", 238 "Adjust demotion factor (skew of advskew)"); 239 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 240 CTLFLAG_VNET | CTLFLAG_RW, 241 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 242 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 243 CTLFLAG_VNET | CTLFLAG_RW, 244 &VNET_NAME(carp_ifdown_adj), 0, 245 "Interface down demotion factor adjustment"); 246 247 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 248 VNET_PCPUSTAT_SYSINIT(carpstats); 249 VNET_PCPUSTAT_SYSUNINIT(carpstats); 250 251 #define CARPSTATS_ADD(name, val) \ 252 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 253 sizeof(uint64_t)], (val)) 254 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 255 256 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 257 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 258 259 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 260 NULL, MTX_DEF) 261 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 262 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 263 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 264 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 265 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 266 NULL, MTX_DEF) 267 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 268 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 269 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 270 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 271 #define CIF_FREE(cif) do { \ 272 CIF_LOCK(cif); \ 273 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 274 carp_free_if(cif); \ 275 else \ 276 CIF_UNLOCK(cif); \ 277 } while (0) 278 279 #define CARP_LOG(...) do { \ 280 if (V_carp_log > 0) \ 281 log(LOG_INFO, "carp: " __VA_ARGS__); \ 282 } while (0) 283 284 #define CARP_DEBUG(...) do { \ 285 if (V_carp_log > 1) \ 286 log(LOG_DEBUG, __VA_ARGS__); \ 287 } while (0) 288 289 #define IFNET_FOREACH_IFA(ifp, ifa) \ 290 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 291 if ((ifa)->ifa_carp != NULL) 292 293 #define CARP_FOREACH_IFA(sc, ifa) \ 294 CARP_LOCK_ASSERT(sc); \ 295 for (int _i = 0; \ 296 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 297 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 298 ++_i) 299 300 #define IFNET_FOREACH_CARP(ifp, sc) \ 301 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 302 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 303 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 304 305 #define DEMOTE_ADVSKEW(sc) \ 306 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 307 CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion)) 308 309 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 310 static struct carp_softc 311 *carp_alloc(struct ifnet *); 312 static void carp_destroy(struct carp_softc *); 313 static struct carp_if 314 *carp_alloc_if(struct ifnet *); 315 static void carp_free_if(struct carp_if *); 316 static void carp_set_state(struct carp_softc *, int, const char* reason); 317 static void carp_sc_state(struct carp_softc *); 318 static void carp_setrun(struct carp_softc *, sa_family_t); 319 static void carp_master_down(void *); 320 static void carp_master_down_locked(struct carp_softc *, 321 const char* reason); 322 static void carp_send_ad(void *); 323 static void carp_send_ad_locked(struct carp_softc *); 324 static void carp_addroute(struct carp_softc *); 325 static void carp_ifa_addroute(struct ifaddr *); 326 static void carp_delroute(struct carp_softc *); 327 static void carp_ifa_delroute(struct ifaddr *); 328 static void carp_send_ad_all(void *, int); 329 static void carp_demote_adj(int, char *); 330 331 static LIST_HEAD(, carp_softc) carp_list; 332 static struct mtx carp_mtx; 333 static struct sx carp_sx; 334 static struct task carp_sendall_task = 335 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 336 337 static void 338 carp_hmac_prepare(struct carp_softc *sc) 339 { 340 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 341 uint8_t vhid = sc->sc_vhid & 0xff; 342 struct ifaddr *ifa; 343 int i, found; 344 #ifdef INET 345 struct in_addr last, cur, in; 346 #endif 347 #ifdef INET6 348 struct in6_addr last6, cur6, in6; 349 #endif 350 351 CARP_LOCK_ASSERT(sc); 352 353 /* Compute ipad from key. */ 354 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 355 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 356 for (i = 0; i < sizeof(sc->sc_pad); i++) 357 sc->sc_pad[i] ^= 0x36; 358 359 /* Precompute first part of inner hash. */ 360 SHA1Init(&sc->sc_sha1); 361 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 362 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 363 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 364 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 365 #ifdef INET 366 cur.s_addr = 0; 367 do { 368 found = 0; 369 last = cur; 370 cur.s_addr = 0xffffffff; 371 CARP_FOREACH_IFA(sc, ifa) { 372 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 373 if (ifa->ifa_addr->sa_family == AF_INET && 374 ntohl(in.s_addr) > ntohl(last.s_addr) && 375 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 376 cur.s_addr = in.s_addr; 377 found++; 378 } 379 } 380 if (found) 381 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 382 } while (found); 383 #endif /* INET */ 384 #ifdef INET6 385 memset(&cur6, 0, sizeof(cur6)); 386 do { 387 found = 0; 388 last6 = cur6; 389 memset(&cur6, 0xff, sizeof(cur6)); 390 CARP_FOREACH_IFA(sc, ifa) { 391 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 392 if (IN6_IS_SCOPE_EMBED(&in6)) 393 in6.s6_addr16[1] = 0; 394 if (ifa->ifa_addr->sa_family == AF_INET6 && 395 memcmp(&in6, &last6, sizeof(in6)) > 0 && 396 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 397 cur6 = in6; 398 found++; 399 } 400 } 401 if (found) 402 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 403 } while (found); 404 #endif /* INET6 */ 405 406 /* convert ipad to opad */ 407 for (i = 0; i < sizeof(sc->sc_pad); i++) 408 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 409 } 410 411 static void 412 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 413 unsigned char md[20]) 414 { 415 SHA1_CTX sha1ctx; 416 417 CARP_LOCK_ASSERT(sc); 418 419 /* fetch first half of inner hash */ 420 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 421 422 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 423 SHA1Final(md, &sha1ctx); 424 425 /* outer hash */ 426 SHA1Init(&sha1ctx); 427 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 428 SHA1Update(&sha1ctx, md, 20); 429 SHA1Final(md, &sha1ctx); 430 } 431 432 static int 433 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 434 unsigned char md[20]) 435 { 436 unsigned char md2[20]; 437 438 CARP_LOCK_ASSERT(sc); 439 440 carp_hmac_generate(sc, counter, md2); 441 442 return (bcmp(md, md2, sizeof(md2))); 443 } 444 445 /* 446 * process input packet. 447 * we have rearranged checks order compared to the rfc, 448 * but it seems more efficient this way or not possible otherwise. 449 */ 450 #ifdef INET 451 int 452 carp_input(struct mbuf **mp, int *offp, int proto) 453 { 454 struct mbuf *m = *mp; 455 struct ip *ip = mtod(m, struct ip *); 456 struct carp_header *ch; 457 int iplen, len; 458 459 iplen = *offp; 460 *mp = NULL; 461 462 CARPSTATS_INC(carps_ipackets); 463 464 if (!V_carp_allow) { 465 m_freem(m); 466 return (IPPROTO_DONE); 467 } 468 469 /* verify that the IP TTL is 255. */ 470 if (ip->ip_ttl != CARP_DFLTTL) { 471 CARPSTATS_INC(carps_badttl); 472 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 473 ip->ip_ttl, 474 m->m_pkthdr.rcvif->if_xname); 475 m_freem(m); 476 return (IPPROTO_DONE); 477 } 478 479 iplen = ip->ip_hl << 2; 480 481 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 482 CARPSTATS_INC(carps_badlen); 483 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 484 "on %s\n", __func__, m->m_len - sizeof(struct ip), 485 m->m_pkthdr.rcvif->if_xname); 486 m_freem(m); 487 return (IPPROTO_DONE); 488 } 489 490 if (iplen + sizeof(*ch) < m->m_len) { 491 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 492 CARPSTATS_INC(carps_hdrops); 493 CARP_DEBUG("%s: pullup failed\n", __func__); 494 return (IPPROTO_DONE); 495 } 496 ip = mtod(m, struct ip *); 497 } 498 ch = (struct carp_header *)((char *)ip + iplen); 499 500 /* 501 * verify that the received packet length is 502 * equal to the CARP header 503 */ 504 len = iplen + sizeof(*ch); 505 if (len > m->m_pkthdr.len) { 506 CARPSTATS_INC(carps_badlen); 507 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 508 m->m_pkthdr.len, 509 m->m_pkthdr.rcvif->if_xname); 510 m_freem(m); 511 return (IPPROTO_DONE); 512 } 513 514 if ((m = m_pullup(m, len)) == NULL) { 515 CARPSTATS_INC(carps_hdrops); 516 return (IPPROTO_DONE); 517 } 518 ip = mtod(m, struct ip *); 519 ch = (struct carp_header *)((char *)ip + iplen); 520 521 /* verify the CARP checksum */ 522 m->m_data += iplen; 523 if (in_cksum(m, len - iplen)) { 524 CARPSTATS_INC(carps_badsum); 525 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 526 m->m_pkthdr.rcvif->if_xname); 527 m_freem(m); 528 return (IPPROTO_DONE); 529 } 530 m->m_data -= iplen; 531 532 carp_input_c(m, ch, AF_INET); 533 return (IPPROTO_DONE); 534 } 535 #endif 536 537 #ifdef INET6 538 int 539 carp6_input(struct mbuf **mp, int *offp, int proto) 540 { 541 struct mbuf *m = *mp; 542 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 543 struct carp_header *ch; 544 u_int len; 545 546 CARPSTATS_INC(carps_ipackets6); 547 548 if (!V_carp_allow) { 549 m_freem(m); 550 return (IPPROTO_DONE); 551 } 552 553 /* check if received on a valid carp interface */ 554 if (m->m_pkthdr.rcvif->if_carp == NULL) { 555 CARPSTATS_INC(carps_badif); 556 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 557 __func__, m->m_pkthdr.rcvif->if_xname); 558 m_freem(m); 559 return (IPPROTO_DONE); 560 } 561 562 /* verify that the IP TTL is 255 */ 563 if (ip6->ip6_hlim != CARP_DFLTTL) { 564 CARPSTATS_INC(carps_badttl); 565 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 566 ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); 567 m_freem(m); 568 return (IPPROTO_DONE); 569 } 570 571 /* verify that we have a complete carp packet */ 572 if (m->m_len < *offp + sizeof(*ch)) { 573 len = m->m_len; 574 m = m_pullup(m, *offp + sizeof(*ch)); 575 if (m == NULL) { 576 CARPSTATS_INC(carps_badlen); 577 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 578 return (IPPROTO_DONE); 579 } 580 } 581 ch = (struct carp_header *)(mtod(m, char *) + *offp); 582 583 584 /* verify the CARP checksum */ 585 m->m_data += *offp; 586 if (in_cksum(m, sizeof(*ch))) { 587 CARPSTATS_INC(carps_badsum); 588 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 589 m->m_pkthdr.rcvif->if_xname); 590 m_freem(m); 591 return (IPPROTO_DONE); 592 } 593 m->m_data -= *offp; 594 595 carp_input_c(m, ch, AF_INET6); 596 return (IPPROTO_DONE); 597 } 598 #endif /* INET6 */ 599 600 /* 601 * This routine should not be necessary at all, but some switches 602 * (VMWare ESX vswitches) can echo our own packets back at us, 603 * and we must ignore them or they will cause us to drop out of 604 * MASTER mode. 605 * 606 * We cannot catch all cases of network loops. Instead, what we 607 * do here is catch any packet that arrives with a carp header 608 * with a VHID of 0, that comes from an address that is our own. 609 * These packets are by definition "from us" (even if they are from 610 * a misconfigured host that is pretending to be us). 611 * 612 * The VHID test is outside this mini-function. 613 */ 614 static int 615 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 616 { 617 #ifdef INET 618 struct ip *ip4; 619 struct in_addr in4; 620 #endif 621 #ifdef INET6 622 struct ip6_hdr *ip6; 623 struct in6_addr in6; 624 #endif 625 626 switch (af) { 627 #ifdef INET 628 case AF_INET: 629 ip4 = mtod(m, struct ip *); 630 in4 = ifatoia(ifa)->ia_addr.sin_addr; 631 return (in4.s_addr == ip4->ip_src.s_addr); 632 #endif 633 #ifdef INET6 634 case AF_INET6: 635 ip6 = mtod(m, struct ip6_hdr *); 636 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 637 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 638 #endif 639 default: 640 break; 641 } 642 return (0); 643 } 644 645 static void 646 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 647 { 648 struct ifnet *ifp = m->m_pkthdr.rcvif; 649 struct ifaddr *ifa, *match; 650 struct carp_softc *sc; 651 uint64_t tmp_counter; 652 struct timeval sc_tv, ch_tv; 653 int error; 654 655 NET_EPOCH_ASSERT(); 656 657 /* 658 * Verify that the VHID is valid on the receiving interface. 659 * 660 * There should be just one match. If there are none 661 * the VHID is not valid and we drop the packet. If 662 * there are multiple VHID matches, take just the first 663 * one, for compatibility with previous code. While we're 664 * scanning, check for obvious loops in the network topology 665 * (these should never happen, and as noted above, we may 666 * miss real loops; this is just a double-check). 667 */ 668 error = 0; 669 match = NULL; 670 IFNET_FOREACH_IFA(ifp, ifa) { 671 if (match == NULL && ifa->ifa_carp != NULL && 672 ifa->ifa_addr->sa_family == af && 673 ifa->ifa_carp->sc_vhid == ch->carp_vhid) 674 match = ifa; 675 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) 676 error = ELOOP; 677 } 678 ifa = error ? NULL : match; 679 if (ifa != NULL) 680 ifa_ref(ifa); 681 682 if (ifa == NULL) { 683 if (error == ELOOP) { 684 CARP_DEBUG("dropping looped packet on interface %s\n", 685 ifp->if_xname); 686 CARPSTATS_INC(carps_badif); /* ??? */ 687 } else { 688 CARPSTATS_INC(carps_badvhid); 689 } 690 m_freem(m); 691 return; 692 } 693 694 /* verify the CARP version. */ 695 if (ch->carp_version != CARP_VERSION) { 696 CARPSTATS_INC(carps_badver); 697 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname, 698 ch->carp_version); 699 ifa_free(ifa); 700 m_freem(m); 701 return; 702 } 703 704 sc = ifa->ifa_carp; 705 CARP_LOCK(sc); 706 ifa_free(ifa); 707 708 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 709 CARPSTATS_INC(carps_badauth); 710 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 711 sc->sc_vhid, ifp->if_xname); 712 goto out; 713 } 714 715 tmp_counter = ntohl(ch->carp_counter[0]); 716 tmp_counter = tmp_counter<<32; 717 tmp_counter += ntohl(ch->carp_counter[1]); 718 719 /* XXX Replay protection goes here */ 720 721 sc->sc_init_counter = 0; 722 sc->sc_counter = tmp_counter; 723 724 sc_tv.tv_sec = sc->sc_advbase; 725 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 726 ch_tv.tv_sec = ch->carp_advbase; 727 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 728 729 switch (sc->sc_state) { 730 case INIT: 731 break; 732 case MASTER: 733 /* 734 * If we receive an advertisement from a master who's going to 735 * be more frequent than us, go into BACKUP state. 736 */ 737 if (timevalcmp(&sc_tv, &ch_tv, >) || 738 timevalcmp(&sc_tv, &ch_tv, ==)) { 739 callout_stop(&sc->sc_ad_tmo); 740 carp_set_state(sc, BACKUP, 741 "more frequent advertisement received"); 742 carp_setrun(sc, 0); 743 carp_delroute(sc); 744 } 745 break; 746 case BACKUP: 747 /* 748 * If we're pre-empting masters who advertise slower than us, 749 * and this one claims to be slower, treat him as down. 750 */ 751 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 752 carp_master_down_locked(sc, 753 "preempting a slower master"); 754 break; 755 } 756 757 /* 758 * If the master is going to advertise at such a low frequency 759 * that he's guaranteed to time out, we'd might as well just 760 * treat him as timed out now. 761 */ 762 sc_tv.tv_sec = sc->sc_advbase * 3; 763 if (timevalcmp(&sc_tv, &ch_tv, <)) { 764 carp_master_down_locked(sc, "master will time out"); 765 break; 766 } 767 768 /* 769 * Otherwise, we reset the counter and wait for the next 770 * advertisement. 771 */ 772 carp_setrun(sc, af); 773 break; 774 } 775 776 out: 777 CARP_UNLOCK(sc); 778 m_freem(m); 779 } 780 781 static int 782 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 783 { 784 struct m_tag *mtag; 785 786 if (sc->sc_init_counter) { 787 /* this could also be seconds since unix epoch */ 788 sc->sc_counter = arc4random(); 789 sc->sc_counter = sc->sc_counter << 32; 790 sc->sc_counter += arc4random(); 791 } else 792 sc->sc_counter++; 793 794 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 795 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 796 797 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 798 799 /* Tag packet for carp_output */ 800 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), 801 M_NOWAIT)) == NULL) { 802 m_freem(m); 803 CARPSTATS_INC(carps_onomem); 804 return (ENOMEM); 805 } 806 bcopy(&sc, mtag + 1, sizeof(sc)); 807 m_tag_prepend(m, mtag); 808 809 return (0); 810 } 811 812 /* 813 * To avoid LORs and possible recursions this function shouldn't 814 * be called directly, but scheduled via taskqueue. 815 */ 816 static void 817 carp_send_ad_all(void *ctx __unused, int pending __unused) 818 { 819 struct carp_softc *sc; 820 821 mtx_lock(&carp_mtx); 822 LIST_FOREACH(sc, &carp_list, sc_next) 823 if (sc->sc_state == MASTER) { 824 CARP_LOCK(sc); 825 CURVNET_SET(sc->sc_carpdev->if_vnet); 826 carp_send_ad_locked(sc); 827 CURVNET_RESTORE(); 828 CARP_UNLOCK(sc); 829 } 830 mtx_unlock(&carp_mtx); 831 } 832 833 /* Send a periodic advertisement, executed in callout context. */ 834 static void 835 carp_send_ad(void *v) 836 { 837 struct carp_softc *sc = v; 838 839 CARP_LOCK_ASSERT(sc); 840 CURVNET_SET(sc->sc_carpdev->if_vnet); 841 carp_send_ad_locked(sc); 842 CURVNET_RESTORE(); 843 CARP_UNLOCK(sc); 844 } 845 846 static void 847 carp_send_ad_error(struct carp_softc *sc, int error) 848 { 849 850 if (error) { 851 if (sc->sc_sendad_errors < INT_MAX) 852 sc->sc_sendad_errors++; 853 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 854 static const char fmt[] = "send error %d on %s"; 855 char msg[sizeof(fmt) + IFNAMSIZ]; 856 857 sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); 858 carp_demote_adj(V_carp_senderr_adj, msg); 859 } 860 sc->sc_sendad_success = 0; 861 } else { 862 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS && 863 ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 864 static const char fmt[] = "send ok on %s"; 865 char msg[sizeof(fmt) + IFNAMSIZ]; 866 867 sprintf(msg, fmt, sc->sc_carpdev->if_xname); 868 carp_demote_adj(-V_carp_senderr_adj, msg); 869 sc->sc_sendad_errors = 0; 870 } else 871 sc->sc_sendad_errors = 0; 872 } 873 } 874 875 /* 876 * Pick the best ifaddr on the given ifp for sending CARP 877 * advertisements. 878 * 879 * "Best" here is defined by ifa_preferred(). This function is much 880 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 881 * 882 * (This could be simplified to return the actual address, except that 883 * it has a different format in AF_INET and AF_INET6.) 884 */ 885 static struct ifaddr * 886 carp_best_ifa(int af, struct ifnet *ifp) 887 { 888 struct ifaddr *ifa, *best; 889 890 NET_EPOCH_ASSERT(); 891 892 if (af >= AF_MAX) 893 return (NULL); 894 best = NULL; 895 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 896 if (ifa->ifa_addr->sa_family == af && 897 (best == NULL || ifa_preferred(best, ifa))) 898 best = ifa; 899 } 900 if (best != NULL) 901 ifa_ref(best); 902 return (best); 903 } 904 905 static void 906 carp_send_ad_locked(struct carp_softc *sc) 907 { 908 struct carp_header ch; 909 struct timeval tv; 910 struct epoch_tracker et; 911 struct ifaddr *ifa; 912 struct carp_header *ch_ptr; 913 struct mbuf *m; 914 int len, advskew; 915 916 CARP_LOCK_ASSERT(sc); 917 918 advskew = DEMOTE_ADVSKEW(sc); 919 tv.tv_sec = sc->sc_advbase; 920 tv.tv_usec = advskew * 1000000 / 256; 921 922 ch.carp_version = CARP_VERSION; 923 ch.carp_type = CARP_ADVERTISEMENT; 924 ch.carp_vhid = sc->sc_vhid; 925 ch.carp_advbase = sc->sc_advbase; 926 ch.carp_advskew = advskew; 927 ch.carp_authlen = 7; /* XXX DEFINE */ 928 ch.carp_pad1 = 0; /* must be zero */ 929 ch.carp_cksum = 0; 930 931 /* XXXGL: OpenBSD picks first ifaddr with needed family. */ 932 933 #ifdef INET 934 if (sc->sc_naddrs) { 935 struct ip *ip; 936 937 m = m_gethdr(M_NOWAIT, MT_DATA); 938 if (m == NULL) { 939 CARPSTATS_INC(carps_onomem); 940 goto resched; 941 } 942 len = sizeof(*ip) + sizeof(ch); 943 m->m_pkthdr.len = len; 944 m->m_pkthdr.rcvif = NULL; 945 m->m_len = len; 946 M_ALIGN(m, m->m_len); 947 m->m_flags |= M_MCAST; 948 ip = mtod(m, struct ip *); 949 ip->ip_v = IPVERSION; 950 ip->ip_hl = sizeof(*ip) >> 2; 951 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 952 ip->ip_len = htons(len); 953 ip->ip_off = htons(IP_DF); 954 ip->ip_ttl = CARP_DFLTTL; 955 ip->ip_p = IPPROTO_CARP; 956 ip->ip_sum = 0; 957 ip_fillid(ip); 958 959 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 960 if (ifa != NULL) { 961 ip->ip_src.s_addr = 962 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 963 ifa_free(ifa); 964 } else 965 ip->ip_src.s_addr = 0; 966 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 967 968 ch_ptr = (struct carp_header *)(&ip[1]); 969 bcopy(&ch, ch_ptr, sizeof(ch)); 970 if (carp_prepare_ad(m, sc, ch_ptr)) 971 goto resched; 972 973 m->m_data += sizeof(*ip); 974 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 975 m->m_data -= sizeof(*ip); 976 977 CARPSTATS_INC(carps_opackets); 978 979 NET_EPOCH_ENTER(et); 980 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 981 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 982 NET_EPOCH_EXIT(et); 983 } 984 #endif /* INET */ 985 #ifdef INET6 986 if (sc->sc_naddrs6) { 987 struct ip6_hdr *ip6; 988 989 m = m_gethdr(M_NOWAIT, MT_DATA); 990 if (m == NULL) { 991 CARPSTATS_INC(carps_onomem); 992 goto resched; 993 } 994 len = sizeof(*ip6) + sizeof(ch); 995 m->m_pkthdr.len = len; 996 m->m_pkthdr.rcvif = NULL; 997 m->m_len = len; 998 M_ALIGN(m, m->m_len); 999 m->m_flags |= M_MCAST; 1000 ip6 = mtod(m, struct ip6_hdr *); 1001 bzero(ip6, sizeof(*ip6)); 1002 ip6->ip6_vfc |= IPV6_VERSION; 1003 /* Traffic class isn't defined in ip6 struct instead 1004 * it gets offset into flowid field */ 1005 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1006 IPTOS_DSCP_OFFSET)); 1007 ip6->ip6_hlim = CARP_DFLTTL; 1008 ip6->ip6_nxt = IPPROTO_CARP; 1009 1010 /* set the source address */ 1011 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1012 if (ifa != NULL) { 1013 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1014 sizeof(struct in6_addr)); 1015 ifa_free(ifa); 1016 } else 1017 /* This should never happen with IPv6. */ 1018 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1019 1020 /* Set the multicast destination. */ 1021 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1022 ip6->ip6_dst.s6_addr8[15] = 0x12; 1023 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1024 m_freem(m); 1025 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1026 goto resched; 1027 } 1028 1029 ch_ptr = (struct carp_header *)(&ip6[1]); 1030 bcopy(&ch, ch_ptr, sizeof(ch)); 1031 if (carp_prepare_ad(m, sc, ch_ptr)) 1032 goto resched; 1033 1034 m->m_data += sizeof(*ip6); 1035 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1036 m->m_data -= sizeof(*ip6); 1037 1038 CARPSTATS_INC(carps_opackets6); 1039 1040 NET_EPOCH_ENTER(et); 1041 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1042 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1043 NET_EPOCH_EXIT(et); 1044 } 1045 #endif /* INET6 */ 1046 1047 resched: 1048 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); 1049 } 1050 1051 static void 1052 carp_addroute(struct carp_softc *sc) 1053 { 1054 struct ifaddr *ifa; 1055 1056 CARP_FOREACH_IFA(sc, ifa) 1057 carp_ifa_addroute(ifa); 1058 } 1059 1060 static void 1061 carp_ifa_addroute(struct ifaddr *ifa) 1062 { 1063 1064 switch (ifa->ifa_addr->sa_family) { 1065 #ifdef INET 1066 case AF_INET: 1067 in_addprefix(ifatoia(ifa), RTF_UP); 1068 ifa_add_loopback_route(ifa, 1069 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1070 break; 1071 #endif 1072 #ifdef INET6 1073 case AF_INET6: 1074 ifa_add_loopback_route(ifa, 1075 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1076 nd6_add_ifa_lle(ifatoia6(ifa)); 1077 break; 1078 #endif 1079 } 1080 } 1081 1082 static void 1083 carp_delroute(struct carp_softc *sc) 1084 { 1085 struct ifaddr *ifa; 1086 1087 CARP_FOREACH_IFA(sc, ifa) 1088 carp_ifa_delroute(ifa); 1089 } 1090 1091 static void 1092 carp_ifa_delroute(struct ifaddr *ifa) 1093 { 1094 1095 switch (ifa->ifa_addr->sa_family) { 1096 #ifdef INET 1097 case AF_INET: 1098 ifa_del_loopback_route(ifa, 1099 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1100 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1101 break; 1102 #endif 1103 #ifdef INET6 1104 case AF_INET6: 1105 ifa_del_loopback_route(ifa, 1106 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1107 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1108 break; 1109 #endif 1110 } 1111 } 1112 1113 int 1114 carp_master(struct ifaddr *ifa) 1115 { 1116 struct carp_softc *sc = ifa->ifa_carp; 1117 1118 return (sc->sc_state == MASTER); 1119 } 1120 1121 #ifdef INET 1122 /* 1123 * Broadcast a gratuitous ARP request containing 1124 * the virtual router MAC address for each IP address 1125 * associated with the virtual router. 1126 */ 1127 static void 1128 carp_send_arp(struct carp_softc *sc) 1129 { 1130 struct ifaddr *ifa; 1131 struct in_addr addr; 1132 1133 CARP_FOREACH_IFA(sc, ifa) { 1134 if (ifa->ifa_addr->sa_family != AF_INET) 1135 continue; 1136 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1137 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); 1138 } 1139 } 1140 1141 int 1142 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1143 { 1144 struct carp_softc *sc = ifa->ifa_carp; 1145 1146 if (sc->sc_state == MASTER) { 1147 *enaddr = LLADDR(&sc->sc_addr); 1148 return (1); 1149 } 1150 1151 return (0); 1152 } 1153 #endif 1154 1155 #ifdef INET6 1156 static void 1157 carp_send_na(struct carp_softc *sc) 1158 { 1159 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1160 struct ifaddr *ifa; 1161 struct in6_addr *in6; 1162 1163 CARP_FOREACH_IFA(sc, ifa) { 1164 if (ifa->ifa_addr->sa_family != AF_INET6) 1165 continue; 1166 1167 in6 = IFA_IN6(ifa); 1168 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1169 ND_NA_FLAG_OVERRIDE, 1, NULL); 1170 DELAY(1000); /* XXX */ 1171 } 1172 } 1173 1174 /* 1175 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1176 * matches and is not a carp address. Returns NULL otherwise. 1177 */ 1178 struct ifaddr * 1179 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1180 { 1181 struct ifaddr *ifa; 1182 1183 NET_EPOCH_ASSERT(); 1184 1185 ifa = NULL; 1186 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1187 if (ifa->ifa_addr->sa_family != AF_INET6) 1188 continue; 1189 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1190 continue; 1191 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1192 ifa = NULL; 1193 else 1194 ifa_ref(ifa); 1195 break; 1196 } 1197 1198 return (ifa); 1199 } 1200 1201 char * 1202 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1203 { 1204 struct ifaddr *ifa; 1205 1206 NET_EPOCH_ASSERT(); 1207 1208 IFNET_FOREACH_IFA(ifp, ifa) 1209 if (ifa->ifa_addr->sa_family == AF_INET6 && 1210 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1211 struct carp_softc *sc = ifa->ifa_carp; 1212 struct m_tag *mtag; 1213 1214 mtag = m_tag_get(PACKET_TAG_CARP, 1215 sizeof(struct carp_softc *), M_NOWAIT); 1216 if (mtag == NULL) 1217 /* Better a bit than nothing. */ 1218 return (LLADDR(&sc->sc_addr)); 1219 1220 bcopy(&sc, mtag + 1, sizeof(sc)); 1221 m_tag_prepend(m, mtag); 1222 1223 return (LLADDR(&sc->sc_addr)); 1224 } 1225 1226 return (NULL); 1227 } 1228 #endif /* INET6 */ 1229 1230 int 1231 carp_forus(struct ifnet *ifp, u_char *dhost) 1232 { 1233 struct carp_softc *sc; 1234 uint8_t *ena = dhost; 1235 1236 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1237 return (0); 1238 1239 CIF_LOCK(ifp->if_carp); 1240 IFNET_FOREACH_CARP(ifp, sc) { 1241 /* 1242 * CARP_LOCK() is not here, since would protect nothing, but 1243 * cause deadlock with if_bridge, calling this under its lock. 1244 */ 1245 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1246 ETHER_ADDR_LEN)) { 1247 CIF_UNLOCK(ifp->if_carp); 1248 return (1); 1249 } 1250 } 1251 CIF_UNLOCK(ifp->if_carp); 1252 1253 return (0); 1254 } 1255 1256 /* Master down timeout event, executed in callout context. */ 1257 static void 1258 carp_master_down(void *v) 1259 { 1260 struct carp_softc *sc = v; 1261 1262 CARP_LOCK_ASSERT(sc); 1263 1264 CURVNET_SET(sc->sc_carpdev->if_vnet); 1265 if (sc->sc_state == BACKUP) { 1266 carp_master_down_locked(sc, "master timed out"); 1267 } 1268 CURVNET_RESTORE(); 1269 1270 CARP_UNLOCK(sc); 1271 } 1272 1273 static void 1274 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1275 { 1276 1277 CARP_LOCK_ASSERT(sc); 1278 1279 switch (sc->sc_state) { 1280 case BACKUP: 1281 carp_set_state(sc, MASTER, reason); 1282 carp_send_ad_locked(sc); 1283 #ifdef INET 1284 carp_send_arp(sc); 1285 #endif 1286 #ifdef INET6 1287 carp_send_na(sc); 1288 #endif 1289 carp_setrun(sc, 0); 1290 carp_addroute(sc); 1291 break; 1292 case INIT: 1293 case MASTER: 1294 #ifdef INVARIANTS 1295 panic("carp: VHID %u@%s: master_down event in %s state\n", 1296 sc->sc_vhid, 1297 sc->sc_carpdev->if_xname, 1298 sc->sc_state ? "MASTER" : "INIT"); 1299 #endif 1300 break; 1301 } 1302 } 1303 1304 /* 1305 * When in backup state, af indicates whether to reset the master down timer 1306 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1307 */ 1308 static void 1309 carp_setrun(struct carp_softc *sc, sa_family_t af) 1310 { 1311 struct timeval tv; 1312 1313 CARP_LOCK_ASSERT(sc); 1314 1315 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1316 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1317 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1318 !V_carp_allow) 1319 return; 1320 1321 switch (sc->sc_state) { 1322 case INIT: 1323 carp_set_state(sc, BACKUP, "initialization complete"); 1324 carp_setrun(sc, 0); 1325 break; 1326 case BACKUP: 1327 callout_stop(&sc->sc_ad_tmo); 1328 tv.tv_sec = 3 * sc->sc_advbase; 1329 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1330 switch (af) { 1331 #ifdef INET 1332 case AF_INET: 1333 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1334 carp_master_down, sc); 1335 break; 1336 #endif 1337 #ifdef INET6 1338 case AF_INET6: 1339 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1340 carp_master_down, sc); 1341 break; 1342 #endif 1343 default: 1344 #ifdef INET 1345 if (sc->sc_naddrs) 1346 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1347 carp_master_down, sc); 1348 #endif 1349 #ifdef INET6 1350 if (sc->sc_naddrs6) 1351 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1352 carp_master_down, sc); 1353 #endif 1354 break; 1355 } 1356 break; 1357 case MASTER: 1358 tv.tv_sec = sc->sc_advbase; 1359 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1360 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1361 carp_send_ad, sc); 1362 break; 1363 } 1364 } 1365 1366 /* 1367 * Setup multicast structures. 1368 */ 1369 static int 1370 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1371 { 1372 struct ifnet *ifp = cif->cif_ifp; 1373 int error = 0; 1374 1375 switch (sa) { 1376 #ifdef INET 1377 case AF_INET: 1378 { 1379 struct ip_moptions *imo = &cif->cif_imo; 1380 struct in_mfilter *imf; 1381 struct in_addr addr; 1382 1383 if (ip_mfilter_first(&imo->imo_head) != NULL) 1384 return (0); 1385 1386 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1387 ip_mfilter_init(&imo->imo_head); 1388 imo->imo_multicast_vif = -1; 1389 1390 addr.s_addr = htonl(INADDR_CARP_GROUP); 1391 if ((error = in_joingroup(ifp, &addr, NULL, 1392 &imf->imf_inm)) != 0) { 1393 ip_mfilter_free(imf); 1394 break; 1395 } 1396 1397 ip_mfilter_insert(&imo->imo_head, imf); 1398 imo->imo_multicast_ifp = ifp; 1399 imo->imo_multicast_ttl = CARP_DFLTTL; 1400 imo->imo_multicast_loop = 0; 1401 break; 1402 } 1403 #endif 1404 #ifdef INET6 1405 case AF_INET6: 1406 { 1407 struct ip6_moptions *im6o = &cif->cif_im6o; 1408 struct in6_mfilter *im6f[2]; 1409 struct in6_addr in6; 1410 1411 if (ip6_mfilter_first(&im6o->im6o_head)) 1412 return (0); 1413 1414 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1415 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1416 1417 ip6_mfilter_init(&im6o->im6o_head); 1418 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1419 im6o->im6o_multicast_ifp = ifp; 1420 1421 /* Join IPv6 CARP multicast group. */ 1422 bzero(&in6, sizeof(in6)); 1423 in6.s6_addr16[0] = htons(0xff02); 1424 in6.s6_addr8[15] = 0x12; 1425 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1426 ip6_mfilter_free(im6f[0]); 1427 ip6_mfilter_free(im6f[1]); 1428 break; 1429 } 1430 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1431 ip6_mfilter_free(im6f[0]); 1432 ip6_mfilter_free(im6f[1]); 1433 break; 1434 } 1435 1436 /* Join solicited multicast address. */ 1437 bzero(&in6, sizeof(in6)); 1438 in6.s6_addr16[0] = htons(0xff02); 1439 in6.s6_addr32[1] = 0; 1440 in6.s6_addr32[2] = htonl(1); 1441 in6.s6_addr32[3] = 0; 1442 in6.s6_addr8[12] = 0xff; 1443 1444 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1445 ip6_mfilter_free(im6f[0]); 1446 ip6_mfilter_free(im6f[1]); 1447 break; 1448 } 1449 1450 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1451 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1452 ip6_mfilter_free(im6f[0]); 1453 ip6_mfilter_free(im6f[1]); 1454 break; 1455 } 1456 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1457 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1458 break; 1459 } 1460 #endif 1461 } 1462 1463 return (error); 1464 } 1465 1466 /* 1467 * Free multicast structures. 1468 */ 1469 static void 1470 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1471 { 1472 #ifdef INET 1473 struct ip_moptions *imo = &cif->cif_imo; 1474 struct in_mfilter *imf; 1475 #endif 1476 #ifdef INET6 1477 struct ip6_moptions *im6o = &cif->cif_im6o; 1478 struct in6_mfilter *im6f; 1479 #endif 1480 sx_assert(&carp_sx, SA_XLOCKED); 1481 1482 switch (sa) { 1483 #ifdef INET 1484 case AF_INET: 1485 if (cif->cif_naddrs != 0) 1486 break; 1487 1488 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 1489 ip_mfilter_remove(&imo->imo_head, imf); 1490 in_leavegroup(imf->imf_inm, NULL); 1491 ip_mfilter_free(imf); 1492 } 1493 break; 1494 #endif 1495 #ifdef INET6 1496 case AF_INET6: 1497 if (cif->cif_naddrs6 != 0) 1498 break; 1499 1500 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 1501 ip6_mfilter_remove(&im6o->im6o_head, im6f); 1502 in6_leavegroup(im6f->im6f_in6m, NULL); 1503 ip6_mfilter_free(im6f); 1504 } 1505 break; 1506 #endif 1507 } 1508 } 1509 1510 int 1511 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1512 { 1513 struct m_tag *mtag; 1514 struct carp_softc *sc; 1515 1516 if (!sa) 1517 return (0); 1518 1519 switch (sa->sa_family) { 1520 #ifdef INET 1521 case AF_INET: 1522 break; 1523 #endif 1524 #ifdef INET6 1525 case AF_INET6: 1526 break; 1527 #endif 1528 default: 1529 return (0); 1530 } 1531 1532 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1533 if (mtag == NULL) 1534 return (0); 1535 1536 bcopy(mtag + 1, &sc, sizeof(sc)); 1537 1538 /* Set the source MAC address to the Virtual Router MAC Address. */ 1539 switch (ifp->if_type) { 1540 case IFT_ETHER: 1541 case IFT_BRIDGE: 1542 case IFT_L2VLAN: { 1543 struct ether_header *eh; 1544 1545 eh = mtod(m, struct ether_header *); 1546 eh->ether_shost[0] = 0; 1547 eh->ether_shost[1] = 0; 1548 eh->ether_shost[2] = 0x5e; 1549 eh->ether_shost[3] = 0; 1550 eh->ether_shost[4] = 1; 1551 eh->ether_shost[5] = sc->sc_vhid; 1552 } 1553 break; 1554 default: 1555 printf("%s: carp is not supported for the %d interface type\n", 1556 ifp->if_xname, ifp->if_type); 1557 return (EOPNOTSUPP); 1558 } 1559 1560 return (0); 1561 } 1562 1563 static struct carp_softc* 1564 carp_alloc(struct ifnet *ifp) 1565 { 1566 struct carp_softc *sc; 1567 struct carp_if *cif; 1568 1569 sx_assert(&carp_sx, SA_XLOCKED); 1570 1571 if ((cif = ifp->if_carp) == NULL) 1572 cif = carp_alloc_if(ifp); 1573 1574 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1575 1576 sc->sc_advbase = CARP_DFLTINTV; 1577 sc->sc_vhid = -1; /* required setting */ 1578 sc->sc_init_counter = 1; 1579 sc->sc_state = INIT; 1580 1581 sc->sc_ifasiz = sizeof(struct ifaddr *); 1582 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1583 sc->sc_carpdev = ifp; 1584 1585 CARP_LOCK_INIT(sc); 1586 #ifdef INET 1587 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1588 #endif 1589 #ifdef INET6 1590 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1591 #endif 1592 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1593 1594 CIF_LOCK(cif); 1595 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1596 CIF_UNLOCK(cif); 1597 1598 mtx_lock(&carp_mtx); 1599 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1600 mtx_unlock(&carp_mtx); 1601 1602 return (sc); 1603 } 1604 1605 static void 1606 carp_grow_ifas(struct carp_softc *sc) 1607 { 1608 struct ifaddr **new; 1609 1610 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 1611 CARP_LOCK(sc); 1612 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1613 free(sc->sc_ifas, M_CARP); 1614 sc->sc_ifas = new; 1615 sc->sc_ifasiz *= 2; 1616 CARP_UNLOCK(sc); 1617 } 1618 1619 static void 1620 carp_destroy(struct carp_softc *sc) 1621 { 1622 struct ifnet *ifp = sc->sc_carpdev; 1623 struct carp_if *cif = ifp->if_carp; 1624 1625 sx_assert(&carp_sx, SA_XLOCKED); 1626 1627 if (sc->sc_suppress) 1628 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 1629 CARP_UNLOCK(sc); 1630 1631 CIF_LOCK(cif); 1632 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1633 CIF_UNLOCK(cif); 1634 1635 mtx_lock(&carp_mtx); 1636 LIST_REMOVE(sc, sc_next); 1637 mtx_unlock(&carp_mtx); 1638 1639 callout_drain(&sc->sc_ad_tmo); 1640 #ifdef INET 1641 callout_drain(&sc->sc_md_tmo); 1642 #endif 1643 #ifdef INET6 1644 callout_drain(&sc->sc_md6_tmo); 1645 #endif 1646 CARP_LOCK_DESTROY(sc); 1647 1648 free(sc->sc_ifas, M_CARP); 1649 free(sc, M_CARP); 1650 } 1651 1652 static struct carp_if* 1653 carp_alloc_if(struct ifnet *ifp) 1654 { 1655 struct carp_if *cif; 1656 int error; 1657 1658 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1659 1660 if ((error = ifpromisc(ifp, 1)) != 0) 1661 printf("%s: ifpromisc(%s) failed: %d\n", 1662 __func__, ifp->if_xname, error); 1663 else 1664 cif->cif_flags |= CIF_PROMISC; 1665 1666 CIF_LOCK_INIT(cif); 1667 cif->cif_ifp = ifp; 1668 TAILQ_INIT(&cif->cif_vrs); 1669 1670 IF_ADDR_WLOCK(ifp); 1671 ifp->if_carp = cif; 1672 if_ref(ifp); 1673 IF_ADDR_WUNLOCK(ifp); 1674 1675 return (cif); 1676 } 1677 1678 static void 1679 carp_free_if(struct carp_if *cif) 1680 { 1681 struct ifnet *ifp = cif->cif_ifp; 1682 1683 CIF_LOCK_ASSERT(cif); 1684 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1685 __func__)); 1686 1687 IF_ADDR_WLOCK(ifp); 1688 ifp->if_carp = NULL; 1689 IF_ADDR_WUNLOCK(ifp); 1690 1691 CIF_LOCK_DESTROY(cif); 1692 1693 if (cif->cif_flags & CIF_PROMISC) 1694 ifpromisc(ifp, 0); 1695 if_rele(ifp); 1696 1697 free(cif, M_CARP); 1698 } 1699 1700 static void 1701 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) 1702 { 1703 1704 CARP_LOCK(sc); 1705 carpr->carpr_state = sc->sc_state; 1706 carpr->carpr_vhid = sc->sc_vhid; 1707 carpr->carpr_advbase = sc->sc_advbase; 1708 carpr->carpr_advskew = sc->sc_advskew; 1709 if (priv) 1710 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1711 else 1712 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1713 CARP_UNLOCK(sc); 1714 } 1715 1716 int 1717 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1718 { 1719 struct carpreq carpr; 1720 struct ifnet *ifp; 1721 struct carp_softc *sc = NULL; 1722 int error = 0, locked = 0; 1723 1724 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 1725 return (error); 1726 1727 ifp = ifunit_ref(ifr->ifr_name); 1728 if (ifp == NULL) 1729 return (ENXIO); 1730 1731 switch (ifp->if_type) { 1732 case IFT_ETHER: 1733 case IFT_L2VLAN: 1734 case IFT_BRIDGE: 1735 break; 1736 default: 1737 error = EOPNOTSUPP; 1738 goto out; 1739 } 1740 1741 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1742 error = EADDRNOTAVAIL; 1743 goto out; 1744 } 1745 1746 sx_xlock(&carp_sx); 1747 switch (cmd) { 1748 case SIOCSVH: 1749 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1750 break; 1751 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || 1752 carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { 1753 error = EINVAL; 1754 break; 1755 } 1756 1757 if (ifp->if_carp) { 1758 IFNET_FOREACH_CARP(ifp, sc) 1759 if (sc->sc_vhid == carpr.carpr_vhid) 1760 break; 1761 } 1762 if (sc == NULL) { 1763 sc = carp_alloc(ifp); 1764 CARP_LOCK(sc); 1765 sc->sc_vhid = carpr.carpr_vhid; 1766 LLADDR(&sc->sc_addr)[0] = 0; 1767 LLADDR(&sc->sc_addr)[1] = 0; 1768 LLADDR(&sc->sc_addr)[2] = 0x5e; 1769 LLADDR(&sc->sc_addr)[3] = 0; 1770 LLADDR(&sc->sc_addr)[4] = 1; 1771 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1772 } else 1773 CARP_LOCK(sc); 1774 locked = 1; 1775 if (carpr.carpr_advbase > 0) { 1776 if (carpr.carpr_advbase > 255 || 1777 carpr.carpr_advbase < CARP_DFLTINTV) { 1778 error = EINVAL; 1779 break; 1780 } 1781 sc->sc_advbase = carpr.carpr_advbase; 1782 } 1783 if (carpr.carpr_advskew >= 255) { 1784 error = EINVAL; 1785 break; 1786 } 1787 sc->sc_advskew = carpr.carpr_advskew; 1788 if (carpr.carpr_key[0] != '\0') { 1789 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1790 carp_hmac_prepare(sc); 1791 } 1792 if (sc->sc_state != INIT && 1793 carpr.carpr_state != sc->sc_state) { 1794 switch (carpr.carpr_state) { 1795 case BACKUP: 1796 callout_stop(&sc->sc_ad_tmo); 1797 carp_set_state(sc, BACKUP, 1798 "user requested via ifconfig"); 1799 carp_setrun(sc, 0); 1800 carp_delroute(sc); 1801 break; 1802 case MASTER: 1803 carp_master_down_locked(sc, 1804 "user requested via ifconfig"); 1805 break; 1806 default: 1807 break; 1808 } 1809 } 1810 break; 1811 1812 case SIOCGVH: 1813 { 1814 int priveleged; 1815 1816 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { 1817 error = EINVAL; 1818 break; 1819 } 1820 if (carpr.carpr_count < 1) { 1821 error = EMSGSIZE; 1822 break; 1823 } 1824 if (ifp->if_carp == NULL) { 1825 error = ENOENT; 1826 break; 1827 } 1828 1829 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); 1830 if (carpr.carpr_vhid != 0) { 1831 IFNET_FOREACH_CARP(ifp, sc) 1832 if (sc->sc_vhid == carpr.carpr_vhid) 1833 break; 1834 if (sc == NULL) { 1835 error = ENOENT; 1836 break; 1837 } 1838 carp_carprcp(&carpr, sc, priveleged); 1839 error = copyout(&carpr, ifr_data_get_ptr(ifr), 1840 sizeof(carpr)); 1841 } else { 1842 int i, count; 1843 1844 count = 0; 1845 IFNET_FOREACH_CARP(ifp, sc) 1846 count++; 1847 1848 if (count > carpr.carpr_count) { 1849 CIF_UNLOCK(ifp->if_carp); 1850 error = EMSGSIZE; 1851 break; 1852 } 1853 1854 i = 0; 1855 IFNET_FOREACH_CARP(ifp, sc) { 1856 carp_carprcp(&carpr, sc, priveleged); 1857 carpr.carpr_count = count; 1858 error = copyout(&carpr, 1859 (char *)ifr_data_get_ptr(ifr) + 1860 (i * sizeof(carpr)), sizeof(carpr)); 1861 if (error) { 1862 CIF_UNLOCK(ifp->if_carp); 1863 break; 1864 } 1865 i++; 1866 } 1867 } 1868 break; 1869 } 1870 default: 1871 error = EINVAL; 1872 } 1873 sx_xunlock(&carp_sx); 1874 1875 out: 1876 if (locked) 1877 CARP_UNLOCK(sc); 1878 if_rele(ifp); 1879 1880 return (error); 1881 } 1882 1883 static int 1884 carp_get_vhid(struct ifaddr *ifa) 1885 { 1886 1887 if (ifa == NULL || ifa->ifa_carp == NULL) 1888 return (0); 1889 1890 return (ifa->ifa_carp->sc_vhid); 1891 } 1892 1893 int 1894 carp_attach(struct ifaddr *ifa, int vhid) 1895 { 1896 struct ifnet *ifp = ifa->ifa_ifp; 1897 struct carp_if *cif = ifp->if_carp; 1898 struct carp_softc *sc; 1899 int index, error; 1900 1901 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 1902 1903 switch (ifa->ifa_addr->sa_family) { 1904 #ifdef INET 1905 case AF_INET: 1906 #endif 1907 #ifdef INET6 1908 case AF_INET6: 1909 #endif 1910 break; 1911 default: 1912 return (EPROTOTYPE); 1913 } 1914 1915 sx_xlock(&carp_sx); 1916 if (ifp->if_carp == NULL) { 1917 sx_xunlock(&carp_sx); 1918 return (ENOPROTOOPT); 1919 } 1920 1921 IFNET_FOREACH_CARP(ifp, sc) 1922 if (sc->sc_vhid == vhid) 1923 break; 1924 if (sc == NULL) { 1925 sx_xunlock(&carp_sx); 1926 return (ENOENT); 1927 } 1928 1929 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 1930 if (error) { 1931 CIF_FREE(cif); 1932 sx_xunlock(&carp_sx); 1933 return (error); 1934 } 1935 1936 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 1937 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 1938 carp_grow_ifas(sc); 1939 1940 switch (ifa->ifa_addr->sa_family) { 1941 #ifdef INET 1942 case AF_INET: 1943 cif->cif_naddrs++; 1944 sc->sc_naddrs++; 1945 break; 1946 #endif 1947 #ifdef INET6 1948 case AF_INET6: 1949 cif->cif_naddrs6++; 1950 sc->sc_naddrs6++; 1951 break; 1952 #endif 1953 } 1954 1955 ifa_ref(ifa); 1956 1957 CARP_LOCK(sc); 1958 sc->sc_ifas[index - 1] = ifa; 1959 ifa->ifa_carp = sc; 1960 carp_hmac_prepare(sc); 1961 carp_sc_state(sc); 1962 CARP_UNLOCK(sc); 1963 1964 sx_xunlock(&carp_sx); 1965 1966 return (0); 1967 } 1968 1969 void 1970 carp_detach(struct ifaddr *ifa, bool keep_cif) 1971 { 1972 struct ifnet *ifp = ifa->ifa_ifp; 1973 struct carp_if *cif = ifp->if_carp; 1974 struct carp_softc *sc = ifa->ifa_carp; 1975 int i, index; 1976 1977 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 1978 1979 sx_xlock(&carp_sx); 1980 1981 CARP_LOCK(sc); 1982 /* Shift array. */ 1983 index = sc->sc_naddrs + sc->sc_naddrs6; 1984 for (i = 0; i < index; i++) 1985 if (sc->sc_ifas[i] == ifa) 1986 break; 1987 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 1988 for (; i < index - 1; i++) 1989 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 1990 sc->sc_ifas[index - 1] = NULL; 1991 1992 switch (ifa->ifa_addr->sa_family) { 1993 #ifdef INET 1994 case AF_INET: 1995 cif->cif_naddrs--; 1996 sc->sc_naddrs--; 1997 break; 1998 #endif 1999 #ifdef INET6 2000 case AF_INET6: 2001 cif->cif_naddrs6--; 2002 sc->sc_naddrs6--; 2003 break; 2004 #endif 2005 } 2006 2007 carp_ifa_delroute(ifa); 2008 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2009 2010 ifa->ifa_carp = NULL; 2011 ifa_free(ifa); 2012 2013 carp_hmac_prepare(sc); 2014 carp_sc_state(sc); 2015 2016 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2017 carp_destroy(sc); 2018 else 2019 CARP_UNLOCK(sc); 2020 2021 if (!keep_cif) 2022 CIF_FREE(cif); 2023 2024 sx_xunlock(&carp_sx); 2025 } 2026 2027 static void 2028 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2029 { 2030 2031 CARP_LOCK_ASSERT(sc); 2032 2033 if (sc->sc_state != state) { 2034 const char *carp_states[] = { CARP_STATES }; 2035 char subsys[IFNAMSIZ+5]; 2036 2037 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2038 sc->sc_carpdev->if_xname); 2039 2040 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2041 carp_states[sc->sc_state], carp_states[state], reason); 2042 2043 sc->sc_state = state; 2044 2045 devctl_notify("CARP", subsys, carp_states[state], NULL); 2046 } 2047 } 2048 2049 static void 2050 carp_linkstate(struct ifnet *ifp) 2051 { 2052 struct carp_softc *sc; 2053 2054 CIF_LOCK(ifp->if_carp); 2055 IFNET_FOREACH_CARP(ifp, sc) { 2056 CARP_LOCK(sc); 2057 carp_sc_state(sc); 2058 CARP_UNLOCK(sc); 2059 } 2060 CIF_UNLOCK(ifp->if_carp); 2061 } 2062 2063 static void 2064 carp_sc_state(struct carp_softc *sc) 2065 { 2066 2067 CARP_LOCK_ASSERT(sc); 2068 2069 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2070 !(sc->sc_carpdev->if_flags & IFF_UP) || 2071 !V_carp_allow) { 2072 callout_stop(&sc->sc_ad_tmo); 2073 #ifdef INET 2074 callout_stop(&sc->sc_md_tmo); 2075 #endif 2076 #ifdef INET6 2077 callout_stop(&sc->sc_md6_tmo); 2078 #endif 2079 carp_set_state(sc, INIT, "hardware interface down"); 2080 carp_setrun(sc, 0); 2081 if (!sc->sc_suppress) 2082 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2083 sc->sc_suppress = 1; 2084 } else { 2085 carp_set_state(sc, INIT, "hardware interface up"); 2086 carp_setrun(sc, 0); 2087 if (sc->sc_suppress) 2088 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2089 sc->sc_suppress = 0; 2090 } 2091 } 2092 2093 static void 2094 carp_demote_adj(int adj, char *reason) 2095 { 2096 atomic_add_int(&V_carp_demotion, adj); 2097 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2098 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2099 } 2100 2101 static int 2102 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2103 { 2104 int new, error; 2105 struct carp_softc *sc; 2106 2107 new = V_carp_allow; 2108 error = sysctl_handle_int(oidp, &new, 0, req); 2109 if (error || !req->newptr) 2110 return (error); 2111 2112 if (V_carp_allow != new) { 2113 V_carp_allow = new; 2114 2115 mtx_lock(&carp_mtx); 2116 LIST_FOREACH(sc, &carp_list, sc_next) { 2117 CARP_LOCK(sc); 2118 if (curvnet == sc->sc_carpdev->if_vnet) 2119 carp_sc_state(sc); 2120 CARP_UNLOCK(sc); 2121 } 2122 mtx_unlock(&carp_mtx); 2123 } 2124 2125 return (0); 2126 } 2127 2128 static int 2129 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2130 { 2131 int new, error; 2132 2133 new = V_carp_dscp; 2134 error = sysctl_handle_int(oidp, &new, 0, req); 2135 if (error || !req->newptr) 2136 return (error); 2137 2138 if (new < 0 || new > 63) 2139 return (EINVAL); 2140 2141 V_carp_dscp = new; 2142 2143 return (0); 2144 } 2145 2146 static int 2147 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2148 { 2149 int new, error; 2150 2151 new = V_carp_demotion; 2152 error = sysctl_handle_int(oidp, &new, 0, req); 2153 if (error || !req->newptr) 2154 return (error); 2155 2156 carp_demote_adj(new, "sysctl"); 2157 2158 return (0); 2159 } 2160 2161 #ifdef INET 2162 extern struct domain inetdomain; 2163 static struct protosw in_carp_protosw = { 2164 .pr_type = SOCK_RAW, 2165 .pr_domain = &inetdomain, 2166 .pr_protocol = IPPROTO_CARP, 2167 .pr_flags = PR_ATOMIC|PR_ADDR, 2168 .pr_input = carp_input, 2169 .pr_output = rip_output, 2170 .pr_ctloutput = rip_ctloutput, 2171 .pr_usrreqs = &rip_usrreqs 2172 }; 2173 #endif 2174 2175 #ifdef INET6 2176 extern struct domain inet6domain; 2177 static struct protosw in6_carp_protosw = { 2178 .pr_type = SOCK_RAW, 2179 .pr_domain = &inet6domain, 2180 .pr_protocol = IPPROTO_CARP, 2181 .pr_flags = PR_ATOMIC|PR_ADDR, 2182 .pr_input = carp6_input, 2183 .pr_output = rip6_output, 2184 .pr_ctloutput = rip6_ctloutput, 2185 .pr_usrreqs = &rip6_usrreqs 2186 }; 2187 #endif 2188 2189 static void 2190 carp_mod_cleanup(void) 2191 { 2192 2193 #ifdef INET 2194 if (proto_reg[CARP_INET] == 0) { 2195 (void)ipproto_unregister(IPPROTO_CARP); 2196 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2197 proto_reg[CARP_INET] = -1; 2198 } 2199 carp_iamatch_p = NULL; 2200 #endif 2201 #ifdef INET6 2202 if (proto_reg[CARP_INET6] == 0) { 2203 (void)ip6proto_unregister(IPPROTO_CARP); 2204 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2205 proto_reg[CARP_INET6] = -1; 2206 } 2207 carp_iamatch6_p = NULL; 2208 carp_macmatch6_p = NULL; 2209 #endif 2210 carp_ioctl_p = NULL; 2211 carp_attach_p = NULL; 2212 carp_detach_p = NULL; 2213 carp_get_vhid_p = NULL; 2214 carp_linkstate_p = NULL; 2215 carp_forus_p = NULL; 2216 carp_output_p = NULL; 2217 carp_demote_adj_p = NULL; 2218 carp_master_p = NULL; 2219 mtx_unlock(&carp_mtx); 2220 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2221 mtx_destroy(&carp_mtx); 2222 sx_destroy(&carp_sx); 2223 } 2224 2225 static int 2226 carp_mod_load(void) 2227 { 2228 int err; 2229 2230 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2231 sx_init(&carp_sx, "carp_sx"); 2232 LIST_INIT(&carp_list); 2233 carp_get_vhid_p = carp_get_vhid; 2234 carp_forus_p = carp_forus; 2235 carp_output_p = carp_output; 2236 carp_linkstate_p = carp_linkstate; 2237 carp_ioctl_p = carp_ioctl; 2238 carp_attach_p = carp_attach; 2239 carp_detach_p = carp_detach; 2240 carp_demote_adj_p = carp_demote_adj; 2241 carp_master_p = carp_master; 2242 #ifdef INET6 2243 carp_iamatch6_p = carp_iamatch6; 2244 carp_macmatch6_p = carp_macmatch6; 2245 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2246 (struct protosw *)&in6_carp_protosw); 2247 if (proto_reg[CARP_INET6]) { 2248 printf("carp: error %d attaching to PF_INET6\n", 2249 proto_reg[CARP_INET6]); 2250 carp_mod_cleanup(); 2251 return (proto_reg[CARP_INET6]); 2252 } 2253 err = ip6proto_register(IPPROTO_CARP); 2254 if (err) { 2255 printf("carp: error %d registering with INET6\n", err); 2256 carp_mod_cleanup(); 2257 return (err); 2258 } 2259 #endif 2260 #ifdef INET 2261 carp_iamatch_p = carp_iamatch; 2262 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2263 if (proto_reg[CARP_INET]) { 2264 printf("carp: error %d attaching to PF_INET\n", 2265 proto_reg[CARP_INET]); 2266 carp_mod_cleanup(); 2267 return (proto_reg[CARP_INET]); 2268 } 2269 err = ipproto_register(IPPROTO_CARP); 2270 if (err) { 2271 printf("carp: error %d registering with INET\n", err); 2272 carp_mod_cleanup(); 2273 return (err); 2274 } 2275 #endif 2276 return (0); 2277 } 2278 2279 static int 2280 carp_modevent(module_t mod, int type, void *data) 2281 { 2282 switch (type) { 2283 case MOD_LOAD: 2284 return carp_mod_load(); 2285 /* NOTREACHED */ 2286 case MOD_UNLOAD: 2287 mtx_lock(&carp_mtx); 2288 if (LIST_EMPTY(&carp_list)) 2289 carp_mod_cleanup(); 2290 else { 2291 mtx_unlock(&carp_mtx); 2292 return (EBUSY); 2293 } 2294 break; 2295 2296 default: 2297 return (EINVAL); 2298 } 2299 2300 return (0); 2301 } 2302 2303 static moduledata_t carp_mod = { 2304 "carp", 2305 carp_modevent, 2306 0 2307 }; 2308 2309 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2310