1 /*- 2 * Copyright (c) 2002 Michael Shalayeff. 3 * Copyright (c) 2003 Ryan McBride. 4 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_bpf.h" 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/bus.h> 39 #include <sys/jail.h> 40 #include <sys/kernel.h> 41 #include <sys/limits.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/module.h> 45 #include <sys/priv.h> 46 #include <sys/proc.h> 47 #include <sys/protosw.h> 48 #include <sys/socket.h> 49 #include <sys/sockio.h> 50 #include <sys/sysctl.h> 51 #include <sys/syslog.h> 52 #include <sys/taskqueue.h> 53 #include <sys/counter.h> 54 55 #include <net/ethernet.h> 56 #include <net/fddi.h> 57 #include <net/if.h> 58 #include <net/if_dl.h> 59 #include <net/if_llatbl.h> 60 #include <net/if_types.h> 61 #include <net/iso88025.h> 62 #include <net/route.h> 63 #include <net/vnet.h> 64 65 #if defined(INET) || defined(INET6) 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/ip_carp.h> 69 #include <netinet/ip.h> 70 #include <machine/in_cksum.h> 71 #endif 72 #ifdef INET 73 #include <netinet/ip_var.h> 74 #include <netinet/if_ether.h> 75 #endif 76 77 #ifdef INET6 78 #include <netinet/icmp6.h> 79 #include <netinet/ip6.h> 80 #include <netinet6/ip6protosw.h> 81 #include <netinet6/in6_var.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/scope6_var.h> 84 #include <netinet6/nd6.h> 85 #endif 86 87 #include <crypto/sha1.h> 88 89 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); 90 91 struct carp_softc { 92 struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ 93 struct ifaddr **sc_ifas; /* Our ifaddrs. */ 94 struct sockaddr_dl sc_addr; /* Our link level address. */ 95 struct callout sc_ad_tmo; /* Advertising timeout. */ 96 #ifdef INET 97 struct callout sc_md_tmo; /* Master down timeout. */ 98 #endif 99 #ifdef INET6 100 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ 101 #endif 102 struct mtx sc_mtx; 103 104 int sc_vhid; 105 int sc_advskew; 106 int sc_advbase; 107 108 int sc_naddrs; 109 int sc_naddrs6; 110 int sc_ifasiz; 111 enum { INIT = 0, BACKUP, MASTER } sc_state; 112 int sc_suppress; 113 int sc_sendad_errors; 114 #define CARP_SENDAD_MAX_ERRORS 3 115 int sc_sendad_success; 116 #define CARP_SENDAD_MIN_SUCCESS 3 117 118 int sc_init_counter; 119 uint64_t sc_counter; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char sc_key[CARP_KEY_LEN]; 124 unsigned char sc_pad[CARP_HMAC_PAD]; 125 SHA1_CTX sc_sha1; 126 127 TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ 128 LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ 129 }; 130 131 struct carp_if { 132 #ifdef INET 133 int cif_naddrs; 134 #endif 135 #ifdef INET6 136 int cif_naddrs6; 137 #endif 138 TAILQ_HEAD(, carp_softc) cif_vrs; 139 #ifdef INET 140 struct ip_moptions cif_imo; 141 #endif 142 #ifdef INET6 143 struct ip6_moptions cif_im6o; 144 #endif 145 struct ifnet *cif_ifp; 146 struct mtx cif_mtx; 147 }; 148 149 #define CARP_INET 0 150 #define CARP_INET6 1 151 static int proto_reg[] = {-1, -1}; 152 153 /* 154 * Brief design of carp(4). 155 * 156 * Any carp-capable ifnet may have a list of carp softcs hanging off 157 * its ifp->if_carp pointer. Each softc represents one unique virtual 158 * host id, or vhid. The softc has a back pointer to the ifnet. All 159 * softcs are joined in a global list, which has quite limited use. 160 * 161 * Any interface address that takes part in CARP negotiation has a 162 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either 163 * AF_INET or AF_INET6 address. 164 * 165 * Although, one can get the softc's backpointer to ifnet and traverse 166 * through its ifp->if_addrhead queue to find all interface addresses 167 * involved in CARP, we keep a growable array of ifaddr pointers. This 168 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that 169 * do calls into the network stack, thus avoiding LORs. 170 * 171 * Locking: 172 * 173 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), 174 * callout-driven events and ioctl()s. 175 * 176 * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to 177 * traverse the global list we use the mutex carp_mtx. 178 * 179 * Known issues with locking: 180 * 181 * - There is no protection for races between two ioctl() requests, 182 * neither SIOCSVH, nor SIOCAIFADDR & SIOCAIFADDR_IN6. I think that all 183 * interface ioctl()s should be serialized right in net/if.c. 184 * - Sending ad, we put the pointer to the softc in an mtag, and no reference 185 * counting is done on the softc. 186 * - On module unload we may race (?) with packet processing thread 187 * dereferencing our function pointers. 188 */ 189 190 static int carp_allow = 1; /* Accept incoming CARP packets. */ 191 static int carp_preempt = 0; /* Preempt slower nodes. */ 192 static int carp_log = 1; /* Log level. */ 193 static int carp_demotion = 0; /* Global advskew demotion. */ 194 static int carp_senderr_adj = CARP_MAXSKEW; /* Send error demotion factor */ 195 static int carp_ifdown_adj = CARP_MAXSKEW; /* Iface down demotion factor */ 196 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 197 198 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); 199 SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, &carp_allow, 0, 200 "Accept incoming CARP packets"); 201 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, &carp_preempt, 0, 202 "High-priority backup preemption mode"); 203 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, &carp_log, 0, 204 "CARP log level"); 205 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW, 206 0, 0, carp_demote_adj_sysctl, "I", 207 "Adjust demotion factor (skew of advskew)"); 208 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW, 209 &carp_senderr_adj, 0, "Send error demotion factor adjustment"); 210 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW, 211 &carp_ifdown_adj, 0, "Interface down demotion factor adjustment"); 212 213 static counter_u64_t carpstats[sizeof(struct carpstats) / sizeof(uint64_t)]; 214 #define CARPSTATS_ADD(name, val) \ 215 counter_u64_add(carpstats[offsetof(struct carpstats, name) / \ 216 sizeof(uint64_t)], (val)) 217 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 218 219 static int 220 carpstats_sysctl(SYSCTL_HANDLER_ARGS) 221 { 222 struct carpstats s; 223 224 COUNTER_ARRAY_COPY(carpstats, &s, sizeof(s) / sizeof(uint64_t)); 225 if (req->newptr) 226 COUNTER_ARRAY_ZERO(carpstats, sizeof(s) / sizeof(uint64_t)); 227 return (SYSCTL_OUT(req, &s, sizeof(s))); 228 } 229 SYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW, 230 NULL, 0, carpstats_sysctl, "I", 231 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 232 233 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 234 NULL, MTX_DEF) 235 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 236 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 237 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 238 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 239 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 240 NULL, MTX_DEF) 241 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 242 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 243 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 244 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 245 #define CIF_FREE(cif) do { \ 246 CIF_LOCK_ASSERT(cif); \ 247 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 248 carp_free_if(cif); \ 249 else \ 250 CIF_UNLOCK(cif); \ 251 } while (0) 252 253 #define CARP_LOG(...) do { \ 254 if (carp_log > 0) \ 255 log(LOG_INFO, "carp: " __VA_ARGS__); \ 256 } while (0) 257 258 #define CARP_DEBUG(...) do { \ 259 if (carp_log > 1) \ 260 log(LOG_DEBUG, __VA_ARGS__); \ 261 } while (0) 262 263 #define IFNET_FOREACH_IFA(ifp, ifa) \ 264 IF_ADDR_LOCK_ASSERT(ifp); \ 265 TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 266 if ((ifa)->ifa_carp != NULL) 267 268 #define CARP_FOREACH_IFA(sc, ifa) \ 269 CARP_LOCK_ASSERT(sc); \ 270 for (int _i = 0; \ 271 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 272 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 273 ++_i) 274 275 #define IFNET_FOREACH_CARP(ifp, sc) \ 276 CIF_LOCK_ASSERT(ifp->if_carp); \ 277 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 278 279 #define DEMOTE_ADVSKEW(sc) \ 280 (((sc)->sc_advskew + carp_demotion > CARP_MAXSKEW) ? \ 281 CARP_MAXSKEW : ((sc)->sc_advskew + carp_demotion)) 282 283 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 284 static struct carp_softc 285 *carp_alloc(struct ifnet *); 286 static void carp_detach_locked(struct ifaddr *); 287 static void carp_destroy(struct carp_softc *); 288 static struct carp_if 289 *carp_alloc_if(struct ifnet *); 290 static void carp_free_if(struct carp_if *); 291 static void carp_set_state(struct carp_softc *, int); 292 static void carp_sc_state(struct carp_softc *); 293 static void carp_setrun(struct carp_softc *, sa_family_t); 294 static void carp_master_down(void *); 295 static void carp_master_down_locked(struct carp_softc *); 296 static void carp_send_ad(void *); 297 static void carp_send_ad_locked(struct carp_softc *); 298 static void carp_addroute(struct carp_softc *); 299 static void carp_ifa_addroute(struct ifaddr *); 300 static void carp_delroute(struct carp_softc *); 301 static void carp_ifa_delroute(struct ifaddr *); 302 static void carp_send_ad_all(void *, int); 303 static void carp_demote_adj(int, char *); 304 305 static LIST_HEAD(, carp_softc) carp_list; 306 static struct mtx carp_mtx; 307 static struct task carp_sendall_task = 308 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 309 310 static void 311 carp_hmac_prepare(struct carp_softc *sc) 312 { 313 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 314 uint8_t vhid = sc->sc_vhid & 0xff; 315 struct ifaddr *ifa; 316 int i, found; 317 #ifdef INET 318 struct in_addr last, cur, in; 319 #endif 320 #ifdef INET6 321 struct in6_addr last6, cur6, in6; 322 #endif 323 324 CARP_LOCK_ASSERT(sc); 325 326 /* Compute ipad from key. */ 327 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 328 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 329 for (i = 0; i < sizeof(sc->sc_pad); i++) 330 sc->sc_pad[i] ^= 0x36; 331 332 /* Precompute first part of inner hash. */ 333 SHA1Init(&sc->sc_sha1); 334 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 335 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 336 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 337 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 338 #ifdef INET 339 cur.s_addr = 0; 340 do { 341 found = 0; 342 last = cur; 343 cur.s_addr = 0xffffffff; 344 CARP_FOREACH_IFA(sc, ifa) { 345 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 346 if (ifa->ifa_addr->sa_family == AF_INET && 347 ntohl(in.s_addr) > ntohl(last.s_addr) && 348 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 349 cur.s_addr = in.s_addr; 350 found++; 351 } 352 } 353 if (found) 354 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 355 } while (found); 356 #endif /* INET */ 357 #ifdef INET6 358 memset(&cur6, 0, sizeof(cur6)); 359 do { 360 found = 0; 361 last6 = cur6; 362 memset(&cur6, 0xff, sizeof(cur6)); 363 CARP_FOREACH_IFA(sc, ifa) { 364 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 365 if (IN6_IS_SCOPE_EMBED(&in6)) 366 in6.s6_addr16[1] = 0; 367 if (ifa->ifa_addr->sa_family == AF_INET6 && 368 memcmp(&in6, &last6, sizeof(in6)) > 0 && 369 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 370 cur6 = in6; 371 found++; 372 } 373 } 374 if (found) 375 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 376 } while (found); 377 #endif /* INET6 */ 378 379 /* convert ipad to opad */ 380 for (i = 0; i < sizeof(sc->sc_pad); i++) 381 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 382 } 383 384 static void 385 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 386 unsigned char md[20]) 387 { 388 SHA1_CTX sha1ctx; 389 390 CARP_LOCK_ASSERT(sc); 391 392 /* fetch first half of inner hash */ 393 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 394 395 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 396 SHA1Final(md, &sha1ctx); 397 398 /* outer hash */ 399 SHA1Init(&sha1ctx); 400 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 401 SHA1Update(&sha1ctx, md, 20); 402 SHA1Final(md, &sha1ctx); 403 } 404 405 static int 406 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 407 unsigned char md[20]) 408 { 409 unsigned char md2[20]; 410 411 CARP_LOCK_ASSERT(sc); 412 413 carp_hmac_generate(sc, counter, md2); 414 415 return (bcmp(md, md2, sizeof(md2))); 416 } 417 418 /* 419 * process input packet. 420 * we have rearranged checks order compared to the rfc, 421 * but it seems more efficient this way or not possible otherwise. 422 */ 423 #ifdef INET 424 void 425 carp_input(struct mbuf *m, int hlen) 426 { 427 struct ip *ip = mtod(m, struct ip *); 428 struct carp_header *ch; 429 int iplen, len; 430 431 CARPSTATS_INC(carps_ipackets); 432 433 if (!carp_allow) { 434 m_freem(m); 435 return; 436 } 437 438 /* verify that the IP TTL is 255. */ 439 if (ip->ip_ttl != CARP_DFLTTL) { 440 CARPSTATS_INC(carps_badttl); 441 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 442 ip->ip_ttl, 443 m->m_pkthdr.rcvif->if_xname); 444 m_freem(m); 445 return; 446 } 447 448 iplen = ip->ip_hl << 2; 449 450 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 451 CARPSTATS_INC(carps_badlen); 452 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 453 "on %s\n", __func__, m->m_len - sizeof(struct ip), 454 m->m_pkthdr.rcvif->if_xname); 455 m_freem(m); 456 return; 457 } 458 459 if (iplen + sizeof(*ch) < m->m_len) { 460 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 461 CARPSTATS_INC(carps_hdrops); 462 CARP_DEBUG("%s: pullup failed\n", __func__); 463 return; 464 } 465 ip = mtod(m, struct ip *); 466 } 467 ch = (struct carp_header *)((char *)ip + iplen); 468 469 /* 470 * verify that the received packet length is 471 * equal to the CARP header 472 */ 473 len = iplen + sizeof(*ch); 474 if (len > m->m_pkthdr.len) { 475 CARPSTATS_INC(carps_badlen); 476 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 477 m->m_pkthdr.len, 478 m->m_pkthdr.rcvif->if_xname); 479 m_freem(m); 480 return; 481 } 482 483 if ((m = m_pullup(m, len)) == NULL) { 484 CARPSTATS_INC(carps_hdrops); 485 return; 486 } 487 ip = mtod(m, struct ip *); 488 ch = (struct carp_header *)((char *)ip + iplen); 489 490 /* verify the CARP checksum */ 491 m->m_data += iplen; 492 if (in_cksum(m, len - iplen)) { 493 CARPSTATS_INC(carps_badsum); 494 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 495 m->m_pkthdr.rcvif->if_xname); 496 m_freem(m); 497 return; 498 } 499 m->m_data -= iplen; 500 501 carp_input_c(m, ch, AF_INET); 502 } 503 #endif 504 505 #ifdef INET6 506 int 507 carp6_input(struct mbuf **mp, int *offp, int proto) 508 { 509 struct mbuf *m = *mp; 510 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 511 struct carp_header *ch; 512 u_int len; 513 514 CARPSTATS_INC(carps_ipackets6); 515 516 if (!carp_allow) { 517 m_freem(m); 518 return (IPPROTO_DONE); 519 } 520 521 /* check if received on a valid carp interface */ 522 if (m->m_pkthdr.rcvif->if_carp == NULL) { 523 CARPSTATS_INC(carps_badif); 524 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 525 __func__, m->m_pkthdr.rcvif->if_xname); 526 m_freem(m); 527 return (IPPROTO_DONE); 528 } 529 530 /* verify that the IP TTL is 255 */ 531 if (ip6->ip6_hlim != CARP_DFLTTL) { 532 CARPSTATS_INC(carps_badttl); 533 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 534 ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); 535 m_freem(m); 536 return (IPPROTO_DONE); 537 } 538 539 /* verify that we have a complete carp packet */ 540 len = m->m_len; 541 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 542 if (ch == NULL) { 543 CARPSTATS_INC(carps_badlen); 544 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 545 return (IPPROTO_DONE); 546 } 547 548 549 /* verify the CARP checksum */ 550 m->m_data += *offp; 551 if (in_cksum(m, sizeof(*ch))) { 552 CARPSTATS_INC(carps_badsum); 553 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 554 m->m_pkthdr.rcvif->if_xname); 555 m_freem(m); 556 return (IPPROTO_DONE); 557 } 558 m->m_data -= *offp; 559 560 carp_input_c(m, ch, AF_INET6); 561 return (IPPROTO_DONE); 562 } 563 #endif /* INET6 */ 564 565 static void 566 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 567 { 568 struct ifnet *ifp = m->m_pkthdr.rcvif; 569 struct ifaddr *ifa; 570 struct carp_softc *sc; 571 uint64_t tmp_counter; 572 struct timeval sc_tv, ch_tv; 573 574 /* verify that the VHID is valid on the receiving interface */ 575 IF_ADDR_RLOCK(ifp); 576 IFNET_FOREACH_IFA(ifp, ifa) 577 if (ifa->ifa_addr->sa_family == af && 578 ifa->ifa_carp->sc_vhid == ch->carp_vhid) { 579 ifa_ref(ifa); 580 break; 581 } 582 IF_ADDR_RUNLOCK(ifp); 583 584 if (ifa == NULL) { 585 CARPSTATS_INC(carps_badvhid); 586 m_freem(m); 587 return; 588 } 589 590 /* verify the CARP version. */ 591 if (ch->carp_version != CARP_VERSION) { 592 CARPSTATS_INC(carps_badver); 593 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname, 594 ch->carp_version); 595 ifa_free(ifa); 596 m_freem(m); 597 return; 598 } 599 600 sc = ifa->ifa_carp; 601 CARP_LOCK(sc); 602 ifa_free(ifa); 603 604 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 605 CARPSTATS_INC(carps_badauth); 606 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 607 sc->sc_vhid, ifp->if_xname); 608 goto out; 609 } 610 611 tmp_counter = ntohl(ch->carp_counter[0]); 612 tmp_counter = tmp_counter<<32; 613 tmp_counter += ntohl(ch->carp_counter[1]); 614 615 /* XXX Replay protection goes here */ 616 617 sc->sc_init_counter = 0; 618 sc->sc_counter = tmp_counter; 619 620 sc_tv.tv_sec = sc->sc_advbase; 621 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 622 ch_tv.tv_sec = ch->carp_advbase; 623 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 624 625 switch (sc->sc_state) { 626 case INIT: 627 break; 628 case MASTER: 629 /* 630 * If we receive an advertisement from a master who's going to 631 * be more frequent than us, go into BACKUP state. 632 */ 633 if (timevalcmp(&sc_tv, &ch_tv, >) || 634 timevalcmp(&sc_tv, &ch_tv, ==)) { 635 callout_stop(&sc->sc_ad_tmo); 636 CARP_LOG("VHID %u@%s: MASTER -> BACKUP " 637 "(more frequent advertisement received)\n", 638 sc->sc_vhid, 639 sc->sc_carpdev->if_xname); 640 carp_set_state(sc, BACKUP); 641 carp_setrun(sc, 0); 642 carp_delroute(sc); 643 } 644 break; 645 case BACKUP: 646 /* 647 * If we're pre-empting masters who advertise slower than us, 648 * and this one claims to be slower, treat him as down. 649 */ 650 if (carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 651 CARP_LOG("VHID %u@%s: BACKUP -> MASTER " 652 "(preempting a slower master)\n", 653 sc->sc_vhid, 654 sc->sc_carpdev->if_xname); 655 carp_master_down_locked(sc); 656 break; 657 } 658 659 /* 660 * If the master is going to advertise at such a low frequency 661 * that he's guaranteed to time out, we'd might as well just 662 * treat him as timed out now. 663 */ 664 sc_tv.tv_sec = sc->sc_advbase * 3; 665 if (timevalcmp(&sc_tv, &ch_tv, <)) { 666 CARP_LOG("VHID %u@%s: BACKUP -> MASTER " 667 "(master timed out)\n", 668 sc->sc_vhid, 669 sc->sc_carpdev->if_xname); 670 carp_master_down_locked(sc); 671 break; 672 } 673 674 /* 675 * Otherwise, we reset the counter and wait for the next 676 * advertisement. 677 */ 678 carp_setrun(sc, af); 679 break; 680 } 681 682 out: 683 CARP_UNLOCK(sc); 684 m_freem(m); 685 } 686 687 static int 688 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 689 { 690 struct m_tag *mtag; 691 692 if (sc->sc_init_counter) { 693 /* this could also be seconds since unix epoch */ 694 sc->sc_counter = arc4random(); 695 sc->sc_counter = sc->sc_counter << 32; 696 sc->sc_counter += arc4random(); 697 } else 698 sc->sc_counter++; 699 700 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 701 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 702 703 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 704 705 /* Tag packet for carp_output */ 706 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), 707 M_NOWAIT)) == NULL) { 708 m_freem(m); 709 CARPSTATS_INC(carps_onomem); 710 return (ENOMEM); 711 } 712 bcopy(&sc, mtag + 1, sizeof(sc)); 713 m_tag_prepend(m, mtag); 714 715 return (0); 716 } 717 718 /* 719 * To avoid LORs and possible recursions this function shouldn't 720 * be called directly, but scheduled via taskqueue. 721 */ 722 static void 723 carp_send_ad_all(void *ctx __unused, int pending __unused) 724 { 725 struct carp_softc *sc; 726 727 mtx_lock(&carp_mtx); 728 LIST_FOREACH(sc, &carp_list, sc_next) 729 if (sc->sc_state == MASTER) { 730 CARP_LOCK(sc); 731 CURVNET_SET(sc->sc_carpdev->if_vnet); 732 carp_send_ad_locked(sc); 733 CURVNET_RESTORE(); 734 CARP_UNLOCK(sc); 735 } 736 mtx_unlock(&carp_mtx); 737 } 738 739 /* Send a periodic advertisement, executed in callout context. */ 740 static void 741 carp_send_ad(void *v) 742 { 743 struct carp_softc *sc = v; 744 745 CARP_LOCK_ASSERT(sc); 746 CURVNET_SET(sc->sc_carpdev->if_vnet); 747 carp_send_ad_locked(sc); 748 CURVNET_RESTORE(); 749 CARP_UNLOCK(sc); 750 } 751 752 static void 753 carp_send_ad_locked(struct carp_softc *sc) 754 { 755 struct carp_header ch; 756 struct timeval tv; 757 struct sockaddr sa; 758 struct ifaddr *ifa; 759 struct carp_header *ch_ptr; 760 struct mbuf *m; 761 int len, advskew; 762 763 CARP_LOCK_ASSERT(sc); 764 765 advskew = DEMOTE_ADVSKEW(sc); 766 tv.tv_sec = sc->sc_advbase; 767 tv.tv_usec = advskew * 1000000 / 256; 768 769 ch.carp_version = CARP_VERSION; 770 ch.carp_type = CARP_ADVERTISEMENT; 771 ch.carp_vhid = sc->sc_vhid; 772 ch.carp_advbase = sc->sc_advbase; 773 ch.carp_advskew = advskew; 774 ch.carp_authlen = 7; /* XXX DEFINE */ 775 ch.carp_pad1 = 0; /* must be zero */ 776 ch.carp_cksum = 0; 777 778 /* XXXGL: OpenBSD picks first ifaddr with needed family. */ 779 780 #ifdef INET 781 if (sc->sc_naddrs) { 782 struct ip *ip; 783 784 m = m_gethdr(M_NOWAIT, MT_DATA); 785 if (m == NULL) { 786 CARPSTATS_INC(carps_onomem); 787 goto resched; 788 } 789 len = sizeof(*ip) + sizeof(ch); 790 m->m_pkthdr.len = len; 791 m->m_pkthdr.rcvif = NULL; 792 m->m_len = len; 793 MH_ALIGN(m, m->m_len); 794 m->m_flags |= M_MCAST; 795 ip = mtod(m, struct ip *); 796 ip->ip_v = IPVERSION; 797 ip->ip_hl = sizeof(*ip) >> 2; 798 ip->ip_tos = IPTOS_LOWDELAY; 799 ip->ip_len = htons(len); 800 ip->ip_id = ip_newid(); 801 ip->ip_off = htons(IP_DF); 802 ip->ip_ttl = CARP_DFLTTL; 803 ip->ip_p = IPPROTO_CARP; 804 ip->ip_sum = 0; 805 806 bzero(&sa, sizeof(sa)); 807 sa.sa_family = AF_INET; 808 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 809 if (ifa != NULL) { 810 ip->ip_src.s_addr = 811 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 812 ifa_free(ifa); 813 } else 814 ip->ip_src.s_addr = 0; 815 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 816 817 ch_ptr = (struct carp_header *)(&ip[1]); 818 bcopy(&ch, ch_ptr, sizeof(ch)); 819 if (carp_prepare_ad(m, sc, ch_ptr)) 820 goto resched; 821 822 m->m_data += sizeof(*ip); 823 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 824 m->m_data -= sizeof(*ip); 825 826 CARPSTATS_INC(carps_opackets); 827 828 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, 829 &sc->sc_carpdev->if_carp->cif_imo, NULL)) { 830 if (sc->sc_sendad_errors < INT_MAX) 831 sc->sc_sendad_errors++; 832 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) 833 carp_demote_adj(carp_senderr_adj, "send error"); 834 sc->sc_sendad_success = 0; 835 } else { 836 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 837 if (++sc->sc_sendad_success >= 838 CARP_SENDAD_MIN_SUCCESS) { 839 carp_demote_adj(-carp_senderr_adj, 840 "send ok"); 841 sc->sc_sendad_errors = 0; 842 } 843 } else 844 sc->sc_sendad_errors = 0; 845 } 846 } 847 #endif /* INET */ 848 #ifdef INET6 849 if (sc->sc_naddrs6) { 850 struct ip6_hdr *ip6; 851 852 m = m_gethdr(M_NOWAIT, MT_DATA); 853 if (m == NULL) { 854 CARPSTATS_INC(carps_onomem); 855 goto resched; 856 } 857 len = sizeof(*ip6) + sizeof(ch); 858 m->m_pkthdr.len = len; 859 m->m_pkthdr.rcvif = NULL; 860 m->m_len = len; 861 MH_ALIGN(m, m->m_len); 862 m->m_flags |= M_MCAST; 863 ip6 = mtod(m, struct ip6_hdr *); 864 bzero(ip6, sizeof(*ip6)); 865 ip6->ip6_vfc |= IPV6_VERSION; 866 ip6->ip6_hlim = CARP_DFLTTL; 867 ip6->ip6_nxt = IPPROTO_CARP; 868 bzero(&sa, sizeof(sa)); 869 870 /* set the source address */ 871 sa.sa_family = AF_INET6; 872 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 873 if (ifa != NULL) { 874 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 875 sizeof(struct in6_addr)); 876 ifa_free(ifa); 877 } else 878 /* This should never happen with IPv6. */ 879 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 880 881 /* Set the multicast destination. */ 882 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 883 ip6->ip6_dst.s6_addr8[15] = 0x12; 884 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 885 m_freem(m); 886 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 887 goto resched; 888 } 889 890 ch_ptr = (struct carp_header *)(&ip6[1]); 891 bcopy(&ch, ch_ptr, sizeof(ch)); 892 if (carp_prepare_ad(m, sc, ch_ptr)) 893 goto resched; 894 895 m->m_data += sizeof(*ip6); 896 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 897 m->m_data -= sizeof(*ip6); 898 899 CARPSTATS_INC(carps_opackets6); 900 901 if (ip6_output(m, NULL, NULL, 0, 902 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)) { 903 if (sc->sc_sendad_errors < INT_MAX) 904 sc->sc_sendad_errors++; 905 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) 906 carp_demote_adj(carp_senderr_adj, 907 "send6 error"); 908 sc->sc_sendad_success = 0; 909 } else { 910 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 911 if (++sc->sc_sendad_success >= 912 CARP_SENDAD_MIN_SUCCESS) { 913 carp_demote_adj(-carp_senderr_adj, 914 "send6 ok"); 915 sc->sc_sendad_errors = 0; 916 } 917 } else 918 sc->sc_sendad_errors = 0; 919 } 920 } 921 #endif /* INET6 */ 922 923 resched: 924 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); 925 } 926 927 static void 928 carp_addroute(struct carp_softc *sc) 929 { 930 struct ifaddr *ifa; 931 932 CARP_FOREACH_IFA(sc, ifa) 933 carp_ifa_addroute(ifa); 934 } 935 936 static void 937 carp_ifa_addroute(struct ifaddr *ifa) 938 { 939 940 switch (ifa->ifa_addr->sa_family) { 941 #ifdef INET 942 case AF_INET: 943 in_addprefix(ifatoia(ifa), RTF_UP); 944 ifa_add_loopback_route(ifa, 945 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 946 break; 947 #endif 948 #ifdef INET6 949 case AF_INET6: 950 ifa_add_loopback_route(ifa, 951 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 952 in6_ifaddloop(ifa); 953 break; 954 #endif 955 } 956 } 957 958 static void 959 carp_delroute(struct carp_softc *sc) 960 { 961 struct ifaddr *ifa; 962 963 CARP_FOREACH_IFA(sc, ifa) 964 carp_ifa_delroute(ifa); 965 } 966 967 static void 968 carp_ifa_delroute(struct ifaddr *ifa) 969 { 970 971 switch (ifa->ifa_addr->sa_family) { 972 #ifdef INET 973 case AF_INET: 974 ifa_del_loopback_route(ifa, 975 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 976 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 977 break; 978 #endif 979 #ifdef INET6 980 case AF_INET6: 981 ifa_del_loopback_route(ifa, 982 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 983 in6_ifremloop(ifa); 984 break; 985 #endif 986 } 987 } 988 989 int 990 carp_master(struct ifaddr *ifa) 991 { 992 struct carp_softc *sc = ifa->ifa_carp; 993 994 return (sc->sc_state == MASTER); 995 } 996 997 #ifdef INET 998 /* 999 * Broadcast a gratuitous ARP request containing 1000 * the virtual router MAC address for each IP address 1001 * associated with the virtual router. 1002 */ 1003 static void 1004 carp_send_arp(struct carp_softc *sc) 1005 { 1006 struct ifaddr *ifa; 1007 1008 CARP_FOREACH_IFA(sc, ifa) 1009 if (ifa->ifa_addr->sa_family == AF_INET) 1010 arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr)); 1011 } 1012 1013 int 1014 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1015 { 1016 struct carp_softc *sc = ifa->ifa_carp; 1017 1018 if (sc->sc_state == MASTER) { 1019 *enaddr = LLADDR(&sc->sc_addr); 1020 return (1); 1021 } 1022 1023 return (0); 1024 } 1025 #endif 1026 1027 #ifdef INET6 1028 static void 1029 carp_send_na(struct carp_softc *sc) 1030 { 1031 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1032 struct ifaddr *ifa; 1033 struct in6_addr *in6; 1034 1035 CARP_FOREACH_IFA(sc, ifa) { 1036 if (ifa->ifa_addr->sa_family != AF_INET6) 1037 continue; 1038 1039 in6 = IFA_IN6(ifa); 1040 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1041 ND_NA_FLAG_OVERRIDE, 1, NULL); 1042 DELAY(1000); /* XXX */ 1043 } 1044 } 1045 1046 /* 1047 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1048 * matches and is not a carp address. Returns NULL otherwise. 1049 */ 1050 struct ifaddr * 1051 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1052 { 1053 struct ifaddr *ifa; 1054 1055 ifa = NULL; 1056 IF_ADDR_RLOCK(ifp); 1057 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1058 if (ifa->ifa_addr->sa_family != AF_INET6) 1059 continue; 1060 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1061 continue; 1062 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1063 ifa = NULL; 1064 else 1065 ifa_ref(ifa); 1066 break; 1067 } 1068 IF_ADDR_RUNLOCK(ifp); 1069 1070 return (ifa); 1071 } 1072 1073 caddr_t 1074 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1075 { 1076 struct ifaddr *ifa; 1077 1078 IF_ADDR_RLOCK(ifp); 1079 IFNET_FOREACH_IFA(ifp, ifa) 1080 if (ifa->ifa_addr->sa_family == AF_INET6 && 1081 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1082 struct carp_softc *sc = ifa->ifa_carp; 1083 struct m_tag *mtag; 1084 1085 IF_ADDR_RUNLOCK(ifp); 1086 1087 mtag = m_tag_get(PACKET_TAG_CARP, 1088 sizeof(struct carp_softc *), M_NOWAIT); 1089 if (mtag == NULL) 1090 /* Better a bit than nothing. */ 1091 return (LLADDR(&sc->sc_addr)); 1092 1093 bcopy(&sc, mtag + 1, sizeof(sc)); 1094 m_tag_prepend(m, mtag); 1095 1096 return (LLADDR(&sc->sc_addr)); 1097 } 1098 IF_ADDR_RUNLOCK(ifp); 1099 1100 return (NULL); 1101 } 1102 #endif /* INET6 */ 1103 1104 int 1105 carp_forus(struct ifnet *ifp, u_char *dhost) 1106 { 1107 struct carp_softc *sc; 1108 uint8_t *ena = dhost; 1109 1110 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1111 return (0); 1112 1113 CIF_LOCK(ifp->if_carp); 1114 IFNET_FOREACH_CARP(ifp, sc) { 1115 CARP_LOCK(sc); 1116 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1117 ETHER_ADDR_LEN)) { 1118 CARP_UNLOCK(sc); 1119 CIF_UNLOCK(ifp->if_carp); 1120 return (1); 1121 } 1122 CARP_UNLOCK(sc); 1123 } 1124 CIF_UNLOCK(ifp->if_carp); 1125 1126 return (0); 1127 } 1128 1129 /* Master down timeout event, executed in callout context. */ 1130 static void 1131 carp_master_down(void *v) 1132 { 1133 struct carp_softc *sc = v; 1134 1135 CARP_LOCK_ASSERT(sc); 1136 1137 CURVNET_SET(sc->sc_carpdev->if_vnet); 1138 if (sc->sc_state == BACKUP) { 1139 CARP_LOG("VHID %u@%s: BACKUP -> MASTER (master down)\n", 1140 sc->sc_vhid, 1141 sc->sc_carpdev->if_xname); 1142 carp_master_down_locked(sc); 1143 } 1144 CURVNET_RESTORE(); 1145 1146 CARP_UNLOCK(sc); 1147 } 1148 1149 static void 1150 carp_master_down_locked(struct carp_softc *sc) 1151 { 1152 1153 CARP_LOCK_ASSERT(sc); 1154 1155 switch (sc->sc_state) { 1156 case BACKUP: 1157 carp_set_state(sc, MASTER); 1158 carp_send_ad_locked(sc); 1159 #ifdef INET 1160 carp_send_arp(sc); 1161 #endif 1162 #ifdef INET6 1163 carp_send_na(sc); 1164 #endif 1165 carp_setrun(sc, 0); 1166 carp_addroute(sc); 1167 break; 1168 case INIT: 1169 case MASTER: 1170 #ifdef INVARIANTS 1171 panic("carp: VHID %u@%s: master_down event in %s state\n", 1172 sc->sc_vhid, 1173 sc->sc_carpdev->if_xname, 1174 sc->sc_state ? "MASTER" : "INIT"); 1175 #endif 1176 break; 1177 } 1178 } 1179 1180 /* 1181 * When in backup state, af indicates whether to reset the master down timer 1182 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1183 */ 1184 static void 1185 carp_setrun(struct carp_softc *sc, sa_family_t af) 1186 { 1187 struct timeval tv; 1188 1189 CARP_LOCK_ASSERT(sc); 1190 1191 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1192 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1193 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)) 1194 return; 1195 1196 switch (sc->sc_state) { 1197 case INIT: 1198 CARP_LOG("VHID %u@%s: INIT -> BACKUP\n", 1199 sc->sc_vhid, 1200 sc->sc_carpdev->if_xname); 1201 carp_set_state(sc, BACKUP); 1202 carp_setrun(sc, 0); 1203 break; 1204 case BACKUP: 1205 callout_stop(&sc->sc_ad_tmo); 1206 tv.tv_sec = 3 * sc->sc_advbase; 1207 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1208 switch (af) { 1209 #ifdef INET 1210 case AF_INET: 1211 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1212 carp_master_down, sc); 1213 break; 1214 #endif 1215 #ifdef INET6 1216 case AF_INET6: 1217 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1218 carp_master_down, sc); 1219 break; 1220 #endif 1221 default: 1222 #ifdef INET 1223 if (sc->sc_naddrs) 1224 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1225 carp_master_down, sc); 1226 #endif 1227 #ifdef INET6 1228 if (sc->sc_naddrs6) 1229 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1230 carp_master_down, sc); 1231 #endif 1232 break; 1233 } 1234 break; 1235 case MASTER: 1236 tv.tv_sec = sc->sc_advbase; 1237 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1238 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1239 carp_send_ad, sc); 1240 break; 1241 } 1242 } 1243 1244 /* 1245 * Setup multicast structures. 1246 */ 1247 static int 1248 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1249 { 1250 struct ifnet *ifp = cif->cif_ifp; 1251 int error = 0; 1252 1253 CIF_LOCK_ASSERT(cif); 1254 1255 switch (sa) { 1256 #ifdef INET 1257 case AF_INET: 1258 { 1259 struct ip_moptions *imo = &cif->cif_imo; 1260 struct in_addr addr; 1261 1262 if (imo->imo_membership) 1263 return (0); 1264 1265 imo->imo_membership = (struct in_multi **)malloc( 1266 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 1267 M_NOWAIT); 1268 if (imo->imo_membership == NULL) 1269 return (ENOMEM); 1270 imo->imo_mfilters = NULL; 1271 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1272 imo->imo_multicast_vif = -1; 1273 1274 addr.s_addr = htonl(INADDR_CARP_GROUP); 1275 if ((error = in_joingroup(ifp, &addr, NULL, 1276 &imo->imo_membership[0])) != 0) { 1277 free(imo->imo_membership, M_CARP); 1278 break; 1279 } 1280 imo->imo_num_memberships++; 1281 imo->imo_multicast_ifp = ifp; 1282 imo->imo_multicast_ttl = CARP_DFLTTL; 1283 imo->imo_multicast_loop = 0; 1284 break; 1285 } 1286 #endif 1287 #ifdef INET6 1288 case AF_INET6: 1289 { 1290 struct ip6_moptions *im6o = &cif->cif_im6o; 1291 struct in6_addr in6; 1292 struct in6_multi *in6m; 1293 1294 if (im6o->im6o_membership) 1295 return (0); 1296 1297 im6o->im6o_membership = (struct in6_multi **)malloc( 1298 (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, 1299 M_ZERO | M_NOWAIT); 1300 if (im6o->im6o_membership == NULL) 1301 return (ENOMEM); 1302 im6o->im6o_mfilters = NULL; 1303 im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; 1304 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1305 im6o->im6o_multicast_ifp = ifp; 1306 1307 /* Join IPv6 CARP multicast group. */ 1308 bzero(&in6, sizeof(in6)); 1309 in6.s6_addr16[0] = htons(0xff02); 1310 in6.s6_addr8[15] = 0x12; 1311 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1312 free(im6o->im6o_membership, M_CARP); 1313 break; 1314 } 1315 in6m = NULL; 1316 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { 1317 free(im6o->im6o_membership, M_CARP); 1318 break; 1319 } 1320 im6o->im6o_membership[0] = in6m; 1321 im6o->im6o_num_memberships++; 1322 1323 /* Join solicited multicast address. */ 1324 bzero(&in6, sizeof(in6)); 1325 in6.s6_addr16[0] = htons(0xff02); 1326 in6.s6_addr32[1] = 0; 1327 in6.s6_addr32[2] = htonl(1); 1328 in6.s6_addr32[3] = 0; 1329 in6.s6_addr8[12] = 0xff; 1330 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1331 in6_mc_leave(im6o->im6o_membership[0], NULL); 1332 free(im6o->im6o_membership, M_CARP); 1333 break; 1334 } 1335 in6m = NULL; 1336 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { 1337 in6_mc_leave(im6o->im6o_membership[0], NULL); 1338 free(im6o->im6o_membership, M_CARP); 1339 break; 1340 } 1341 im6o->im6o_membership[1] = in6m; 1342 im6o->im6o_num_memberships++; 1343 break; 1344 } 1345 #endif 1346 } 1347 1348 return (error); 1349 } 1350 1351 /* 1352 * Free multicast structures. 1353 */ 1354 static void 1355 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1356 { 1357 1358 CIF_LOCK_ASSERT(cif); 1359 switch (sa) { 1360 #ifdef INET 1361 case AF_INET: 1362 if (cif->cif_naddrs == 0) { 1363 struct ip_moptions *imo = &cif->cif_imo; 1364 1365 in_leavegroup(imo->imo_membership[0], NULL); 1366 KASSERT(imo->imo_mfilters == NULL, 1367 ("%s: imo_mfilters != NULL", __func__)); 1368 free(imo->imo_membership, M_CARP); 1369 imo->imo_membership = NULL; 1370 1371 } 1372 break; 1373 #endif 1374 #ifdef INET6 1375 case AF_INET6: 1376 if (cif->cif_naddrs6 == 0) { 1377 struct ip6_moptions *im6o = &cif->cif_im6o; 1378 1379 in6_mc_leave(im6o->im6o_membership[0], NULL); 1380 in6_mc_leave(im6o->im6o_membership[1], NULL); 1381 KASSERT(im6o->im6o_mfilters == NULL, 1382 ("%s: im6o_mfilters != NULL", __func__)); 1383 free(im6o->im6o_membership, M_CARP); 1384 im6o->im6o_membership = NULL; 1385 } 1386 break; 1387 #endif 1388 } 1389 } 1390 1391 int 1392 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1393 { 1394 struct m_tag *mtag; 1395 struct carp_softc *sc; 1396 1397 if (!sa) 1398 return (0); 1399 1400 switch (sa->sa_family) { 1401 #ifdef INET 1402 case AF_INET: 1403 break; 1404 #endif 1405 #ifdef INET6 1406 case AF_INET6: 1407 break; 1408 #endif 1409 default: 1410 return (0); 1411 } 1412 1413 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1414 if (mtag == NULL) 1415 return (0); 1416 1417 bcopy(mtag + 1, &sc, sizeof(sc)); 1418 1419 /* Set the source MAC address to the Virtual Router MAC Address. */ 1420 switch (ifp->if_type) { 1421 case IFT_ETHER: 1422 case IFT_BRIDGE: 1423 case IFT_L2VLAN: { 1424 struct ether_header *eh; 1425 1426 eh = mtod(m, struct ether_header *); 1427 eh->ether_shost[0] = 0; 1428 eh->ether_shost[1] = 0; 1429 eh->ether_shost[2] = 0x5e; 1430 eh->ether_shost[3] = 0; 1431 eh->ether_shost[4] = 1; 1432 eh->ether_shost[5] = sc->sc_vhid; 1433 } 1434 break; 1435 case IFT_FDDI: { 1436 struct fddi_header *fh; 1437 1438 fh = mtod(m, struct fddi_header *); 1439 fh->fddi_shost[0] = 0; 1440 fh->fddi_shost[1] = 0; 1441 fh->fddi_shost[2] = 0x5e; 1442 fh->fddi_shost[3] = 0; 1443 fh->fddi_shost[4] = 1; 1444 fh->fddi_shost[5] = sc->sc_vhid; 1445 } 1446 break; 1447 case IFT_ISO88025: { 1448 struct iso88025_header *th; 1449 th = mtod(m, struct iso88025_header *); 1450 th->iso88025_shost[0] = 3; 1451 th->iso88025_shost[1] = 0; 1452 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 1453 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 1454 th->iso88025_shost[4] = 0; 1455 th->iso88025_shost[5] = 0; 1456 } 1457 break; 1458 default: 1459 printf("%s: carp is not supported for the %d interface type\n", 1460 ifp->if_xname, ifp->if_type); 1461 return (EOPNOTSUPP); 1462 } 1463 1464 return (0); 1465 } 1466 1467 static struct carp_softc* 1468 carp_alloc(struct ifnet *ifp) 1469 { 1470 struct carp_softc *sc; 1471 struct carp_if *cif; 1472 1473 if ((cif = ifp->if_carp) == NULL) { 1474 cif = carp_alloc_if(ifp); 1475 if (cif == NULL) 1476 return (NULL); 1477 } 1478 1479 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1480 1481 sc->sc_advbase = CARP_DFLTINTV; 1482 sc->sc_vhid = -1; /* required setting */ 1483 sc->sc_init_counter = 1; 1484 sc->sc_state = INIT; 1485 1486 sc->sc_ifasiz = sizeof(struct ifaddr *); 1487 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1488 sc->sc_carpdev = ifp; 1489 1490 CARP_LOCK_INIT(sc); 1491 #ifdef INET 1492 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1493 #endif 1494 #ifdef INET6 1495 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1496 #endif 1497 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1498 1499 CIF_LOCK(cif); 1500 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1501 CIF_UNLOCK(cif); 1502 1503 mtx_lock(&carp_mtx); 1504 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1505 mtx_unlock(&carp_mtx); 1506 1507 return (sc); 1508 } 1509 1510 static int 1511 carp_grow_ifas(struct carp_softc *sc) 1512 { 1513 struct ifaddr **new; 1514 1515 CARP_LOCK_ASSERT(sc); 1516 1517 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO); 1518 if (new == NULL) 1519 return (ENOMEM); 1520 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1521 free(sc->sc_ifas, M_CARP); 1522 sc->sc_ifas = new; 1523 sc->sc_ifasiz *= 2; 1524 1525 return (0); 1526 } 1527 1528 static void 1529 carp_destroy(struct carp_softc *sc) 1530 { 1531 struct ifnet *ifp = sc->sc_carpdev; 1532 struct carp_if *cif = ifp->if_carp; 1533 1534 CIF_LOCK_ASSERT(cif); 1535 1536 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1537 1538 mtx_lock(&carp_mtx); 1539 LIST_REMOVE(sc, sc_next); 1540 mtx_unlock(&carp_mtx); 1541 1542 CARP_LOCK(sc); 1543 if (sc->sc_suppress) 1544 carp_demote_adj(-carp_ifdown_adj, "vhid removed"); 1545 callout_drain(&sc->sc_ad_tmo); 1546 #ifdef INET 1547 callout_drain(&sc->sc_md_tmo); 1548 #endif 1549 #ifdef INET6 1550 callout_drain(&sc->sc_md6_tmo); 1551 #endif 1552 CARP_LOCK_DESTROY(sc); 1553 1554 free(sc->sc_ifas, M_CARP); 1555 free(sc, M_CARP); 1556 } 1557 1558 static struct carp_if* 1559 carp_alloc_if(struct ifnet *ifp) 1560 { 1561 struct carp_if *cif; 1562 1563 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1564 1565 if (ifpromisc(ifp, 1) != 0) 1566 goto cleanup; 1567 1568 CIF_LOCK_INIT(cif); 1569 cif->cif_ifp = ifp; 1570 TAILQ_INIT(&cif->cif_vrs); 1571 1572 IF_ADDR_WLOCK(ifp); 1573 ifp->if_carp = cif; 1574 if_ref(ifp); 1575 IF_ADDR_WUNLOCK(ifp); 1576 1577 return (cif); 1578 1579 cleanup: 1580 free(cif, M_CARP); 1581 1582 return (NULL); 1583 } 1584 1585 static void 1586 carp_free_if(struct carp_if *cif) 1587 { 1588 struct ifnet *ifp = cif->cif_ifp; 1589 1590 CIF_LOCK_ASSERT(cif); 1591 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1592 __func__)); 1593 1594 IF_ADDR_WLOCK(ifp); 1595 ifp->if_carp = NULL; 1596 if_rele(ifp); 1597 IF_ADDR_WUNLOCK(ifp); 1598 1599 CIF_LOCK_DESTROY(cif); 1600 1601 ifpromisc(ifp, 0); 1602 1603 free(cif, M_CARP); 1604 } 1605 1606 static void 1607 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) 1608 { 1609 1610 CARP_LOCK(sc); 1611 carpr->carpr_state = sc->sc_state; 1612 carpr->carpr_vhid = sc->sc_vhid; 1613 carpr->carpr_advbase = sc->sc_advbase; 1614 carpr->carpr_advskew = sc->sc_advskew; 1615 if (priv) 1616 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1617 else 1618 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1619 CARP_UNLOCK(sc); 1620 } 1621 1622 int 1623 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1624 { 1625 struct carpreq carpr; 1626 struct ifnet *ifp; 1627 struct carp_softc *sc = NULL; 1628 int error = 0, locked = 0; 1629 1630 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1631 return (error); 1632 1633 ifp = ifunit_ref(ifr->ifr_name); 1634 if (ifp == NULL) 1635 return (ENXIO); 1636 1637 switch (ifp->if_type) { 1638 case IFT_ETHER: 1639 case IFT_L2VLAN: 1640 case IFT_BRIDGE: 1641 case IFT_FDDI: 1642 case IFT_ISO88025: 1643 break; 1644 default: 1645 error = EOPNOTSUPP; 1646 goto out; 1647 } 1648 1649 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1650 error = EADDRNOTAVAIL; 1651 goto out; 1652 } 1653 1654 switch (cmd) { 1655 case SIOCSVH: 1656 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1657 break; 1658 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || 1659 carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { 1660 error = EINVAL; 1661 break; 1662 } 1663 1664 if (ifp->if_carp) { 1665 CIF_LOCK(ifp->if_carp); 1666 IFNET_FOREACH_CARP(ifp, sc) 1667 if (sc->sc_vhid == carpr.carpr_vhid) 1668 break; 1669 CIF_UNLOCK(ifp->if_carp); 1670 } 1671 if (sc == NULL) { 1672 sc = carp_alloc(ifp); 1673 if (sc == NULL) { 1674 error = EINVAL; /* XXX: ifpromisc failed */ 1675 break; 1676 } 1677 1678 CARP_LOCK(sc); 1679 sc->sc_vhid = carpr.carpr_vhid; 1680 LLADDR(&sc->sc_addr)[0] = 0; 1681 LLADDR(&sc->sc_addr)[1] = 0; 1682 LLADDR(&sc->sc_addr)[2] = 0x5e; 1683 LLADDR(&sc->sc_addr)[3] = 0; 1684 LLADDR(&sc->sc_addr)[4] = 1; 1685 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1686 } else 1687 CARP_LOCK(sc); 1688 locked = 1; 1689 if (carpr.carpr_advbase > 0) { 1690 if (carpr.carpr_advbase > 255 || 1691 carpr.carpr_advbase < CARP_DFLTINTV) { 1692 error = EINVAL; 1693 break; 1694 } 1695 sc->sc_advbase = carpr.carpr_advbase; 1696 } 1697 if (carpr.carpr_advskew > 0) { 1698 if (carpr.carpr_advskew >= 255) { 1699 error = EINVAL; 1700 break; 1701 } 1702 sc->sc_advskew = carpr.carpr_advskew; 1703 } 1704 if (carpr.carpr_key[0] != '\0') { 1705 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1706 carp_hmac_prepare(sc); 1707 } 1708 if (sc->sc_state != INIT && 1709 carpr.carpr_state != sc->sc_state) { 1710 switch (carpr.carpr_state) { 1711 case BACKUP: 1712 callout_stop(&sc->sc_ad_tmo); 1713 carp_set_state(sc, BACKUP); 1714 carp_setrun(sc, 0); 1715 carp_delroute(sc); 1716 break; 1717 case MASTER: 1718 carp_master_down_locked(sc); 1719 break; 1720 default: 1721 break; 1722 } 1723 } 1724 break; 1725 1726 case SIOCGVH: 1727 { 1728 int priveleged; 1729 1730 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { 1731 error = EINVAL; 1732 break; 1733 } 1734 if (carpr.carpr_count < 1) { 1735 error = EMSGSIZE; 1736 break; 1737 } 1738 if (ifp->if_carp == NULL) { 1739 error = ENOENT; 1740 break; 1741 } 1742 1743 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); 1744 if (carpr.carpr_vhid != 0) { 1745 CIF_LOCK(ifp->if_carp); 1746 IFNET_FOREACH_CARP(ifp, sc) 1747 if (sc->sc_vhid == carpr.carpr_vhid) 1748 break; 1749 CIF_UNLOCK(ifp->if_carp); 1750 if (sc == NULL) { 1751 error = ENOENT; 1752 break; 1753 } 1754 carp_carprcp(&carpr, sc, priveleged); 1755 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1756 } else { 1757 int i, count; 1758 1759 count = 0; 1760 CIF_LOCK(ifp->if_carp); 1761 IFNET_FOREACH_CARP(ifp, sc) 1762 count++; 1763 1764 if (count > carpr.carpr_count) { 1765 CIF_UNLOCK(ifp->if_carp); 1766 error = EMSGSIZE; 1767 break; 1768 } 1769 1770 i = 0; 1771 IFNET_FOREACH_CARP(ifp, sc) { 1772 carp_carprcp(&carpr, sc, priveleged); 1773 carpr.carpr_count = count; 1774 error = copyout(&carpr, ifr->ifr_data + 1775 (i * sizeof(carpr)), sizeof(carpr)); 1776 if (error) { 1777 CIF_UNLOCK(ifp->if_carp); 1778 break; 1779 } 1780 i++; 1781 } 1782 CIF_UNLOCK(ifp->if_carp); 1783 } 1784 break; 1785 } 1786 default: 1787 error = EINVAL; 1788 } 1789 1790 out: 1791 if (locked) 1792 CARP_UNLOCK(sc); 1793 if_rele(ifp); 1794 1795 return (error); 1796 } 1797 1798 static int 1799 carp_get_vhid(struct ifaddr *ifa) 1800 { 1801 1802 if (ifa == NULL || ifa->ifa_carp == NULL) 1803 return (0); 1804 1805 return (ifa->ifa_carp->sc_vhid); 1806 } 1807 1808 int 1809 carp_attach(struct ifaddr *ifa, int vhid) 1810 { 1811 struct ifnet *ifp = ifa->ifa_ifp; 1812 struct carp_if *cif = ifp->if_carp; 1813 struct carp_softc *sc; 1814 int index, error; 1815 1816 if (ifp->if_carp == NULL) 1817 return (ENOPROTOOPT); 1818 1819 switch (ifa->ifa_addr->sa_family) { 1820 #ifdef INET 1821 case AF_INET: 1822 #endif 1823 #ifdef INET6 1824 case AF_INET6: 1825 #endif 1826 break; 1827 default: 1828 return (EPROTOTYPE); 1829 } 1830 1831 CIF_LOCK(cif); 1832 IFNET_FOREACH_CARP(ifp, sc) 1833 if (sc->sc_vhid == vhid) 1834 break; 1835 if (sc == NULL) { 1836 CIF_UNLOCK(cif); 1837 return (ENOENT); 1838 } 1839 1840 if (ifa->ifa_carp) { 1841 if (ifa->ifa_carp->sc_vhid != vhid) 1842 carp_detach_locked(ifa); 1843 else { 1844 CIF_UNLOCK(cif); 1845 return (0); 1846 } 1847 } 1848 1849 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 1850 if (error) { 1851 CIF_FREE(cif); 1852 return (error); 1853 } 1854 1855 CARP_LOCK(sc); 1856 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 1857 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 1858 if ((error = carp_grow_ifas(sc)) != 0) { 1859 carp_multicast_cleanup(cif, 1860 ifa->ifa_addr->sa_family); 1861 CARP_UNLOCK(sc); 1862 CIF_FREE(cif); 1863 return (error); 1864 } 1865 1866 switch (ifa->ifa_addr->sa_family) { 1867 #ifdef INET 1868 case AF_INET: 1869 cif->cif_naddrs++; 1870 sc->sc_naddrs++; 1871 break; 1872 #endif 1873 #ifdef INET6 1874 case AF_INET6: 1875 cif->cif_naddrs6++; 1876 sc->sc_naddrs6++; 1877 break; 1878 #endif 1879 } 1880 1881 ifa_ref(ifa); 1882 sc->sc_ifas[index - 1] = ifa; 1883 ifa->ifa_carp = sc; 1884 1885 carp_hmac_prepare(sc); 1886 carp_sc_state(sc); 1887 1888 CARP_UNLOCK(sc); 1889 CIF_UNLOCK(cif); 1890 1891 return (0); 1892 } 1893 1894 void 1895 carp_detach(struct ifaddr *ifa) 1896 { 1897 struct ifnet *ifp = ifa->ifa_ifp; 1898 struct carp_if *cif = ifp->if_carp; 1899 1900 CIF_LOCK(cif); 1901 carp_detach_locked(ifa); 1902 CIF_FREE(cif); 1903 } 1904 1905 static void 1906 carp_detach_locked(struct ifaddr *ifa) 1907 { 1908 struct ifnet *ifp = ifa->ifa_ifp; 1909 struct carp_if *cif = ifp->if_carp; 1910 struct carp_softc *sc = ifa->ifa_carp; 1911 int i, index; 1912 1913 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 1914 1915 CIF_LOCK_ASSERT(cif); 1916 CARP_LOCK(sc); 1917 1918 /* Shift array. */ 1919 index = sc->sc_naddrs + sc->sc_naddrs6; 1920 for (i = 0; i < index; i++) 1921 if (sc->sc_ifas[i] == ifa) 1922 break; 1923 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 1924 for (; i < index - 1; i++) 1925 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 1926 sc->sc_ifas[index - 1] = NULL; 1927 1928 switch (ifa->ifa_addr->sa_family) { 1929 #ifdef INET 1930 case AF_INET: 1931 cif->cif_naddrs--; 1932 sc->sc_naddrs--; 1933 break; 1934 #endif 1935 #ifdef INET6 1936 case AF_INET6: 1937 cif->cif_naddrs6--; 1938 sc->sc_naddrs6--; 1939 break; 1940 #endif 1941 } 1942 1943 carp_ifa_delroute(ifa); 1944 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 1945 1946 ifa->ifa_carp = NULL; 1947 ifa_free(ifa); 1948 1949 carp_hmac_prepare(sc); 1950 carp_sc_state(sc); 1951 1952 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1953 CARP_UNLOCK(sc); 1954 carp_destroy(sc); 1955 } else 1956 CARP_UNLOCK(sc); 1957 } 1958 1959 static void 1960 carp_set_state(struct carp_softc *sc, int state) 1961 { 1962 1963 CARP_LOCK_ASSERT(sc); 1964 1965 if (sc->sc_state != state) { 1966 const char *carp_states[] = { CARP_STATES }; 1967 char subsys[IFNAMSIZ+5]; 1968 1969 sc->sc_state = state; 1970 1971 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 1972 sc->sc_carpdev->if_xname); 1973 devctl_notify("CARP", subsys, carp_states[state], NULL); 1974 } 1975 } 1976 1977 static void 1978 carp_linkstate(struct ifnet *ifp) 1979 { 1980 struct carp_softc *sc; 1981 1982 CIF_LOCK(ifp->if_carp); 1983 IFNET_FOREACH_CARP(ifp, sc) { 1984 CARP_LOCK(sc); 1985 carp_sc_state(sc); 1986 CARP_UNLOCK(sc); 1987 } 1988 CIF_UNLOCK(ifp->if_carp); 1989 } 1990 1991 static void 1992 carp_sc_state(struct carp_softc *sc) 1993 { 1994 1995 CARP_LOCK_ASSERT(sc); 1996 1997 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1998 !(sc->sc_carpdev->if_flags & IFF_UP)) { 1999 callout_stop(&sc->sc_ad_tmo); 2000 #ifdef INET 2001 callout_stop(&sc->sc_md_tmo); 2002 #endif 2003 #ifdef INET6 2004 callout_stop(&sc->sc_md6_tmo); 2005 #endif 2006 carp_set_state(sc, INIT); 2007 carp_setrun(sc, 0); 2008 if (!sc->sc_suppress) 2009 carp_demote_adj(carp_ifdown_adj, "interface down"); 2010 sc->sc_suppress = 1; 2011 } else { 2012 carp_set_state(sc, INIT); 2013 carp_setrun(sc, 0); 2014 if (sc->sc_suppress) 2015 carp_demote_adj(-carp_ifdown_adj, "interface up"); 2016 sc->sc_suppress = 0; 2017 } 2018 } 2019 2020 static void 2021 carp_demote_adj(int adj, char *reason) 2022 { 2023 atomic_add_int(&carp_demotion, adj); 2024 CARP_LOG("demoted by %d to %d (%s)\n", adj, carp_demotion, reason); 2025 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2026 } 2027 2028 static int 2029 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2030 { 2031 int new, error; 2032 2033 new = carp_demotion; 2034 error = sysctl_handle_int(oidp, &new, 0, req); 2035 if (error || !req->newptr) 2036 return (error); 2037 2038 carp_demote_adj(new, "sysctl"); 2039 2040 return (0); 2041 } 2042 2043 #ifdef INET 2044 extern struct domain inetdomain; 2045 static struct protosw in_carp_protosw = { 2046 .pr_type = SOCK_RAW, 2047 .pr_domain = &inetdomain, 2048 .pr_protocol = IPPROTO_CARP, 2049 .pr_flags = PR_ATOMIC|PR_ADDR, 2050 .pr_input = carp_input, 2051 .pr_output = (pr_output_t *)rip_output, 2052 .pr_ctloutput = rip_ctloutput, 2053 .pr_usrreqs = &rip_usrreqs 2054 }; 2055 #endif 2056 2057 #ifdef INET6 2058 extern struct domain inet6domain; 2059 static struct ip6protosw in6_carp_protosw = { 2060 .pr_type = SOCK_RAW, 2061 .pr_domain = &inet6domain, 2062 .pr_protocol = IPPROTO_CARP, 2063 .pr_flags = PR_ATOMIC|PR_ADDR, 2064 .pr_input = carp6_input, 2065 .pr_output = rip6_output, 2066 .pr_ctloutput = rip6_ctloutput, 2067 .pr_usrreqs = &rip6_usrreqs 2068 }; 2069 #endif 2070 2071 static void 2072 carp_mod_cleanup(void) 2073 { 2074 2075 #ifdef INET 2076 if (proto_reg[CARP_INET] == 0) { 2077 (void)ipproto_unregister(IPPROTO_CARP); 2078 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2079 proto_reg[CARP_INET] = -1; 2080 } 2081 carp_iamatch_p = NULL; 2082 #endif 2083 #ifdef INET6 2084 if (proto_reg[CARP_INET6] == 0) { 2085 (void)ip6proto_unregister(IPPROTO_CARP); 2086 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2087 proto_reg[CARP_INET6] = -1; 2088 } 2089 carp_iamatch6_p = NULL; 2090 carp_macmatch6_p = NULL; 2091 #endif 2092 carp_ioctl_p = NULL; 2093 carp_attach_p = NULL; 2094 carp_detach_p = NULL; 2095 carp_get_vhid_p = NULL; 2096 carp_linkstate_p = NULL; 2097 carp_forus_p = NULL; 2098 carp_output_p = NULL; 2099 carp_demote_adj_p = NULL; 2100 carp_master_p = NULL; 2101 mtx_unlock(&carp_mtx); 2102 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2103 mtx_destroy(&carp_mtx); 2104 COUNTER_ARRAY_FREE(carpstats, 2105 sizeof(struct carpstats) / sizeof(uint64_t)); 2106 } 2107 2108 static int 2109 carp_mod_load(void) 2110 { 2111 int err; 2112 2113 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2114 LIST_INIT(&carp_list); 2115 COUNTER_ARRAY_ALLOC(carpstats, 2116 sizeof(struct carpstats) / sizeof(uint64_t), M_WAITOK); 2117 carp_get_vhid_p = carp_get_vhid; 2118 carp_forus_p = carp_forus; 2119 carp_output_p = carp_output; 2120 carp_linkstate_p = carp_linkstate; 2121 carp_ioctl_p = carp_ioctl; 2122 carp_attach_p = carp_attach; 2123 carp_detach_p = carp_detach; 2124 carp_demote_adj_p = carp_demote_adj; 2125 carp_master_p = carp_master; 2126 #ifdef INET6 2127 carp_iamatch6_p = carp_iamatch6; 2128 carp_macmatch6_p = carp_macmatch6; 2129 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2130 (struct protosw *)&in6_carp_protosw); 2131 if (proto_reg[CARP_INET6]) { 2132 printf("carp: error %d attaching to PF_INET6\n", 2133 proto_reg[CARP_INET6]); 2134 carp_mod_cleanup(); 2135 return (proto_reg[CARP_INET6]); 2136 } 2137 err = ip6proto_register(IPPROTO_CARP); 2138 if (err) { 2139 printf("carp: error %d registering with INET6\n", err); 2140 carp_mod_cleanup(); 2141 return (err); 2142 } 2143 #endif 2144 #ifdef INET 2145 carp_iamatch_p = carp_iamatch; 2146 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2147 if (proto_reg[CARP_INET]) { 2148 printf("carp: error %d attaching to PF_INET\n", 2149 proto_reg[CARP_INET]); 2150 carp_mod_cleanup(); 2151 return (proto_reg[CARP_INET]); 2152 } 2153 err = ipproto_register(IPPROTO_CARP); 2154 if (err) { 2155 printf("carp: error %d registering with INET\n", err); 2156 carp_mod_cleanup(); 2157 return (err); 2158 } 2159 #endif 2160 return (0); 2161 } 2162 2163 static int 2164 carp_modevent(module_t mod, int type, void *data) 2165 { 2166 switch (type) { 2167 case MOD_LOAD: 2168 return carp_mod_load(); 2169 /* NOTREACHED */ 2170 case MOD_UNLOAD: 2171 mtx_lock(&carp_mtx); 2172 if (LIST_EMPTY(&carp_list)) 2173 carp_mod_cleanup(); 2174 else { 2175 mtx_unlock(&carp_mtx); 2176 return (EBUSY); 2177 } 2178 break; 2179 2180 default: 2181 return (EINVAL); 2182 } 2183 2184 return (0); 2185 } 2186 2187 static moduledata_t carp_mod = { 2188 "carp", 2189 carp_modevent, 2190 0 2191 }; 2192 2193 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2194