1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_carp.h"
#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>

#include <sys/socket.h>
#include <sys/vnode.h>

#include <machine/stdarg.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/fddi.h>
#include <net/iso88025.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>

#ifdef INET
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#include <machine/in_cksum.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <crypto/sha1.h>
#include <netinet/ip_carp.h>

#define CARP_IFNAME	"carp"
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
SYSCTL_DECL(_net_inet_carp);

/*
 * Per-instance state of one carp(4) pseudo-interface (one virtual host).
 * Allocated in carp_clone_create() and released in carp_clone_destroy().
 */
struct carp_softc {
	struct ifnet	 	*sc_ifp;	/* our own carpN ifnet */
	struct ifnet		*sc_carpdev;	/* Pointer to parent interface */
	struct in_ifaddr 	*sc_ia;		/* primary iface address */
	struct ip_moptions 	 sc_imo;	/* passed to ip_output() */
#ifdef INET6
	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
	struct ip6_moptions 	 sc_im6o;	/* passed to ip6_output() */
#endif /* INET6 */
	/* linkage on the parent's carp_if vhif_vrs queue */
	TAILQ_ENTRY(carp_softc)	 sc_list;

	enum { INIT = 0, BACKUP, MASTER }
				 sc_state;

	int			 sc_flags_backup;
	/* non-zero while this softc holds a carp_suppress_preempt count */
	int			 sc_suppress;

	/* failed advertisement sends; cleared again by successful sends */
	int			 sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	/* successful sends counted after reaching CARP_SENDAD_MAX_ERRORS */
	int			 sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS 3

	int			 sc_vhid;	/* -1 until configured */
	int			 sc_advskew;
	int			 sc_naddrs;
	int			 sc_naddrs6;
	int			 sc_advbase;	/* seconds */
	/* non-zero: carp_prepare_ad() picks a random sc_counter */
	int			 sc_init_counter;
	u_int64_t		 sc_counter;

	/* authentication */
#define CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];
	/* key-derived pad; left in outer-pad form by carp_hmac_prepare() */
	unsigned char sc_pad[CARP_HMAC_PAD];
	/* precomputed prefix of the inner hash, see carp_hmac_prepare() */
	SHA1_CTX sc_sha1;

	struct callout		 sc_ad_tmo;	/* advertisement timeout */
	struct callout		 sc_md_tmo;	/* master down timeout */
	/* NOTE(review): presumably the IPv6 master down timeout - confirm */
	struct callout 		 sc_md6_tmo;	/* master down timeout */

	/* linkage on the global carpif_list, protected by carp_mtx */
	LIST_ENTRY(carp_softc)	 sc_next;
};
#define	SC2IFP(sc)	((sc)->sc_ifp)

/* Counts the conditions that currently ask for preemption to be suppressed. */
int carp_suppress_preempt = 0;
/* Tunables indexed by CARPCTL_*; exported through the sysctls below. */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };	/* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");

/*
 * Per-parent-interface state, reached through ifnet->if_carp.  It is
 * freed by carpdetach() when the last virtual host leaves the parent.
 */
struct carp_if {
	TAILQ_HEAD(, carp_softc) vhif_vrs;	/* virtual hosts on this parent */
	int vhif_nvrs;				/* number of entries in vhif_vrs */

	struct ifnet 	*vhif_ifp;
	struct mtx	 vhif_mtx;		/* taken via the CARP_LOCK macros */
};

/* Get carp_if from softc. Valid after carp_set_addr{,6}.
*/ 163 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) 164 165 /* lock per carp_if queue */ 166 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ 167 NULL, MTX_DEF) 168 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) 169 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) 170 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) 171 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) 172 173 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) 174 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) 175 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) 176 177 #define CARP_LOG(...) do { \ 178 if (carp_opts[CARPCTL_LOG] > 0) \ 179 log(LOG_INFO, __VA_ARGS__); \ 180 } while (0) 181 182 #define CARP_DEBUG(...) do { \ 183 if (carp_opts[CARPCTL_LOG] > 1) \ 184 log(LOG_DEBUG, __VA_ARGS__); \ 185 } while (0) 186 187 static void carp_hmac_prepare(struct carp_softc *); 188 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 189 unsigned char *); 190 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 191 unsigned char *); 192 static void carp_setroute(struct carp_softc *, int); 193 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 194 static int carp_clone_create(struct if_clone *, int, caddr_t); 195 static void carp_clone_destroy(struct ifnet *); 196 static void carpdetach(struct carp_softc *, int); 197 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 198 struct carp_header *); 199 static void carp_send_ad_all(void); 200 static void carp_send_ad(void *); 201 static void carp_send_ad_locked(struct carp_softc *); 202 static void carp_send_arp(struct carp_softc *); 203 static void carp_master_down(void *); 204 static void carp_master_down_locked(struct carp_softc *); 205 static int carp_ioctl(struct ifnet *, u_long, caddr_t); 206 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 207 struct 
rtentry *); 208 static void carp_start(struct ifnet *); 209 static void carp_setrun(struct carp_softc *, sa_family_t); 210 static void carp_set_state(struct carp_softc *, int); 211 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 212 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 213 214 static void carp_multicast_cleanup(struct carp_softc *); 215 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 216 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); 217 static void carp_carpdev_state_locked(struct carp_if *); 218 static void carp_sc_state_locked(struct carp_softc *); 219 #ifdef INET6 220 static void carp_send_na(struct carp_softc *); 221 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 222 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 223 static void carp_multicast6_cleanup(struct carp_softc *); 224 #endif 225 226 static LIST_HEAD(, carp_softc) carpif_list; 227 static struct mtx carp_mtx; 228 IFC_SIMPLE_DECLARE(carp, 0); 229 230 static eventhandler_tag if_detach_event_tag; 231 232 static __inline u_int16_t 233 carp_cksum(struct mbuf *m, int len) 234 { 235 return (in_cksum(m, len)); 236 } 237 238 static void 239 carp_hmac_prepare(struct carp_softc *sc) 240 { 241 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 242 u_int8_t vhid = sc->sc_vhid & 0xff; 243 struct ifaddr *ifa; 244 int i, found; 245 #ifdef INET 246 struct in_addr last, cur, in; 247 #endif 248 #ifdef INET6 249 struct in6_addr last6, cur6, in6; 250 #endif 251 252 if (sc->sc_carpdev) 253 CARP_SCLOCK(sc); 254 255 /* XXX: possible race here */ 256 257 /* compute ipad from key */ 258 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 259 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 260 for (i = 0; i < sizeof(sc->sc_pad); i++) 261 sc->sc_pad[i] ^= 0x36; 262 263 /* precompute first part of inner hash */ 264 SHA1Init(&sc->sc_sha1); 265 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 266 
SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 267 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 268 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 269 #ifdef INET 270 cur.s_addr = 0; 271 do { 272 found = 0; 273 last = cur; 274 cur.s_addr = 0xffffffff; 275 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 276 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 277 if (ifa->ifa_addr->sa_family == AF_INET && 278 ntohl(in.s_addr) > ntohl(last.s_addr) && 279 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 280 cur.s_addr = in.s_addr; 281 found++; 282 } 283 } 284 if (found) 285 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 286 } while (found); 287 #endif /* INET */ 288 #ifdef INET6 289 memset(&cur6, 0, sizeof(cur6)); 290 do { 291 found = 0; 292 last6 = cur6; 293 memset(&cur6, 0xff, sizeof(cur6)); 294 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 295 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 296 if (IN6_IS_SCOPE_EMBED(&in6)) 297 in6.s6_addr16[1] = 0; 298 if (ifa->ifa_addr->sa_family == AF_INET6 && 299 memcmp(&in6, &last6, sizeof(in6)) > 0 && 300 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 301 cur6 = in6; 302 found++; 303 } 304 } 305 if (found) 306 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 307 } while (found); 308 #endif /* INET6 */ 309 310 /* convert ipad to opad */ 311 for (i = 0; i < sizeof(sc->sc_pad); i++) 312 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 313 314 if (sc->sc_carpdev) 315 CARP_SCUNLOCK(sc); 316 } 317 318 static void 319 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 320 unsigned char md[20]) 321 { 322 SHA1_CTX sha1ctx; 323 324 /* fetch first half of inner hash */ 325 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 326 327 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 328 SHA1Final(md, &sha1ctx); 329 330 /* outer hash */ 331 SHA1Init(&sha1ctx); 332 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 333 SHA1Update(&sha1ctx, md, 20); 334 SHA1Final(md, &sha1ctx); 335 } 336 337 static 
int 338 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 339 unsigned char md[20]) 340 { 341 unsigned char md2[20]; 342 343 CARP_SCLOCK_ASSERT(sc); 344 345 carp_hmac_generate(sc, counter, md2); 346 347 return (bcmp(md, md2, sizeof(md2))); 348 } 349 350 static void 351 carp_setroute(struct carp_softc *sc, int cmd) 352 { 353 struct ifaddr *ifa; 354 int s; 355 356 if (sc->sc_carpdev) 357 CARP_SCLOCK_ASSERT(sc); 358 359 s = splnet(); 360 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 361 if (ifa->ifa_addr->sa_family == AF_INET && 362 sc->sc_carpdev != NULL) { 363 int count = carp_addrcount( 364 (struct carp_if *)sc->sc_carpdev->if_carp, 365 ifatoia(ifa), CARP_COUNT_MASTER); 366 367 if ((cmd == RTM_ADD && count == 1) || 368 (cmd == RTM_DELETE && count == 0)) 369 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 370 } 371 #ifdef INET6 372 if (ifa->ifa_addr->sa_family == AF_INET6) { 373 if (cmd == RTM_ADD) 374 in6_ifaddloop(ifa); 375 else 376 in6_ifremloop(ifa); 377 } 378 #endif /* INET6 */ 379 } 380 splx(s); 381 } 382 383 static int 384 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params) 385 { 386 387 struct carp_softc *sc; 388 struct ifnet *ifp; 389 390 MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 391 ifp = SC2IFP(sc) = if_alloc(IFT_ETHER); 392 if (ifp == NULL) { 393 FREE(sc, M_CARP); 394 return (ENOSPC); 395 } 396 397 sc->sc_flags_backup = 0; 398 sc->sc_suppress = 0; 399 sc->sc_advbase = CARP_DFLTINTV; 400 sc->sc_vhid = -1; /* required setting */ 401 sc->sc_advskew = 0; 402 sc->sc_init_counter = 1; 403 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? 
*/ 404 #ifdef INET6 405 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 406 #endif 407 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 408 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 409 M_WAITOK); 410 sc->sc_imo.imo_mfilters = NULL; 411 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 412 sc->sc_imo.imo_multicast_vif = -1; 413 414 callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE); 415 callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE); 416 callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE); 417 418 ifp->if_softc = sc; 419 if_initname(ifp, CARP_IFNAME, unit); 420 ifp->if_mtu = ETHERMTU; 421 ifp->if_flags = IFF_LOOPBACK; 422 ifp->if_ioctl = carp_ioctl; 423 ifp->if_output = carp_looutput; 424 ifp->if_start = carp_start; 425 ifp->if_type = IFT_CARP; 426 ifp->if_snd.ifq_maxlen = ifqmaxlen; 427 ifp->if_hdrlen = 0; 428 if_attach(ifp); 429 bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 430 mtx_lock(&carp_mtx); 431 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 432 mtx_unlock(&carp_mtx); 433 return (0); 434 } 435 436 static void 437 carp_clone_destroy(struct ifnet *ifp) 438 { 439 struct carp_softc *sc = ifp->if_softc; 440 441 if (sc->sc_carpdev) 442 CARP_SCLOCK(sc); 443 carpdetach(sc, 1); /* Returns unlocked. */ 444 445 mtx_lock(&carp_mtx); 446 LIST_REMOVE(sc, sc_next); 447 mtx_unlock(&carp_mtx); 448 bpfdetach(ifp); 449 if_detach(ifp); 450 if_free_type(ifp, IFT_ETHER); 451 free(sc->sc_imo.imo_membership, M_CARP); 452 free(sc, M_CARP); 453 } 454 455 /* 456 * This function can be called on CARP interface destroy path, 457 * and in case of the removal of the underlying interface as 458 * well. We differentiate these two cases. In the latter case 459 * we do not cleanup our multicast memberships, since they 460 * are already freed. Also, in the latter case we do not 461 * release the lock on return, because the function will be 462 * called once more, for another CARP instance on the same 463 * interface. 
464 */ 465 static void 466 carpdetach(struct carp_softc *sc, int unlock) 467 { 468 struct carp_if *cif; 469 470 callout_stop(&sc->sc_ad_tmo); 471 callout_stop(&sc->sc_md_tmo); 472 callout_stop(&sc->sc_md6_tmo); 473 474 if (sc->sc_suppress) 475 carp_suppress_preempt--; 476 sc->sc_suppress = 0; 477 478 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 479 carp_suppress_preempt--; 480 sc->sc_sendad_errors = 0; 481 482 carp_set_state(sc, INIT); 483 SC2IFP(sc)->if_flags &= ~IFF_UP; 484 carp_setrun(sc, 0); 485 if (unlock) 486 carp_multicast_cleanup(sc); 487 #ifdef INET6 488 carp_multicast6_cleanup(sc); 489 #endif 490 491 if (sc->sc_carpdev != NULL) { 492 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 493 CARP_LOCK_ASSERT(cif); 494 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 495 if (!--cif->vhif_nvrs) { 496 ifpromisc(sc->sc_carpdev, 0); 497 sc->sc_carpdev->if_carp = NULL; 498 CARP_LOCK_DESTROY(cif); 499 FREE(cif, M_IFADDR); 500 } else if (unlock) 501 CARP_UNLOCK(cif); 502 sc->sc_carpdev = NULL; 503 } 504 } 505 506 /* Detach an interface from the carp. */ 507 static void 508 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 509 { 510 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 511 struct carp_softc *sc, *nextsc; 512 513 if (cif == NULL) 514 return; 515 516 /* 517 * XXX: At the end of for() cycle the lock will be destroyed. 518 */ 519 CARP_LOCK(cif); 520 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 521 nextsc = TAILQ_NEXT(sc, sc_list); 522 carpdetach(sc, 0); 523 } 524 } 525 526 /* 527 * process input packet. 528 * we have rearranged checks order compared to the rfc, 529 * but it seems more efficient this way or not possible otherwise. 
530 */ 531 void 532 carp_input(struct mbuf *m, int hlen) 533 { 534 struct ip *ip = mtod(m, struct ip *); 535 struct carp_header *ch; 536 int iplen, len; 537 538 carpstats.carps_ipackets++; 539 540 if (!carp_opts[CARPCTL_ALLOW]) { 541 m_freem(m); 542 return; 543 } 544 545 /* check if received on a valid carp interface */ 546 if (m->m_pkthdr.rcvif->if_carp == NULL) { 547 carpstats.carps_badif++; 548 CARP_LOG("carp_input: packet received on non-carp " 549 "interface: %s\n", 550 m->m_pkthdr.rcvif->if_xname); 551 m_freem(m); 552 return; 553 } 554 555 /* verify that the IP TTL is 255. */ 556 if (ip->ip_ttl != CARP_DFLTTL) { 557 carpstats.carps_badttl++; 558 CARP_LOG("carp_input: received ttl %d != 255i on %s\n", 559 ip->ip_ttl, 560 m->m_pkthdr.rcvif->if_xname); 561 m_freem(m); 562 return; 563 } 564 565 iplen = ip->ip_hl << 2; 566 567 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 568 carpstats.carps_badlen++; 569 CARP_LOG("carp_input: received len %zd < " 570 "sizeof(struct carp_header)\n", 571 m->m_len - sizeof(struct ip)); 572 m_freem(m); 573 return; 574 } 575 576 if (iplen + sizeof(*ch) < m->m_len) { 577 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 578 carpstats.carps_hdrops++; 579 CARP_LOG("carp_input: pullup failed\n"); 580 return; 581 } 582 ip = mtod(m, struct ip *); 583 } 584 ch = (struct carp_header *)((char *)ip + iplen); 585 586 /* 587 * verify that the received packet length is 588 * equal to the CARP header 589 */ 590 len = iplen + sizeof(*ch); 591 if (len > m->m_pkthdr.len) { 592 carpstats.carps_badlen++; 593 CARP_LOG("carp_input: packet too short %d on %s\n", 594 m->m_pkthdr.len, 595 m->m_pkthdr.rcvif->if_xname); 596 m_freem(m); 597 return; 598 } 599 600 if ((m = m_pullup(m, len)) == NULL) { 601 carpstats.carps_hdrops++; 602 return; 603 } 604 ip = mtod(m, struct ip *); 605 ch = (struct carp_header *)((char *)ip + iplen); 606 607 /* verify the CARP checksum */ 608 m->m_data += iplen; 609 if (carp_cksum(m, len - iplen)) { 610 carpstats.carps_badsum++; 
611 CARP_LOG("carp_input: checksum failed on %s\n", 612 m->m_pkthdr.rcvif->if_xname); 613 m_freem(m); 614 return; 615 } 616 m->m_data -= iplen; 617 618 carp_input_c(m, ch, AF_INET); 619 } 620 621 #ifdef INET6 622 int 623 carp6_input(struct mbuf **mp, int *offp, int proto) 624 { 625 struct mbuf *m = *mp; 626 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 627 struct carp_header *ch; 628 u_int len; 629 630 carpstats.carps_ipackets6++; 631 632 if (!carp_opts[CARPCTL_ALLOW]) { 633 m_freem(m); 634 return (IPPROTO_DONE); 635 } 636 637 /* check if received on a valid carp interface */ 638 if (m->m_pkthdr.rcvif->if_carp == NULL) { 639 carpstats.carps_badif++; 640 CARP_LOG("carp6_input: packet received on non-carp " 641 "interface: %s\n", 642 m->m_pkthdr.rcvif->if_xname); 643 m_freem(m); 644 return (IPPROTO_DONE); 645 } 646 647 /* verify that the IP TTL is 255 */ 648 if (ip6->ip6_hlim != CARP_DFLTTL) { 649 carpstats.carps_badttl++; 650 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 651 ip6->ip6_hlim, 652 m->m_pkthdr.rcvif->if_xname); 653 m_freem(m); 654 return (IPPROTO_DONE); 655 } 656 657 /* verify that we have a complete carp packet */ 658 len = m->m_len; 659 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 660 if (ch == NULL) { 661 carpstats.carps_badlen++; 662 CARP_LOG("carp6_input: packet size %u too small\n", len); 663 return (IPPROTO_DONE); 664 } 665 666 667 /* verify the CARP checksum */ 668 m->m_data += *offp; 669 if (carp_cksum(m, sizeof(*ch))) { 670 carpstats.carps_badsum++; 671 CARP_LOG("carp6_input: checksum failed, on %s\n", 672 m->m_pkthdr.rcvif->if_xname); 673 m_freem(m); 674 return (IPPROTO_DONE); 675 } 676 m->m_data -= *offp; 677 678 carp_input_c(m, ch, AF_INET6); 679 return (IPPROTO_DONE); 680 } 681 #endif /* INET6 */ 682 683 static void 684 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 685 { 686 struct ifnet *ifp = m->m_pkthdr.rcvif; 687 struct carp_softc *sc; 688 u_int64_t tmp_counter; 689 struct timeval 
sc_tv, ch_tv; 690 691 /* verify that the VHID is valid on the receiving interface */ 692 CARP_LOCK(ifp->if_carp); 693 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 694 if (sc->sc_vhid == ch->carp_vhid) 695 break; 696 697 if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && 698 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 699 carpstats.carps_badvhid++; 700 CARP_UNLOCK(ifp->if_carp); 701 m_freem(m); 702 return; 703 } 704 705 getmicrotime(&SC2IFP(sc)->if_lastchange); 706 SC2IFP(sc)->if_ipackets++; 707 SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; 708 709 if (bpf_peers_present(SC2IFP(sc)->if_bpf)) { 710 struct ip *ip = mtod(m, struct ip *); 711 uint32_t af1 = af; 712 713 /* BPF wants net byte order */ 714 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 715 ip->ip_off = htons(ip->ip_off); 716 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m); 717 } 718 719 /* verify the CARP version. */ 720 if (ch->carp_version != CARP_VERSION) { 721 carpstats.carps_badver++; 722 SC2IFP(sc)->if_ierrors++; 723 CARP_UNLOCK(ifp->if_carp); 724 CARP_LOG("%s; invalid version %d\n", 725 SC2IFP(sc)->if_xname, 726 ch->carp_version); 727 m_freem(m); 728 return; 729 } 730 731 /* verify the hash */ 732 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 733 carpstats.carps_badauth++; 734 SC2IFP(sc)->if_ierrors++; 735 CARP_UNLOCK(ifp->if_carp); 736 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); 737 m_freem(m); 738 return; 739 } 740 741 tmp_counter = ntohl(ch->carp_counter[0]); 742 tmp_counter = tmp_counter<<32; 743 tmp_counter += ntohl(ch->carp_counter[1]); 744 745 /* XXX Replay protection goes here */ 746 747 sc->sc_init_counter = 0; 748 sc->sc_counter = tmp_counter; 749 750 sc_tv.tv_sec = sc->sc_advbase; 751 if (carp_suppress_preempt && sc->sc_advskew < 240) 752 sc_tv.tv_usec = 240 * 1000000 / 256; 753 else 754 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 755 ch_tv.tv_sec = ch->carp_advbase; 756 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 757 758 
switch (sc->sc_state) { 759 case INIT: 760 break; 761 case MASTER: 762 /* 763 * If we receive an advertisement from a master who's going to 764 * be more frequent than us, go into BACKUP state. 765 */ 766 if (timevalcmp(&sc_tv, &ch_tv, >) || 767 timevalcmp(&sc_tv, &ch_tv, ==)) { 768 callout_stop(&sc->sc_ad_tmo); 769 CARP_DEBUG("%s: MASTER -> BACKUP " 770 "(more frequent advertisement received)\n", 771 SC2IFP(sc)->if_xname); 772 carp_set_state(sc, BACKUP); 773 carp_setrun(sc, 0); 774 carp_setroute(sc, RTM_DELETE); 775 } 776 break; 777 case BACKUP: 778 /* 779 * If we're pre-empting masters who advertise slower than us, 780 * and this one claims to be slower, treat him as down. 781 */ 782 if (carp_opts[CARPCTL_PREEMPT] && 783 timevalcmp(&sc_tv, &ch_tv, <)) { 784 CARP_DEBUG("%s: BACKUP -> MASTER " 785 "(preempting a slower master)\n", 786 SC2IFP(sc)->if_xname); 787 carp_master_down_locked(sc); 788 break; 789 } 790 791 /* 792 * If the master is going to advertise at such a low frequency 793 * that he's guaranteed to time out, we'd might as well just 794 * treat him as timed out now. 795 */ 796 sc_tv.tv_sec = sc->sc_advbase * 3; 797 if (timevalcmp(&sc_tv, &ch_tv, <)) { 798 CARP_DEBUG("%s: BACKUP -> MASTER " 799 "(master timed out)\n", 800 SC2IFP(sc)->if_xname); 801 carp_master_down_locked(sc); 802 break; 803 } 804 805 /* 806 * Otherwise, we reset the counter and wait for the next 807 * advertisement. 
808 */ 809 carp_setrun(sc, af); 810 break; 811 } 812 813 CARP_UNLOCK(ifp->if_carp); 814 815 m_freem(m); 816 return; 817 } 818 819 static int 820 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 821 { 822 struct m_tag *mtag; 823 struct ifnet *ifp = SC2IFP(sc); 824 825 if (sc->sc_init_counter) { 826 /* this could also be seconds since unix epoch */ 827 sc->sc_counter = arc4random(); 828 sc->sc_counter = sc->sc_counter << 32; 829 sc->sc_counter += arc4random(); 830 } else 831 sc->sc_counter++; 832 833 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 834 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 835 836 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 837 838 /* Tag packet for carp_output */ 839 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 840 if (mtag == NULL) { 841 m_freem(m); 842 SC2IFP(sc)->if_oerrors++; 843 return (ENOMEM); 844 } 845 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 846 m_tag_prepend(m, mtag); 847 848 return (0); 849 } 850 851 static void 852 carp_send_ad_all(void) 853 { 854 struct carp_softc *sc; 855 856 mtx_lock(&carp_mtx); 857 LIST_FOREACH(sc, &carpif_list, sc_next) { 858 if (sc->sc_carpdev == NULL) 859 continue; 860 CARP_SCLOCK(sc); 861 if ((SC2IFP(sc)->if_flags & IFF_UP) && 862 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) && 863 sc->sc_state == MASTER) 864 carp_send_ad_locked(sc); 865 CARP_SCUNLOCK(sc); 866 } 867 mtx_unlock(&carp_mtx); 868 } 869 870 static void 871 carp_send_ad(void *v) 872 { 873 struct carp_softc *sc = v; 874 875 CARP_SCLOCK(sc); 876 carp_send_ad_locked(sc); 877 CARP_SCUNLOCK(sc); 878 } 879 880 static void 881 carp_send_ad_locked(struct carp_softc *sc) 882 { 883 struct carp_header ch; 884 struct timeval tv; 885 struct carp_header *ch_ptr; 886 struct mbuf *m; 887 int len, advbase, advskew; 888 889 CARP_SCLOCK_ASSERT(sc); 890 891 /* bow out if we've lost our UPness or RUNNINGuiness */ 892 if (!((SC2IFP(sc)->if_flags & IFF_UP) && 893 
(SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 894 advbase = 255; 895 advskew = 255; 896 } else { 897 advbase = sc->sc_advbase; 898 if (!carp_suppress_preempt || sc->sc_advskew > 240) 899 advskew = sc->sc_advskew; 900 else 901 advskew = 240; 902 tv.tv_sec = advbase; 903 tv.tv_usec = advskew * 1000000 / 256; 904 } 905 906 ch.carp_version = CARP_VERSION; 907 ch.carp_type = CARP_ADVERTISEMENT; 908 ch.carp_vhid = sc->sc_vhid; 909 ch.carp_advbase = advbase; 910 ch.carp_advskew = advskew; 911 ch.carp_authlen = 7; /* XXX DEFINE */ 912 ch.carp_pad1 = 0; /* must be zero */ 913 ch.carp_cksum = 0; 914 915 #ifdef INET 916 if (sc->sc_ia) { 917 struct ip *ip; 918 919 MGETHDR(m, M_DONTWAIT, MT_HEADER); 920 if (m == NULL) { 921 SC2IFP(sc)->if_oerrors++; 922 carpstats.carps_onomem++; 923 /* XXX maybe less ? */ 924 if (advbase != 255 || advskew != 255) 925 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 926 carp_send_ad, sc); 927 return; 928 } 929 len = sizeof(*ip) + sizeof(ch); 930 m->m_pkthdr.len = len; 931 m->m_pkthdr.rcvif = NULL; 932 m->m_len = len; 933 MH_ALIGN(m, m->m_len); 934 m->m_flags |= M_MCAST; 935 ip = mtod(m, struct ip *); 936 ip->ip_v = IPVERSION; 937 ip->ip_hl = sizeof(*ip) >> 2; 938 ip->ip_tos = IPTOS_LOWDELAY; 939 ip->ip_len = len; 940 ip->ip_id = ip_newid(); 941 ip->ip_off = IP_DF; 942 ip->ip_ttl = CARP_DFLTTL; 943 ip->ip_p = IPPROTO_CARP; 944 ip->ip_sum = 0; 945 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; 946 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 947 948 ch_ptr = (struct carp_header *)(&ip[1]); 949 bcopy(&ch, ch_ptr, sizeof(ch)); 950 if (carp_prepare_ad(m, sc, ch_ptr)) 951 return; 952 953 m->m_data += sizeof(*ip); 954 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 955 m->m_data -= sizeof(*ip); 956 957 getmicrotime(&SC2IFP(sc)->if_lastchange); 958 SC2IFP(sc)->if_opackets++; 959 SC2IFP(sc)->if_obytes += len; 960 carpstats.carps_opackets++; 961 962 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 963 
SC2IFP(sc)->if_oerrors++; 964 if (sc->sc_sendad_errors < INT_MAX) 965 sc->sc_sendad_errors++; 966 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 967 carp_suppress_preempt++; 968 if (carp_suppress_preempt == 1) { 969 CARP_SCUNLOCK(sc); 970 carp_send_ad_all(); 971 CARP_SCLOCK(sc); 972 } 973 } 974 sc->sc_sendad_success = 0; 975 } else { 976 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 977 if (++sc->sc_sendad_success >= 978 CARP_SENDAD_MIN_SUCCESS) { 979 carp_suppress_preempt--; 980 sc->sc_sendad_errors = 0; 981 } 982 } else 983 sc->sc_sendad_errors = 0; 984 } 985 } 986 #endif /* INET */ 987 #ifdef INET6 988 if (sc->sc_ia6) { 989 struct ip6_hdr *ip6; 990 991 MGETHDR(m, M_DONTWAIT, MT_HEADER); 992 if (m == NULL) { 993 SC2IFP(sc)->if_oerrors++; 994 carpstats.carps_onomem++; 995 /* XXX maybe less ? */ 996 if (advbase != 255 || advskew != 255) 997 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 998 carp_send_ad, sc); 999 return; 1000 } 1001 len = sizeof(*ip6) + sizeof(ch); 1002 m->m_pkthdr.len = len; 1003 m->m_pkthdr.rcvif = NULL; 1004 m->m_len = len; 1005 MH_ALIGN(m, m->m_len); 1006 m->m_flags |= M_MCAST; 1007 ip6 = mtod(m, struct ip6_hdr *); 1008 bzero(ip6, sizeof(*ip6)); 1009 ip6->ip6_vfc |= IPV6_VERSION; 1010 ip6->ip6_hlim = CARP_DFLTTL; 1011 ip6->ip6_nxt = IPPROTO_CARP; 1012 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 1013 sizeof(struct in6_addr)); 1014 /* set the multicast destination */ 1015 1016 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1017 ip6->ip6_dst.s6_addr8[15] = 0x12; 1018 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1019 SC2IFP(sc)->if_oerrors++; 1020 m_freem(m); 1021 CARP_LOG("%s: in6_setscope failed\n", __func__); 1022 return; 1023 } 1024 1025 ch_ptr = (struct carp_header *)(&ip6[1]); 1026 bcopy(&ch, ch_ptr, sizeof(ch)); 1027 if (carp_prepare_ad(m, sc, ch_ptr)) 1028 return; 1029 1030 m->m_data += sizeof(*ip6); 1031 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1032 m->m_data -= sizeof(*ip6); 1033 1034 
getmicrotime(&SC2IFP(sc)->if_lastchange); 1035 SC2IFP(sc)->if_opackets++; 1036 SC2IFP(sc)->if_obytes += len; 1037 carpstats.carps_opackets6++; 1038 1039 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 1040 SC2IFP(sc)->if_oerrors++; 1041 if (sc->sc_sendad_errors < INT_MAX) 1042 sc->sc_sendad_errors++; 1043 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1044 carp_suppress_preempt++; 1045 if (carp_suppress_preempt == 1) { 1046 CARP_SCUNLOCK(sc); 1047 carp_send_ad_all(); 1048 CARP_SCLOCK(sc); 1049 } 1050 } 1051 sc->sc_sendad_success = 0; 1052 } else { 1053 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1054 if (++sc->sc_sendad_success >= 1055 CARP_SENDAD_MIN_SUCCESS) { 1056 carp_suppress_preempt--; 1057 sc->sc_sendad_errors = 0; 1058 } 1059 } else 1060 sc->sc_sendad_errors = 0; 1061 } 1062 } 1063 #endif /* INET6 */ 1064 1065 if (advbase != 255 || advskew != 255) 1066 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1067 carp_send_ad, sc); 1068 1069 } 1070 1071 /* 1072 * Broadcast a gratuitous ARP request containing 1073 * the virtual router MAC address for each IP address 1074 * associated with the virtual router. 
1075 */ 1076 static void 1077 carp_send_arp(struct carp_softc *sc) 1078 { 1079 struct ifaddr *ifa; 1080 1081 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1082 1083 if (ifa->ifa_addr->sa_family != AF_INET) 1084 continue; 1085 1086 /* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */ 1087 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp)); 1088 1089 DELAY(1000); /* XXX */ 1090 } 1091 } 1092 1093 #ifdef INET6 1094 static void 1095 carp_send_na(struct carp_softc *sc) 1096 { 1097 struct ifaddr *ifa; 1098 struct in6_addr *in6; 1099 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1100 1101 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1102 1103 if (ifa->ifa_addr->sa_family != AF_INET6) 1104 continue; 1105 1106 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1107 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1108 ND_NA_FLAG_OVERRIDE, 1, NULL); 1109 DELAY(1000); /* XXX */ 1110 } 1111 } 1112 #endif /* INET6 */ 1113 1114 static int 1115 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1116 { 1117 struct carp_softc *vh; 1118 struct ifaddr *ifa; 1119 int count = 0; 1120 1121 CARP_LOCK_ASSERT(cif); 1122 1123 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1124 if ((type == CARP_COUNT_RUNNING && 1125 (SC2IFP(vh)->if_flags & IFF_UP) && 1126 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) || 1127 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1128 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1129 ifa_list) { 1130 if (ifa->ifa_addr->sa_family == AF_INET && 1131 ia->ia_addr.sin_addr.s_addr == 1132 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1133 count++; 1134 } 1135 } 1136 } 1137 return (count); 1138 } 1139 1140 int 1141 carp_iamatch(void *v, struct in_ifaddr *ia, 1142 struct in_addr *isaddr, u_int8_t **enaddr) 1143 { 1144 struct carp_if *cif = v; 1145 struct carp_softc *vh; 1146 int index, count = 0; 1147 struct ifaddr *ifa; 1148 1149 CARP_LOCK(cif); 1150 1151 if (carp_opts[CARPCTL_ARPBALANCE]) { 1152 /* 1153 * XXX 
proof of concept implementation. 1154 * We use the source ip to decide which virtual host should 1155 * handle the request. If we're master of that virtual host, 1156 * then we respond, otherwise, just drop the arp packet on 1157 * the floor. 1158 */ 1159 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1160 if (count == 0) { 1161 /* should never reach this */ 1162 CARP_UNLOCK(cif); 1163 return (0); 1164 } 1165 1166 /* this should be a hash, like pf_hash() */ 1167 index = ntohl(isaddr->s_addr) % count; 1168 count = 0; 1169 1170 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1171 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1172 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) { 1173 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1174 ifa_list) { 1175 if (ifa->ifa_addr->sa_family == 1176 AF_INET && 1177 ia->ia_addr.sin_addr.s_addr == 1178 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1179 if (count == index) { 1180 if (vh->sc_state == 1181 MASTER) { 1182 *enaddr = IF_LLADDR(vh->sc_ifp); 1183 CARP_UNLOCK(cif); 1184 return (1); 1185 } else { 1186 CARP_UNLOCK(cif); 1187 return (0); 1188 } 1189 } 1190 count++; 1191 } 1192 } 1193 } 1194 } 1195 } else { 1196 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1197 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1198 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1199 ia->ia_ifp == SC2IFP(vh) && 1200 vh->sc_state == MASTER) { 1201 *enaddr = IF_LLADDR(vh->sc_ifp); 1202 CARP_UNLOCK(cif); 1203 return (1); 1204 } 1205 } 1206 } 1207 CARP_UNLOCK(cif); 1208 return (0); 1209 } 1210 1211 #ifdef INET6 1212 struct ifaddr * 1213 carp_iamatch6(void *v, struct in6_addr *taddr) 1214 { 1215 struct carp_if *cif = v; 1216 struct carp_softc *vh; 1217 struct ifaddr *ifa; 1218 1219 CARP_LOCK(cif); 1220 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1221 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { 1222 if (IN6_ARE_ADDR_EQUAL(taddr, 1223 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1224 (SC2IFP(vh)->if_flags & IFF_UP) && 1225 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) 
&& 1226 vh->sc_state == MASTER) { 1227 CARP_UNLOCK(cif); 1228 return (ifa); 1229 } 1230 } 1231 } 1232 CARP_UNLOCK(cif); 1233 1234 return (NULL); 1235 } 1236 1237 void * 1238 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1239 { 1240 struct m_tag *mtag; 1241 struct carp_if *cif = v; 1242 struct carp_softc *sc; 1243 struct ifaddr *ifa; 1244 1245 CARP_LOCK(cif); 1246 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1247 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1248 if (IN6_ARE_ADDR_EQUAL(taddr, 1249 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1250 (SC2IFP(sc)->if_flags & IFF_UP) && 1251 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) { 1252 struct ifnet *ifp = SC2IFP(sc); 1253 mtag = m_tag_get(PACKET_TAG_CARP, 1254 sizeof(struct ifnet *), M_NOWAIT); 1255 if (mtag == NULL) { 1256 /* better a bit than nothing */ 1257 CARP_UNLOCK(cif); 1258 return (IF_LLADDR(sc->sc_ifp)); 1259 } 1260 bcopy(&ifp, (caddr_t)(mtag + 1), 1261 sizeof(struct ifnet *)); 1262 m_tag_prepend(m, mtag); 1263 1264 CARP_UNLOCK(cif); 1265 return (IF_LLADDR(sc->sc_ifp)); 1266 } 1267 } 1268 } 1269 CARP_UNLOCK(cif); 1270 1271 return (NULL); 1272 } 1273 #endif 1274 1275 struct ifnet * 1276 carp_forus(void *v, void *dhost) 1277 { 1278 struct carp_if *cif = v; 1279 struct carp_softc *vh; 1280 u_int8_t *ena = dhost; 1281 1282 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1283 return (NULL); 1284 1285 CARP_LOCK(cif); 1286 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1287 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1288 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1289 vh->sc_state == MASTER && 1290 !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { 1291 CARP_UNLOCK(cif); 1292 return (SC2IFP(vh)); 1293 } 1294 1295 CARP_UNLOCK(cif); 1296 return (NULL); 1297 } 1298 1299 static void 1300 carp_master_down(void *v) 1301 { 1302 struct carp_softc *sc = v; 1303 1304 CARP_SCLOCK(sc); 1305 carp_master_down_locked(sc); 1306 CARP_SCUNLOCK(sc); 1307 } 1308 1309 static void 1310 
carp_master_down_locked(struct carp_softc *sc) 1311 { 1312 if (sc->sc_carpdev) 1313 CARP_SCLOCK_ASSERT(sc); 1314 1315 switch (sc->sc_state) { 1316 case INIT: 1317 printf("%s: master_down event in INIT state\n", 1318 SC2IFP(sc)->if_xname); 1319 break; 1320 case MASTER: 1321 break; 1322 case BACKUP: 1323 carp_set_state(sc, MASTER); 1324 carp_send_ad_locked(sc); 1325 carp_send_arp(sc); 1326 #ifdef INET6 1327 carp_send_na(sc); 1328 #endif /* INET6 */ 1329 carp_setrun(sc, 0); 1330 carp_setroute(sc, RTM_ADD); 1331 break; 1332 } 1333 } 1334 1335 /* 1336 * When in backup state, af indicates whether to reset the master down timer 1337 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1338 */ 1339 static void 1340 carp_setrun(struct carp_softc *sc, sa_family_t af) 1341 { 1342 struct timeval tv; 1343 1344 if (sc->sc_carpdev == NULL) { 1345 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1346 carp_set_state(sc, INIT); 1347 return; 1348 } else 1349 CARP_SCLOCK_ASSERT(sc); 1350 1351 if (SC2IFP(sc)->if_flags & IFF_UP && 1352 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) 1353 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 1354 else { 1355 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1356 carp_setroute(sc, RTM_DELETE); 1357 return; 1358 } 1359 1360 switch (sc->sc_state) { 1361 case INIT: 1362 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1363 carp_send_ad_locked(sc); 1364 carp_send_arp(sc); 1365 #ifdef INET6 1366 carp_send_na(sc); 1367 #endif /* INET6 */ 1368 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1369 SC2IFP(sc)->if_xname); 1370 carp_set_state(sc, MASTER); 1371 carp_setroute(sc, RTM_ADD); 1372 } else { 1373 CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); 1374 carp_set_state(sc, BACKUP); 1375 carp_setroute(sc, RTM_DELETE); 1376 carp_setrun(sc, 0); 1377 } 1378 break; 1379 case BACKUP: 1380 callout_stop(&sc->sc_ad_tmo); 1381 tv.tv_sec = 3 * sc->sc_advbase; 1382 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1383 switch 
(af) { 1384 #ifdef INET 1385 case AF_INET: 1386 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1387 carp_master_down, sc); 1388 break; 1389 #endif /* INET */ 1390 #ifdef INET6 1391 case AF_INET6: 1392 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1393 carp_master_down, sc); 1394 break; 1395 #endif /* INET6 */ 1396 default: 1397 if (sc->sc_naddrs) 1398 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1399 carp_master_down, sc); 1400 if (sc->sc_naddrs6) 1401 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1402 carp_master_down, sc); 1403 break; 1404 } 1405 break; 1406 case MASTER: 1407 tv.tv_sec = sc->sc_advbase; 1408 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1409 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1410 carp_send_ad, sc); 1411 break; 1412 } 1413 } 1414 1415 static void 1416 carp_multicast_cleanup(struct carp_softc *sc) 1417 { 1418 struct ip_moptions *imo = &sc->sc_imo; 1419 u_int16_t n = imo->imo_num_memberships; 1420 1421 /* Clean up our own multicast memberships */ 1422 while (n-- > 0) { 1423 if (imo->imo_membership[n] != NULL) { 1424 in_delmulti(imo->imo_membership[n]); 1425 imo->imo_membership[n] = NULL; 1426 } 1427 } 1428 KASSERT(imo->imo_mfilters == NULL, 1429 ("%s: imo_mfilters != NULL", __func__)); 1430 imo->imo_num_memberships = 0; 1431 imo->imo_multicast_ifp = NULL; 1432 } 1433 1434 #ifdef INET6 1435 static void 1436 carp_multicast6_cleanup(struct carp_softc *sc) 1437 { 1438 struct ip6_moptions *im6o = &sc->sc_im6o; 1439 1440 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1441 struct in6_multi_mship *imm = 1442 LIST_FIRST(&im6o->im6o_memberships); 1443 1444 LIST_REMOVE(imm, i6mm_chain); 1445 in6_leavegroup(imm); 1446 } 1447 im6o->im6o_multicast_ifp = NULL; 1448 } 1449 #endif 1450 1451 static int 1452 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1453 { 1454 struct ifnet *ifp; 1455 struct carp_if *cif; 1456 struct in_ifaddr *ia, *ia_if; 1457 struct ip_moptions *imo = &sc->sc_imo; 1458 struct in_addr addr; 1459 u_long iaddr = 
htonl(sin->sin_addr.s_addr); 1460 int own, error; 1461 1462 if (sin->sin_addr.s_addr == 0) { 1463 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1464 carp_set_state(sc, INIT); 1465 if (sc->sc_naddrs) 1466 SC2IFP(sc)->if_flags |= IFF_UP; 1467 if (sc->sc_carpdev) 1468 CARP_SCLOCK(sc); 1469 carp_setrun(sc, 0); 1470 if (sc->sc_carpdev) 1471 CARP_SCUNLOCK(sc); 1472 return (0); 1473 } 1474 1475 /* we have to do it by hands to check we won't match on us */ 1476 ia_if = NULL; own = 0; 1477 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 1478 /* and, yeah, we need a multicast-capable iface too */ 1479 if (ia->ia_ifp != SC2IFP(sc) && 1480 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1481 (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1482 if (!ia_if) 1483 ia_if = ia; 1484 if (sin->sin_addr.s_addr == 1485 ia->ia_addr.sin_addr.s_addr) 1486 own++; 1487 } 1488 } 1489 1490 if (!ia_if) 1491 return (EADDRNOTAVAIL); 1492 1493 ia = ia_if; 1494 ifp = ia->ia_ifp; 1495 1496 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1497 (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) 1498 return (EADDRNOTAVAIL); 1499 1500 if (imo->imo_num_memberships == 0) { 1501 addr.s_addr = htonl(INADDR_CARP_GROUP); 1502 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 1503 return (ENOBUFS); 1504 imo->imo_num_memberships++; 1505 imo->imo_multicast_ifp = ifp; 1506 imo->imo_multicast_ttl = CARP_DFLTTL; 1507 imo->imo_multicast_loop = 0; 1508 } 1509 1510 if (!ifp->if_carp) { 1511 1512 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, 1513 M_WAITOK|M_ZERO); 1514 if (!cif) { 1515 error = ENOBUFS; 1516 goto cleanup; 1517 } 1518 if ((error = ifpromisc(ifp, 1))) { 1519 FREE(cif, M_CARP); 1520 goto cleanup; 1521 } 1522 1523 CARP_LOCK_INIT(cif); 1524 CARP_LOCK(cif); 1525 cif->vhif_ifp = ifp; 1526 TAILQ_INIT(&cif->vhif_vrs); 1527 ifp->if_carp = cif; 1528 1529 } else { 1530 struct carp_softc *vr; 1531 1532 cif = (struct carp_if *)ifp->if_carp; 1533 CARP_LOCK(cif); 1534 TAILQ_FOREACH(vr, 
&cif->vhif_vrs, sc_list) 1535 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1536 CARP_UNLOCK(cif); 1537 error = EEXIST; 1538 goto cleanup; 1539 } 1540 } 1541 sc->sc_ia = ia; 1542 sc->sc_carpdev = ifp; 1543 1544 { /* XXX prevent endless loop if already in queue */ 1545 struct carp_softc *vr, *after = NULL; 1546 int myself = 0; 1547 cif = (struct carp_if *)ifp->if_carp; 1548 1549 /* XXX: cif should not change, right? So we still hold the lock */ 1550 CARP_LOCK_ASSERT(cif); 1551 1552 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1553 if (vr == sc) 1554 myself = 1; 1555 if (vr->sc_vhid < sc->sc_vhid) 1556 after = vr; 1557 } 1558 1559 if (!myself) { 1560 /* We're trying to keep things in order */ 1561 if (after == NULL) { 1562 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1563 } else { 1564 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1565 } 1566 cif->vhif_nvrs++; 1567 } 1568 } 1569 1570 sc->sc_naddrs++; 1571 SC2IFP(sc)->if_flags |= IFF_UP; 1572 if (own) 1573 sc->sc_advskew = 0; 1574 carp_sc_state_locked(sc); 1575 carp_setrun(sc, 0); 1576 1577 CARP_UNLOCK(cif); 1578 1579 return (0); 1580 1581 cleanup: 1582 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1583 return (error); 1584 } 1585 1586 static int 1587 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1588 { 1589 int error = 0; 1590 1591 if (!--sc->sc_naddrs) { 1592 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1593 struct ip_moptions *imo = &sc->sc_imo; 1594 1595 CARP_LOCK(cif); 1596 callout_stop(&sc->sc_ad_tmo); 1597 SC2IFP(sc)->if_flags &= ~IFF_UP; 1598 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1599 sc->sc_vhid = -1; 1600 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1601 imo->imo_multicast_ifp = NULL; 1602 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1603 if (!--cif->vhif_nvrs) { 1604 sc->sc_carpdev->if_carp = NULL; 1605 CARP_LOCK_DESTROY(cif); 1606 FREE(cif, M_IFADDR); 1607 } else { 1608 CARP_UNLOCK(cif); 1609 } 1610 } 1611 1612 return 
(error); 1613 } 1614 1615 #ifdef INET6 1616 static int 1617 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1618 { 1619 struct ifnet *ifp; 1620 struct carp_if *cif; 1621 struct in6_ifaddr *ia, *ia_if; 1622 struct ip6_moptions *im6o = &sc->sc_im6o; 1623 struct in6_multi_mship *imm; 1624 struct in6_addr in6; 1625 int own, error; 1626 1627 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1628 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1629 carp_set_state(sc, INIT); 1630 if (sc->sc_naddrs6) 1631 SC2IFP(sc)->if_flags |= IFF_UP; 1632 if (sc->sc_carpdev) 1633 CARP_SCLOCK(sc); 1634 carp_setrun(sc, 0); 1635 if (sc->sc_carpdev) 1636 CARP_SCUNLOCK(sc); 1637 return (0); 1638 } 1639 1640 /* we have to do it by hands to check we won't match on us */ 1641 ia_if = NULL; own = 0; 1642 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1643 int i; 1644 1645 for (i = 0; i < 4; i++) { 1646 if ((sin6->sin6_addr.s6_addr32[i] & 1647 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1648 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1649 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1650 break; 1651 } 1652 /* and, yeah, we need a multicast-capable iface too */ 1653 if (ia->ia_ifp != SC2IFP(sc) && 1654 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1655 (i == 4)) { 1656 if (!ia_if) 1657 ia_if = ia; 1658 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1659 &ia->ia_addr.sin6_addr)) 1660 own++; 1661 } 1662 } 1663 1664 if (!ia_if) 1665 return (EADDRNOTAVAIL); 1666 ia = ia_if; 1667 ifp = ia->ia_ifp; 1668 1669 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1670 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1671 return (EADDRNOTAVAIL); 1672 1673 if (!sc->sc_naddrs6) { 1674 im6o->im6o_multicast_ifp = ifp; 1675 1676 /* join CARP multicast address */ 1677 bzero(&in6, sizeof(in6)); 1678 in6.s6_addr16[0] = htons(0xff02); 1679 in6.s6_addr8[15] = 0x12; 1680 if (in6_setscope(&in6, ifp, NULL) != 0) 1681 goto cleanup; 1682 if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL) 1683 goto cleanup; 
1684 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1685 1686 /* join solicited multicast address */ 1687 bzero(&in6, sizeof(in6)); 1688 in6.s6_addr16[0] = htons(0xff02); 1689 in6.s6_addr32[1] = 0; 1690 in6.s6_addr32[2] = htonl(1); 1691 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1692 in6.s6_addr8[12] = 0xff; 1693 if (in6_setscope(&in6, ifp, NULL) != 0) 1694 goto cleanup; 1695 if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL) 1696 goto cleanup; 1697 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1698 } 1699 1700 if (!ifp->if_carp) { 1701 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, 1702 M_WAITOK|M_ZERO); 1703 if (!cif) { 1704 error = ENOBUFS; 1705 goto cleanup; 1706 } 1707 if ((error = ifpromisc(ifp, 1))) { 1708 FREE(cif, M_CARP); 1709 goto cleanup; 1710 } 1711 1712 CARP_LOCK_INIT(cif); 1713 CARP_LOCK(cif); 1714 cif->vhif_ifp = ifp; 1715 TAILQ_INIT(&cif->vhif_vrs); 1716 ifp->if_carp = cif; 1717 1718 } else { 1719 struct carp_softc *vr; 1720 1721 cif = (struct carp_if *)ifp->if_carp; 1722 CARP_LOCK(cif); 1723 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1724 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1725 CARP_UNLOCK(cif); 1726 error = EINVAL; 1727 goto cleanup; 1728 } 1729 } 1730 sc->sc_ia6 = ia; 1731 sc->sc_carpdev = ifp; 1732 1733 { /* XXX prevent endless loop if already in queue */ 1734 struct carp_softc *vr, *after = NULL; 1735 int myself = 0; 1736 cif = (struct carp_if *)ifp->if_carp; 1737 CARP_LOCK_ASSERT(cif); 1738 1739 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1740 if (vr == sc) 1741 myself = 1; 1742 if (vr->sc_vhid < sc->sc_vhid) 1743 after = vr; 1744 } 1745 1746 if (!myself) { 1747 /* We're trying to keep things in order */ 1748 if (after == NULL) { 1749 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1750 } else { 1751 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1752 } 1753 cif->vhif_nvrs++; 1754 } 1755 } 1756 1757 sc->sc_naddrs6++; 1758 SC2IFP(sc)->if_flags |= IFF_UP; 1759 if (own) 1760 
sc->sc_advskew = 0; 1761 carp_sc_state_locked(sc); 1762 carp_setrun(sc, 0); 1763 1764 CARP_UNLOCK(cif); 1765 1766 return (0); 1767 1768 cleanup: 1769 /* clean up multicast memberships */ 1770 if (!sc->sc_naddrs6) { 1771 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1772 imm = LIST_FIRST(&im6o->im6o_memberships); 1773 LIST_REMOVE(imm, i6mm_chain); 1774 in6_leavegroup(imm); 1775 } 1776 } 1777 return (error); 1778 } 1779 1780 static int 1781 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1782 { 1783 int error = 0; 1784 1785 if (!--sc->sc_naddrs6) { 1786 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1787 struct ip6_moptions *im6o = &sc->sc_im6o; 1788 1789 CARP_LOCK(cif); 1790 callout_stop(&sc->sc_ad_tmo); 1791 SC2IFP(sc)->if_flags &= ~IFF_UP; 1792 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1793 sc->sc_vhid = -1; 1794 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1795 struct in6_multi_mship *imm = 1796 LIST_FIRST(&im6o->im6o_memberships); 1797 1798 LIST_REMOVE(imm, i6mm_chain); 1799 in6_leavegroup(imm); 1800 } 1801 im6o->im6o_multicast_ifp = NULL; 1802 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1803 if (!--cif->vhif_nvrs) { 1804 CARP_LOCK_DESTROY(cif); 1805 sc->sc_carpdev->if_carp = NULL; 1806 FREE(cif, M_IFADDR); 1807 } else 1808 CARP_UNLOCK(cif); 1809 } 1810 1811 return (error); 1812 } 1813 #endif /* INET6 */ 1814 1815 static int 1816 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1817 { 1818 struct carp_softc *sc = ifp->if_softc, *vr; 1819 struct carpreq carpr; 1820 struct ifaddr *ifa; 1821 struct ifreq *ifr; 1822 struct ifaliasreq *ifra; 1823 int locked = 0, error = 0; 1824 1825 ifa = (struct ifaddr *)addr; 1826 ifra = (struct ifaliasreq *)addr; 1827 ifr = (struct ifreq *)addr; 1828 1829 switch (cmd) { 1830 case SIOCSIFADDR: 1831 switch (ifa->ifa_addr->sa_family) { 1832 #ifdef INET 1833 case AF_INET: 1834 SC2IFP(sc)->if_flags |= IFF_UP; 1835 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1836 sizeof(struct sockaddr)); 1837 
error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1838 break; 1839 #endif /* INET */ 1840 #ifdef INET6 1841 case AF_INET6: 1842 SC2IFP(sc)->if_flags |= IFF_UP; 1843 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1844 break; 1845 #endif /* INET6 */ 1846 default: 1847 error = EAFNOSUPPORT; 1848 break; 1849 } 1850 break; 1851 1852 case SIOCAIFADDR: 1853 switch (ifa->ifa_addr->sa_family) { 1854 #ifdef INET 1855 case AF_INET: 1856 SC2IFP(sc)->if_flags |= IFF_UP; 1857 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1858 sizeof(struct sockaddr)); 1859 error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); 1860 break; 1861 #endif /* INET */ 1862 #ifdef INET6 1863 case AF_INET6: 1864 SC2IFP(sc)->if_flags |= IFF_UP; 1865 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1866 break; 1867 #endif /* INET6 */ 1868 default: 1869 error = EAFNOSUPPORT; 1870 break; 1871 } 1872 break; 1873 1874 case SIOCDIFADDR: 1875 switch (ifa->ifa_addr->sa_family) { 1876 #ifdef INET 1877 case AF_INET: 1878 error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); 1879 break; 1880 #endif /* INET */ 1881 #ifdef INET6 1882 case AF_INET6: 1883 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1884 break; 1885 #endif /* INET6 */ 1886 default: 1887 error = EAFNOSUPPORT; 1888 break; 1889 } 1890 break; 1891 1892 case SIOCSIFFLAGS: 1893 if (sc->sc_carpdev) { 1894 locked = 1; 1895 CARP_SCLOCK(sc); 1896 } 1897 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1898 callout_stop(&sc->sc_ad_tmo); 1899 callout_stop(&sc->sc_md_tmo); 1900 callout_stop(&sc->sc_md6_tmo); 1901 if (sc->sc_state == MASTER) 1902 carp_send_ad_locked(sc); 1903 carp_set_state(sc, INIT); 1904 carp_setrun(sc, 0); 1905 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 1906 SC2IFP(sc)->if_flags |= IFF_UP; 1907 carp_setrun(sc, 0); 1908 } 1909 break; 1910 1911 case SIOCSVH: 1912 error = priv_check(curthread, PRIV_NETINET_CARP); 1913 if (error) 1914 break; 1915 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 
1916 break; 1917 error = 1; 1918 if (sc->sc_carpdev) { 1919 locked = 1; 1920 CARP_SCLOCK(sc); 1921 } 1922 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1923 switch (carpr.carpr_state) { 1924 case BACKUP: 1925 callout_stop(&sc->sc_ad_tmo); 1926 carp_set_state(sc, BACKUP); 1927 carp_setrun(sc, 0); 1928 carp_setroute(sc, RTM_DELETE); 1929 break; 1930 case MASTER: 1931 carp_master_down_locked(sc); 1932 break; 1933 default: 1934 break; 1935 } 1936 } 1937 if (carpr.carpr_vhid > 0) { 1938 if (carpr.carpr_vhid > 255) { 1939 error = EINVAL; 1940 break; 1941 } 1942 if (sc->sc_carpdev) { 1943 struct carp_if *cif; 1944 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1945 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1946 if (vr != sc && 1947 vr->sc_vhid == carpr.carpr_vhid) { 1948 error = EEXIST; 1949 break; 1950 } 1951 if (error == EEXIST) 1952 break; 1953 } 1954 sc->sc_vhid = carpr.carpr_vhid; 1955 IF_LLADDR(sc->sc_ifp)[0] = 0; 1956 IF_LLADDR(sc->sc_ifp)[1] = 0; 1957 IF_LLADDR(sc->sc_ifp)[2] = 0x5e; 1958 IF_LLADDR(sc->sc_ifp)[3] = 0; 1959 IF_LLADDR(sc->sc_ifp)[4] = 1; 1960 IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; 1961 error--; 1962 } 1963 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1964 if (carpr.carpr_advskew >= 255) { 1965 error = EINVAL; 1966 break; 1967 } 1968 if (carpr.carpr_advbase > 255) { 1969 error = EINVAL; 1970 break; 1971 } 1972 sc->sc_advbase = carpr.carpr_advbase; 1973 sc->sc_advskew = carpr.carpr_advskew; 1974 error--; 1975 } 1976 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1977 if (error > 0) 1978 error = EINVAL; 1979 else { 1980 error = 0; 1981 carp_setrun(sc, 0); 1982 } 1983 break; 1984 1985 case SIOCGVH: 1986 /* XXX: lockless read */ 1987 bzero(&carpr, sizeof(carpr)); 1988 carpr.carpr_state = sc->sc_state; 1989 carpr.carpr_vhid = sc->sc_vhid; 1990 carpr.carpr_advbase = sc->sc_advbase; 1991 carpr.carpr_advskew = sc->sc_advskew; 1992 error = priv_check(curthread, PRIV_NETINET_CARP); 1993 if (error == 0) 1994 
bcopy(sc->sc_key, carpr.carpr_key, 1995 sizeof(carpr.carpr_key)); 1996 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1997 break; 1998 1999 default: 2000 error = EINVAL; 2001 } 2002 2003 if (locked) 2004 CARP_SCUNLOCK(sc); 2005 2006 carp_hmac_prepare(sc); 2007 2008 return (error); 2009 } 2010 2011 /* 2012 * XXX: this is looutput. We should eventually use it from there. 2013 */ 2014 static int 2015 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 2016 struct rtentry *rt) 2017 { 2018 u_int32_t af; 2019 2020 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 2021 2022 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2023 m_freem(m); 2024 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 2025 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 2026 } 2027 2028 ifp->if_opackets++; 2029 ifp->if_obytes += m->m_pkthdr.len; 2030 2031 /* BPF writes need to be handled specially. */ 2032 if (dst->sa_family == AF_UNSPEC) { 2033 bcopy(dst->sa_data, &af, sizeof(af)); 2034 dst->sa_family = af; 2035 } 2036 2037 #if 1 /* XXX */ 2038 switch (dst->sa_family) { 2039 case AF_INET: 2040 case AF_INET6: 2041 case AF_IPX: 2042 case AF_APPLETALK: 2043 break; 2044 default: 2045 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 2046 m_freem(m); 2047 return (EAFNOSUPPORT); 2048 } 2049 #endif 2050 return(if_simloop(ifp, m, dst->sa_family, 0)); 2051 } 2052 2053 /* 2054 * Start output on carp interface. This function should never be called. 
2055 */ 2056 static void 2057 carp_start(struct ifnet *ifp) 2058 { 2059 #ifdef DEBUG 2060 printf("%s: start called\n", ifp->if_xname); 2061 #endif 2062 } 2063 2064 int 2065 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2066 struct rtentry *rt) 2067 { 2068 struct m_tag *mtag; 2069 struct carp_softc *sc; 2070 struct ifnet *carp_ifp; 2071 2072 if (!sa) 2073 return (0); 2074 2075 switch (sa->sa_family) { 2076 #ifdef INET 2077 case AF_INET: 2078 break; 2079 #endif /* INET */ 2080 #ifdef INET6 2081 case AF_INET6: 2082 break; 2083 #endif /* INET6 */ 2084 default: 2085 return (0); 2086 } 2087 2088 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2089 if (mtag == NULL) 2090 return (0); 2091 2092 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2093 sc = carp_ifp->if_softc; 2094 2095 /* Set the source MAC address to Virtual Router MAC Address */ 2096 switch (ifp->if_type) { 2097 case IFT_ETHER: 2098 case IFT_L2VLAN: { 2099 struct ether_header *eh; 2100 2101 eh = mtod(m, struct ether_header *); 2102 eh->ether_shost[0] = 0; 2103 eh->ether_shost[1] = 0; 2104 eh->ether_shost[2] = 0x5e; 2105 eh->ether_shost[3] = 0; 2106 eh->ether_shost[4] = 1; 2107 eh->ether_shost[5] = sc->sc_vhid; 2108 } 2109 break; 2110 case IFT_FDDI: { 2111 struct fddi_header *fh; 2112 2113 fh = mtod(m, struct fddi_header *); 2114 fh->fddi_shost[0] = 0; 2115 fh->fddi_shost[1] = 0; 2116 fh->fddi_shost[2] = 0x5e; 2117 fh->fddi_shost[3] = 0; 2118 fh->fddi_shost[4] = 1; 2119 fh->fddi_shost[5] = sc->sc_vhid; 2120 } 2121 break; 2122 case IFT_ISO88025: { 2123 struct iso88025_header *th; 2124 th = mtod(m, struct iso88025_header *); 2125 th->iso88025_shost[0] = 3; 2126 th->iso88025_shost[1] = 0; 2127 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 2128 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 2129 th->iso88025_shost[4] = 0; 2130 th->iso88025_shost[5] = 0; 2131 } 2132 break; 2133 default: 2134 printf("%s: carp is not supported for this interface type\n", 2135 ifp->if_xname); 2136 
return (EOPNOTSUPP); 2137 } 2138 2139 return (0); 2140 } 2141 2142 static void 2143 carp_set_state(struct carp_softc *sc, int state) 2144 { 2145 2146 if (sc->sc_carpdev) 2147 CARP_SCLOCK_ASSERT(sc); 2148 2149 if (sc->sc_state == state) 2150 return; 2151 2152 sc->sc_state = state; 2153 switch (state) { 2154 case BACKUP: 2155 SC2IFP(sc)->if_link_state = LINK_STATE_DOWN; 2156 break; 2157 case MASTER: 2158 SC2IFP(sc)->if_link_state = LINK_STATE_UP; 2159 break; 2160 default: 2161 SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN; 2162 break; 2163 } 2164 rt_ifmsg(SC2IFP(sc)); 2165 } 2166 2167 void 2168 carp_carpdev_state(void *v) 2169 { 2170 struct carp_if *cif = v; 2171 2172 CARP_LOCK(cif); 2173 carp_carpdev_state_locked(cif); 2174 CARP_UNLOCK(cif); 2175 } 2176 2177 static void 2178 carp_carpdev_state_locked(struct carp_if *cif) 2179 { 2180 struct carp_softc *sc; 2181 2182 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2183 carp_sc_state_locked(sc); 2184 } 2185 2186 static void 2187 carp_sc_state_locked(struct carp_softc *sc) 2188 { 2189 CARP_SCLOCK_ASSERT(sc); 2190 2191 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2192 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2193 sc->sc_flags_backup = SC2IFP(sc)->if_flags; 2194 SC2IFP(sc)->if_flags &= ~IFF_UP; 2195 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 2196 callout_stop(&sc->sc_ad_tmo); 2197 callout_stop(&sc->sc_md_tmo); 2198 callout_stop(&sc->sc_md6_tmo); 2199 carp_set_state(sc, INIT); 2200 carp_setrun(sc, 0); 2201 if (!sc->sc_suppress) { 2202 carp_suppress_preempt++; 2203 if (carp_suppress_preempt == 1) { 2204 CARP_SCUNLOCK(sc); 2205 carp_send_ad_all(); 2206 CARP_SCLOCK(sc); 2207 } 2208 } 2209 sc->sc_suppress = 1; 2210 } else { 2211 SC2IFP(sc)->if_flags |= sc->sc_flags_backup; 2212 carp_set_state(sc, INIT); 2213 carp_setrun(sc, 0); 2214 if (sc->sc_suppress) 2215 carp_suppress_preempt--; 2216 sc->sc_suppress = 0; 2217 } 2218 2219 return; 2220 } 2221 2222 static int 2223 carp_modevent(module_t mod, int type, void *data) 2224 
{ 2225 switch (type) { 2226 case MOD_LOAD: 2227 if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 2228 carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); 2229 if (if_detach_event_tag == NULL) 2230 return (ENOMEM); 2231 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2232 LIST_INIT(&carpif_list); 2233 if_clone_attach(&carp_cloner); 2234 break; 2235 2236 case MOD_UNLOAD: 2237 EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); 2238 if_clone_detach(&carp_cloner); 2239 mtx_destroy(&carp_mtx); 2240 break; 2241 2242 default: 2243 return (EINVAL); 2244 } 2245 2246 return (0); 2247 } 2248 2249 static moduledata_t carp_mod = { 2250 "carp", 2251 carp_modevent, 2252 0 2253 }; 2254 2255 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2256