1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_carp.h"
#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>

#include <sys/socket.h>
#include <sys/vnode.h>

#include <machine/stdarg.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/fddi.h>
#include <net/iso88025.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>

#ifdef INET
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#include <machine/in_cksum.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <crypto/sha1.h>
#include <netinet/ip_carp.h>

#define	CARP_IFNAME	"carp"
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
SYSCTL_DECL(_net_inet_carp);

/*
 * Per-virtual-host state; one instance is allocated for every cloned
 * carp(4) interface and hangs off that interface's if_softc.
 */
struct carp_softc {
	struct ifnet	 	*sc_ifp;	/* Interface clue */
	struct ifnet		*sc_carpdev;	/* Pointer to parent interface */
	struct in_ifaddr 	*sc_ia;		/* primary iface address */
	struct ip_moptions 	 sc_imo;	/* multicast options passed to ip_output() */
#ifdef INET6
	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
	struct ip6_moptions 	 sc_im6o;	/* multicast options passed to ip6_output() */
#endif /* INET6 */
	TAILQ_ENTRY(carp_softc)	 sc_list;	/* linkage on the parent's carp_if vhif_vrs queue */

	/* Protocol state of this virtual host. */
	enum { INIT = 0, BACKUP, MASTER }	sc_state;

	int			 sc_flags_backup;
	int			 sc_suppress;	/* non-zero while this softc holds a carp_suppress_preempt count */

	int			 sc_sendad_errors;	/* failed advertisement sends, see carp_send_ad_locked() */
#define	CARP_SENDAD_MAX_ERRORS	3
	int			 sc_sendad_success;	/* successful sends since the error limit was hit */
#define	CARP_SENDAD_MIN_SUCCESS 3

	int			 sc_vhid;	/* virtual host id; -1 until configured */
	int			 sc_advskew;
	int			 sc_naddrs;
	int			 sc_naddrs6;
	int			 sc_advbase;	/* seconds */
	int			 sc_init_counter;	/* non-zero: sc_counter must be (re)seeded randomly */
	u_int64_t		 sc_counter;	/* 64-bit counter carried in advertisements */

	/* authentication */
#define	CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];
	unsigned char sc_pad[CARP_HMAC_PAD];	/* HMAC pad; holds the opad after carp_hmac_prepare() */
	SHA1_CTX sc_sha1;			/* precomputed first part of the inner hash */

	struct callout		 sc_ad_tmo;	/* advertisement timeout */
	struct callout		 sc_md_tmo;	/* master down timeout */
	struct callout 		 sc_md6_tmo;	/* master down timeout */

	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
};
#define	SC2IFP(sc)	((sc)->sc_ifp)

/*
 * Global count of reasons to suppress preemption; while non-zero the
 * advertised skew is raised to at least 240 (see carp_send_ad_locked()).
 */
int carp_suppress_preempt = 0;
/* Tunables indexed by CARPCTL_*; exported through the sysctls below. */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };	/* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

/* Packet and error counters, updated through CARPSTATS_INC(). */
struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");

/*
 * Per-parent-interface state, reached through ifp->if_carp: the queue of
 * virtual hosts attached to that interface and the mutex protecting it.
 */
struct carp_if {
	TAILQ_HEAD(, carp_softc) vhif_vrs;	/* virtual hosts on this interface */
	int vhif_nvrs;				/* number of entries on vhif_vrs */

	struct ifnet 	*vhif_ifp;
	struct mtx	 vhif_mtx;		/* see the CARP_LOCK*() macros */
};

/* Get carp_if from softc.
Valid after carp_set_addr{,6}. */ 164 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) 165 166 /* lock per carp_if queue */ 167 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ 168 NULL, MTX_DEF) 169 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) 170 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) 171 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) 172 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) 173 174 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) 175 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) 176 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) 177 178 #define CARP_LOG(...) do { \ 179 if (carp_opts[CARPCTL_LOG] > 0) \ 180 log(LOG_INFO, __VA_ARGS__); \ 181 } while (0) 182 183 #define CARP_DEBUG(...) do { \ 184 if (carp_opts[CARPCTL_LOG] > 1) \ 185 log(LOG_DEBUG, __VA_ARGS__); \ 186 } while (0) 187 188 static void carp_hmac_prepare(struct carp_softc *); 189 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 190 unsigned char *); 191 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 192 unsigned char *); 193 static void carp_setroute(struct carp_softc *, int); 194 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 195 static int carp_clone_create(struct if_clone *, int, caddr_t); 196 static void carp_clone_destroy(struct ifnet *); 197 static void carpdetach(struct carp_softc *, int); 198 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 199 struct carp_header *); 200 static void carp_send_ad_all(void); 201 static void carp_send_ad(void *); 202 static void carp_send_ad_locked(struct carp_softc *); 203 static void carp_send_arp(struct carp_softc *); 204 static void carp_master_down(void *); 205 static void carp_master_down_locked(struct carp_softc *); 206 static int carp_ioctl(struct ifnet *, u_long, caddr_t); 207 static int carp_looutput(struct ifnet *, struct mbuf *, 
struct sockaddr *, 208 struct route *); 209 static void carp_start(struct ifnet *); 210 static void carp_setrun(struct carp_softc *, sa_family_t); 211 static void carp_set_state(struct carp_softc *, int); 212 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 213 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 214 215 static void carp_multicast_cleanup(struct carp_softc *); 216 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 217 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); 218 static void carp_carpdev_state_locked(struct carp_if *); 219 static void carp_sc_state_locked(struct carp_softc *); 220 #ifdef INET6 221 static void carp_send_na(struct carp_softc *); 222 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 223 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 224 static void carp_multicast6_cleanup(struct carp_softc *); 225 #endif 226 227 static LIST_HEAD(, carp_softc) carpif_list; 228 static struct mtx carp_mtx; 229 IFC_SIMPLE_DECLARE(carp, 0); 230 231 static eventhandler_tag if_detach_event_tag; 232 233 static __inline u_int16_t 234 carp_cksum(struct mbuf *m, int len) 235 { 236 return (in_cksum(m, len)); 237 } 238 239 static void 240 carp_hmac_prepare(struct carp_softc *sc) 241 { 242 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 243 u_int8_t vhid = sc->sc_vhid & 0xff; 244 struct ifaddr *ifa; 245 int i, found; 246 #ifdef INET 247 struct in_addr last, cur, in; 248 #endif 249 #ifdef INET6 250 struct in6_addr last6, cur6, in6; 251 #endif 252 253 if (sc->sc_carpdev) 254 CARP_SCLOCK(sc); 255 256 /* XXX: possible race here */ 257 258 /* compute ipad from key */ 259 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 260 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 261 for (i = 0; i < sizeof(sc->sc_pad); i++) 262 sc->sc_pad[i] ^= 0x36; 263 264 /* precompute first part of inner hash */ 265 SHA1Init(&sc->sc_sha1); 266 SHA1Update(&sc->sc_sha1, sc->sc_pad, 
sizeof(sc->sc_pad)); 267 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 268 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 269 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 270 #ifdef INET 271 cur.s_addr = 0; 272 do { 273 found = 0; 274 last = cur; 275 cur.s_addr = 0xffffffff; 276 IF_ADDR_LOCK(SC2IFP(sc)); 277 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 278 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 279 if (ifa->ifa_addr->sa_family == AF_INET && 280 ntohl(in.s_addr) > ntohl(last.s_addr) && 281 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 282 cur.s_addr = in.s_addr; 283 found++; 284 } 285 } 286 IF_ADDR_UNLOCK(SC2IFP(sc)); 287 if (found) 288 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 289 } while (found); 290 #endif /* INET */ 291 #ifdef INET6 292 memset(&cur6, 0, sizeof(cur6)); 293 do { 294 found = 0; 295 last6 = cur6; 296 memset(&cur6, 0xff, sizeof(cur6)); 297 IF_ADDR_LOCK(SC2IFP(sc)); 298 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 299 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 300 if (IN6_IS_SCOPE_EMBED(&in6)) 301 in6.s6_addr16[1] = 0; 302 if (ifa->ifa_addr->sa_family == AF_INET6 && 303 memcmp(&in6, &last6, sizeof(in6)) > 0 && 304 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 305 cur6 = in6; 306 found++; 307 } 308 } 309 IF_ADDR_UNLOCK(SC2IFP(sc)); 310 if (found) 311 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 312 } while (found); 313 #endif /* INET6 */ 314 315 /* convert ipad to opad */ 316 for (i = 0; i < sizeof(sc->sc_pad); i++) 317 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 318 319 if (sc->sc_carpdev) 320 CARP_SCUNLOCK(sc); 321 } 322 323 static void 324 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 325 unsigned char md[20]) 326 { 327 SHA1_CTX sha1ctx; 328 329 /* fetch first half of inner hash */ 330 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 331 332 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 333 SHA1Final(md, &sha1ctx); 334 335 /* outer hash */ 336 
SHA1Init(&sha1ctx); 337 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 338 SHA1Update(&sha1ctx, md, 20); 339 SHA1Final(md, &sha1ctx); 340 } 341 342 static int 343 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 344 unsigned char md[20]) 345 { 346 unsigned char md2[20]; 347 348 CARP_SCLOCK_ASSERT(sc); 349 350 carp_hmac_generate(sc, counter, md2); 351 352 return (bcmp(md, md2, sizeof(md2))); 353 } 354 355 static void 356 carp_setroute(struct carp_softc *sc, int cmd) 357 { 358 struct ifaddr *ifa; 359 int s; 360 361 if (sc->sc_carpdev) 362 CARP_SCLOCK_ASSERT(sc); 363 364 s = splnet(); 365 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 366 if (ifa->ifa_addr->sa_family == AF_INET && 367 sc->sc_carpdev != NULL) { 368 int count = carp_addrcount( 369 (struct carp_if *)sc->sc_carpdev->if_carp, 370 ifatoia(ifa), CARP_COUNT_MASTER); 371 372 if ((cmd == RTM_ADD && count == 1) || 373 (cmd == RTM_DELETE && count == 0)) 374 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 375 } 376 } 377 splx(s); 378 } 379 380 static int 381 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params) 382 { 383 384 struct carp_softc *sc; 385 struct ifnet *ifp; 386 387 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 388 ifp = SC2IFP(sc) = if_alloc(IFT_ETHER); 389 if (ifp == NULL) { 390 free(sc, M_CARP); 391 return (ENOSPC); 392 } 393 394 sc->sc_flags_backup = 0; 395 sc->sc_suppress = 0; 396 sc->sc_advbase = CARP_DFLTINTV; 397 sc->sc_vhid = -1; /* required setting */ 398 sc->sc_advskew = 0; 399 sc->sc_init_counter = 1; 400 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? 
*/ 401 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 402 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 403 M_WAITOK); 404 sc->sc_imo.imo_mfilters = NULL; 405 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 406 sc->sc_imo.imo_multicast_vif = -1; 407 #ifdef INET6 408 sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc( 409 (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, 410 M_WAITOK); 411 sc->sc_im6o.im6o_mfilters = NULL; 412 sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; 413 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 414 #endif 415 416 callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE); 417 callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE); 418 callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE); 419 420 ifp->if_softc = sc; 421 if_initname(ifp, CARP_IFNAME, unit); 422 ifp->if_mtu = ETHERMTU; 423 ifp->if_flags = IFF_LOOPBACK; 424 ifp->if_ioctl = carp_ioctl; 425 ifp->if_output = carp_looutput; 426 ifp->if_start = carp_start; 427 ifp->if_type = IFT_CARP; 428 ifp->if_snd.ifq_maxlen = ifqmaxlen; 429 ifp->if_hdrlen = 0; 430 if_attach(ifp); 431 bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 432 mtx_lock(&carp_mtx); 433 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 434 mtx_unlock(&carp_mtx); 435 return (0); 436 } 437 438 static void 439 carp_clone_destroy(struct ifnet *ifp) 440 { 441 struct carp_softc *sc = ifp->if_softc; 442 443 if (sc->sc_carpdev) 444 CARP_SCLOCK(sc); 445 carpdetach(sc, 1); /* Returns unlocked. */ 446 447 mtx_lock(&carp_mtx); 448 LIST_REMOVE(sc, sc_next); 449 mtx_unlock(&carp_mtx); 450 bpfdetach(ifp); 451 if_detach(ifp); 452 if_free_type(ifp, IFT_ETHER); 453 free(sc->sc_imo.imo_membership, M_CARP); 454 #ifdef INET6 455 free(sc->sc_im6o.im6o_membership, M_CARP); 456 #endif 457 free(sc, M_CARP); 458 } 459 460 /* 461 * This function can be called on CARP interface destroy path, 462 * and in case of the removal of the underlying interface as 463 * well. We differentiate these two cases. 
In the latter case 464 * we do not cleanup our multicast memberships, since they 465 * are already freed. Also, in the latter case we do not 466 * release the lock on return, because the function will be 467 * called once more, for another CARP instance on the same 468 * interface. 469 */ 470 static void 471 carpdetach(struct carp_softc *sc, int unlock) 472 { 473 struct carp_if *cif; 474 475 callout_stop(&sc->sc_ad_tmo); 476 callout_stop(&sc->sc_md_tmo); 477 callout_stop(&sc->sc_md6_tmo); 478 479 if (sc->sc_suppress) 480 carp_suppress_preempt--; 481 sc->sc_suppress = 0; 482 483 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 484 carp_suppress_preempt--; 485 sc->sc_sendad_errors = 0; 486 487 carp_set_state(sc, INIT); 488 SC2IFP(sc)->if_flags &= ~IFF_UP; 489 carp_setrun(sc, 0); 490 if (unlock) 491 carp_multicast_cleanup(sc); 492 #ifdef INET6 493 carp_multicast6_cleanup(sc); 494 #endif 495 496 if (sc->sc_carpdev != NULL) { 497 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 498 CARP_LOCK_ASSERT(cif); 499 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 500 if (!--cif->vhif_nvrs) { 501 ifpromisc(sc->sc_carpdev, 0); 502 sc->sc_carpdev->if_carp = NULL; 503 CARP_LOCK_DESTROY(cif); 504 free(cif, M_CARP); 505 } else if (unlock) 506 CARP_UNLOCK(cif); 507 sc->sc_carpdev = NULL; 508 } 509 } 510 511 /* Detach an interface from the carp. */ 512 static void 513 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 514 { 515 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 516 struct carp_softc *sc, *nextsc; 517 518 if (cif == NULL) 519 return; 520 521 /* 522 * XXX: At the end of for() cycle the lock will be destroyed. 523 */ 524 CARP_LOCK(cif); 525 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 526 nextsc = TAILQ_NEXT(sc, sc_list); 527 carpdetach(sc, 0); 528 } 529 } 530 531 /* 532 * process input packet. 533 * we have rearranged checks order compared to the rfc, 534 * but it seems more efficient this way or not possible otherwise. 
535 */ 536 void 537 carp_input(struct mbuf *m, int hlen) 538 { 539 struct ip *ip = mtod(m, struct ip *); 540 struct carp_header *ch; 541 int iplen, len; 542 543 CARPSTATS_INC(carps_ipackets); 544 545 if (!carp_opts[CARPCTL_ALLOW]) { 546 m_freem(m); 547 return; 548 } 549 550 /* check if received on a valid carp interface */ 551 if (m->m_pkthdr.rcvif->if_carp == NULL) { 552 CARPSTATS_INC(carps_badif); 553 CARP_DEBUG("carp_input: packet received on non-carp " 554 "interface: %s\n", 555 m->m_pkthdr.rcvif->if_xname); 556 m_freem(m); 557 return; 558 } 559 560 /* verify that the IP TTL is 255. */ 561 if (ip->ip_ttl != CARP_DFLTTL) { 562 CARPSTATS_INC(carps_badttl); 563 CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n", 564 ip->ip_ttl, 565 m->m_pkthdr.rcvif->if_xname); 566 m_freem(m); 567 return; 568 } 569 570 iplen = ip->ip_hl << 2; 571 572 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 573 CARPSTATS_INC(carps_badlen); 574 CARP_DEBUG("carp_input: received len %zd < " 575 "sizeof(struct carp_header) on %s\n", 576 m->m_len - sizeof(struct ip), 577 m->m_pkthdr.rcvif->if_xname); 578 m_freem(m); 579 return; 580 } 581 582 if (iplen + sizeof(*ch) < m->m_len) { 583 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 584 CARPSTATS_INC(carps_hdrops); 585 CARP_DEBUG("carp_input: pullup failed\n"); 586 return; 587 } 588 ip = mtod(m, struct ip *); 589 } 590 ch = (struct carp_header *)((char *)ip + iplen); 591 592 /* 593 * verify that the received packet length is 594 * equal to the CARP header 595 */ 596 len = iplen + sizeof(*ch); 597 if (len > m->m_pkthdr.len) { 598 CARPSTATS_INC(carps_badlen); 599 CARP_DEBUG("carp_input: packet too short %d on %s\n", 600 m->m_pkthdr.len, 601 m->m_pkthdr.rcvif->if_xname); 602 m_freem(m); 603 return; 604 } 605 606 if ((m = m_pullup(m, len)) == NULL) { 607 CARPSTATS_INC(carps_hdrops); 608 return; 609 } 610 ip = mtod(m, struct ip *); 611 ch = (struct carp_header *)((char *)ip + iplen); 612 613 /* verify the CARP checksum */ 614 m->m_data += iplen; 
615 if (carp_cksum(m, len - iplen)) { 616 CARPSTATS_INC(carps_badsum); 617 CARP_DEBUG("carp_input: checksum failed on %s\n", 618 m->m_pkthdr.rcvif->if_xname); 619 m_freem(m); 620 return; 621 } 622 m->m_data -= iplen; 623 624 carp_input_c(m, ch, AF_INET); 625 } 626 627 #ifdef INET6 628 int 629 carp6_input(struct mbuf **mp, int *offp, int proto) 630 { 631 struct mbuf *m = *mp; 632 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 633 struct carp_header *ch; 634 u_int len; 635 636 CARPSTATS_INC(carps_ipackets6); 637 638 if (!carp_opts[CARPCTL_ALLOW]) { 639 m_freem(m); 640 return (IPPROTO_DONE); 641 } 642 643 /* check if received on a valid carp interface */ 644 if (m->m_pkthdr.rcvif->if_carp == NULL) { 645 CARPSTATS_INC(carps_badif); 646 CARP_DEBUG("carp6_input: packet received on non-carp " 647 "interface: %s\n", 648 m->m_pkthdr.rcvif->if_xname); 649 m_freem(m); 650 return (IPPROTO_DONE); 651 } 652 653 /* verify that the IP TTL is 255 */ 654 if (ip6->ip6_hlim != CARP_DFLTTL) { 655 CARPSTATS_INC(carps_badttl); 656 CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n", 657 ip6->ip6_hlim, 658 m->m_pkthdr.rcvif->if_xname); 659 m_freem(m); 660 return (IPPROTO_DONE); 661 } 662 663 /* verify that we have a complete carp packet */ 664 len = m->m_len; 665 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 666 if (ch == NULL) { 667 CARPSTATS_INC(carps_badlen); 668 CARP_DEBUG("carp6_input: packet size %u too small\n", len); 669 return (IPPROTO_DONE); 670 } 671 672 673 /* verify the CARP checksum */ 674 m->m_data += *offp; 675 if (carp_cksum(m, sizeof(*ch))) { 676 CARPSTATS_INC(carps_badsum); 677 CARP_DEBUG("carp6_input: checksum failed, on %s\n", 678 m->m_pkthdr.rcvif->if_xname); 679 m_freem(m); 680 return (IPPROTO_DONE); 681 } 682 m->m_data -= *offp; 683 684 carp_input_c(m, ch, AF_INET6); 685 return (IPPROTO_DONE); 686 } 687 #endif /* INET6 */ 688 689 static void 690 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 691 { 692 struct ifnet 
*ifp = m->m_pkthdr.rcvif; 693 struct carp_softc *sc; 694 u_int64_t tmp_counter; 695 struct timeval sc_tv, ch_tv; 696 697 /* verify that the VHID is valid on the receiving interface */ 698 CARP_LOCK(ifp->if_carp); 699 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 700 if (sc->sc_vhid == ch->carp_vhid) 701 break; 702 703 if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && 704 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 705 CARPSTATS_INC(carps_badvhid); 706 CARP_UNLOCK(ifp->if_carp); 707 m_freem(m); 708 return; 709 } 710 711 getmicrotime(&SC2IFP(sc)->if_lastchange); 712 SC2IFP(sc)->if_ipackets++; 713 SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; 714 715 if (bpf_peers_present(SC2IFP(sc)->if_bpf)) { 716 struct ip *ip = mtod(m, struct ip *); 717 uint32_t af1 = af; 718 719 /* BPF wants net byte order */ 720 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 721 ip->ip_off = htons(ip->ip_off); 722 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m); 723 } 724 725 /* verify the CARP version. 
*/ 726 if (ch->carp_version != CARP_VERSION) { 727 CARPSTATS_INC(carps_badver); 728 SC2IFP(sc)->if_ierrors++; 729 CARP_UNLOCK(ifp->if_carp); 730 CARP_DEBUG("%s; invalid version %d\n", 731 SC2IFP(sc)->if_xname, 732 ch->carp_version); 733 m_freem(m); 734 return; 735 } 736 737 /* verify the hash */ 738 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 739 CARPSTATS_INC(carps_badauth); 740 SC2IFP(sc)->if_ierrors++; 741 CARP_UNLOCK(ifp->if_carp); 742 CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); 743 m_freem(m); 744 return; 745 } 746 747 tmp_counter = ntohl(ch->carp_counter[0]); 748 tmp_counter = tmp_counter<<32; 749 tmp_counter += ntohl(ch->carp_counter[1]); 750 751 /* XXX Replay protection goes here */ 752 753 sc->sc_init_counter = 0; 754 sc->sc_counter = tmp_counter; 755 756 sc_tv.tv_sec = sc->sc_advbase; 757 if (carp_suppress_preempt && sc->sc_advskew < 240) 758 sc_tv.tv_usec = 240 * 1000000 / 256; 759 else 760 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 761 ch_tv.tv_sec = ch->carp_advbase; 762 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 763 764 switch (sc->sc_state) { 765 case INIT: 766 break; 767 case MASTER: 768 /* 769 * If we receive an advertisement from a master who's going to 770 * be more frequent than us, go into BACKUP state. 771 */ 772 if (timevalcmp(&sc_tv, &ch_tv, >) || 773 timevalcmp(&sc_tv, &ch_tv, ==)) { 774 callout_stop(&sc->sc_ad_tmo); 775 CARP_LOG("%s: MASTER -> BACKUP " 776 "(more frequent advertisement received)\n", 777 SC2IFP(sc)->if_xname); 778 carp_set_state(sc, BACKUP); 779 carp_setrun(sc, 0); 780 carp_setroute(sc, RTM_DELETE); 781 } 782 break; 783 case BACKUP: 784 /* 785 * If we're pre-empting masters who advertise slower than us, 786 * and this one claims to be slower, treat him as down. 
787 */ 788 if (carp_opts[CARPCTL_PREEMPT] && 789 timevalcmp(&sc_tv, &ch_tv, <)) { 790 CARP_LOG("%s: BACKUP -> MASTER " 791 "(preempting a slower master)\n", 792 SC2IFP(sc)->if_xname); 793 carp_master_down_locked(sc); 794 break; 795 } 796 797 /* 798 * If the master is going to advertise at such a low frequency 799 * that he's guaranteed to time out, we'd might as well just 800 * treat him as timed out now. 801 */ 802 sc_tv.tv_sec = sc->sc_advbase * 3; 803 if (timevalcmp(&sc_tv, &ch_tv, <)) { 804 CARP_LOG("%s: BACKUP -> MASTER " 805 "(master timed out)\n", 806 SC2IFP(sc)->if_xname); 807 carp_master_down_locked(sc); 808 break; 809 } 810 811 /* 812 * Otherwise, we reset the counter and wait for the next 813 * advertisement. 814 */ 815 carp_setrun(sc, af); 816 break; 817 } 818 819 CARP_UNLOCK(ifp->if_carp); 820 821 m_freem(m); 822 return; 823 } 824 825 static int 826 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 827 { 828 struct m_tag *mtag; 829 struct ifnet *ifp = SC2IFP(sc); 830 831 if (sc->sc_init_counter) { 832 /* this could also be seconds since unix epoch */ 833 sc->sc_counter = arc4random(); 834 sc->sc_counter = sc->sc_counter << 32; 835 sc->sc_counter += arc4random(); 836 } else 837 sc->sc_counter++; 838 839 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 840 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 841 842 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 843 844 /* Tag packet for carp_output */ 845 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 846 if (mtag == NULL) { 847 m_freem(m); 848 SC2IFP(sc)->if_oerrors++; 849 return (ENOMEM); 850 } 851 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 852 m_tag_prepend(m, mtag); 853 854 return (0); 855 } 856 857 static void 858 carp_send_ad_all(void) 859 { 860 struct carp_softc *sc; 861 862 mtx_lock(&carp_mtx); 863 LIST_FOREACH(sc, &carpif_list, sc_next) { 864 if (sc->sc_carpdev == NULL) 865 continue; 866 CARP_SCLOCK(sc); 
if ((SC2IFP(sc)->if_flags & IFF_UP) &&
		    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
		     sc->sc_state == MASTER)
			carp_send_ad_locked(sc);
		CARP_SCUNLOCK(sc);
	}
	mtx_unlock(&carp_mtx);
}

/*
 * Callout handler for sc_ad_tmo: take the carp_if lock and send one
 * advertisement for the softc passed as v.
 */
static void
carp_send_ad(void *v)
{
	struct carp_softc *sc = v;

	CARP_SCLOCK(sc);
	carp_send_ad_locked(sc);
	CARP_SCUNLOCK(sc);
}

/*
 * Build and transmit one advertisement per configured address family and
 * rearm the advertisement callout.
 *
 * When the interface is no longer up and running, base and skew are both
 * advertised as 255 and the callout is not rearmed.  Send failures are
 * counted in sc_sendad_errors; reaching CARP_SENDAD_MAX_ERRORS bumps
 * carp_suppress_preempt, and CARP_SENDAD_MIN_SUCCESS good sends after that
 * drop it again.
 *
 * Called with the carp_if lock held; the lock is dropped and retaken
 * around carp_send_ad_all().
 */
static void
carp_send_ad_locked(struct carp_softc *sc)
{
	struct carp_header ch;
	struct timeval tv;
	struct carp_header *ch_ptr;
	struct mbuf *m;
	int len, advbase, advskew;

	CARP_SCLOCK_ASSERT(sc);

	/* bow out if we've lost our UPness or RUNNINGuiness */
	if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
	    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
		/* tv stays unset here; every use below is guarded by != 255. */
		advbase = 255;
		advskew = 255;
	} else {
		advbase = sc->sc_advbase;
		if (!carp_suppress_preempt || sc->sc_advskew > 240)
			advskew = sc->sc_advskew;
		else
			advskew = 240;
		/* Interval until the next advertisement. */
		tv.tv_sec = advbase;
		tv.tv_usec = advskew * 1000000 / 256;
	}

	/* Header template; counter, digest and checksum are added per packet. */
	ch.carp_version = CARP_VERSION;
	ch.carp_type = CARP_ADVERTISEMENT;
	ch.carp_vhid = sc->sc_vhid;
	ch.carp_advbase = advbase;
	ch.carp_advskew = advskew;
	ch.carp_authlen = 7;	/* XXX DEFINE */
	ch.carp_pad1 = 0;	/* must be zero */
	ch.carp_cksum = 0;

#ifdef INET
	if (sc->sc_ia) {
		struct ip *ip;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			SC2IFP(sc)->if_oerrors++;
			CARPSTATS_INC(carps_onomem);
			/* XXX maybe less ? */
			if (advbase != 255 || advskew != 255)
				callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
				    carp_send_ad, sc);
			return;
		}
		len = sizeof(*ip) + sizeof(ch);
		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = NULL;
		m->m_len = len;
		MH_ALIGN(m, m->m_len);
		m->m_flags |= M_MCAST;
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = len;
		ip->ip_id = ip_newid();
		ip->ip_off = IP_DF;
		ip->ip_ttl = CARP_DFLTTL;
		ip->ip_p = IPPROTO_CARP;
		ip->ip_sum = 0;
		ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);

		ch_ptr = (struct carp_header *)(&ip[1]);
		bcopy(&ch, ch_ptr, sizeof(ch));
		/* carp_prepare_ad() has already freed m when it fails. */
		if (carp_prepare_ad(m, sc, ch_ptr))
			return;

		/* Checksum covers the CARP header only. */
		m->m_data += sizeof(*ip);
		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
		m->m_data -= sizeof(*ip);

		getmicrotime(&SC2IFP(sc)->if_lastchange);
		SC2IFP(sc)->if_opackets++;
		SC2IFP(sc)->if_obytes += len;
		CARPSTATS_INC(carps_opackets);

		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
			SC2IFP(sc)->if_oerrors++;
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
				carp_suppress_preempt++;
				if (carp_suppress_preempt == 1) {
					/*
					 * carp_send_ad_all() takes each
					 * softc's lock itself.
					 */
					CARP_SCUNLOCK(sc);
					carp_send_ad_all();
					CARP_SCLOCK(sc);
				}
			}
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS) {
					carp_suppress_preempt--;
					sc->sc_sendad_errors = 0;
				}
			} else
				sc->sc_sendad_errors = 0;
		}
	}
#endif /* INET */
#ifdef INET6
	if (sc->sc_ia6) {
		struct ip6_hdr *ip6;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			SC2IFP(sc)->if_oerrors++;
			CARPSTATS_INC(carps_onomem);
			/* XXX maybe less ? */
			if (advbase != 255 || advskew != 255)
				callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
				    carp_send_ad, sc);
			return;
		}
		len = sizeof(*ip6) + sizeof(ch);
		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = NULL;
		m->m_len = len;
		MH_ALIGN(m, m->m_len);
		m->m_flags |= M_MCAST;
		ip6 = mtod(m, struct ip6_hdr *);
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_hlim = CARP_DFLTTL;
		ip6->ip6_nxt = IPPROTO_CARP;
		bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
		    sizeof(struct in6_addr));
		/* set the multicast destination */

		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
		ip6->ip6_dst.s6_addr8[15] = 0x12;
		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
			SC2IFP(sc)->if_oerrors++;
			m_freem(m);
			CARP_DEBUG("%s: in6_setscope failed\n", __func__);
			return;
		}

		ch_ptr = (struct carp_header *)(&ip6[1]);
		bcopy(&ch, ch_ptr, sizeof(ch));
		/* carp_prepare_ad() has already freed m when it fails. */
		if (carp_prepare_ad(m, sc, ch_ptr))
			return;

		/* Checksum covers the CARP header only. */
		m->m_data += sizeof(*ip6);
		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
		m->m_data -= sizeof(*ip6);

		getmicrotime(&SC2IFP(sc)->if_lastchange);
		SC2IFP(sc)->if_opackets++;
		SC2IFP(sc)->if_obytes += len;
		CARPSTATS_INC(carps_opackets6);

		if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
			SC2IFP(sc)->if_oerrors++;
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
				carp_suppress_preempt++;
				if (carp_suppress_preempt == 1) {
					/*
					 * carp_send_ad_all() takes each
					 * softc's lock itself.
					 */
					CARP_SCUNLOCK(sc);
					carp_send_ad_all();
					CARP_SCLOCK(sc);
				}
			}
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS) {
					carp_suppress_preempt--;
					sc->sc_sendad_errors = 0;
				}
			} else
				sc->sc_sendad_errors = 0;
		}
	}
#endif /* INET6 */

	/* Rearm unless we advertised the 255/255 "going away" values. */
	if (advbase != 255 || advskew != 255)
		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
		    carp_send_ad, sc);

}

/*
 * Broadcast a gratuitous ARP request containing
 * the virtual router MAC address for each IP address
 * associated with the virtual router.
 */
static void
carp_send_arp(struct carp_softc *sc)
{
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {

		if (ifa->ifa_addr->sa_family != AF_INET)
			continue;

/*		arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
		arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));

		DELAY(1000);	/* XXX */
	}
}

#ifdef INET6
/*
 * IPv6 counterpart of carp_send_arp(): send a neighbor advertisement with
 * the override flag to the link-local all-nodes group for every IPv6
 * address on the carp interface.
 */
static void
carp_send_na(struct carp_softc *sc)
{
	struct ifaddr *ifa;
	struct in6_addr *in6;
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;

	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {

		if (ifa->ifa_addr->sa_family != AF_INET6)
			continue;

		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
		nd6_na_output(sc->sc_carpdev, &mcast, in6,
		    ND_NA_FLAG_OVERRIDE, 1, NULL);
		DELAY(1000);	/* XXX */
	}
}
#endif /* INET6 */

/*
 * Count how many times the IPv4 address of ia is configured on the
 * virtual hosts of cif that qualify for the given type:
 * CARP_COUNT_RUNNING considers hosts that are up and running,
 * CARP_COUNT_MASTER considers hosts in MASTER state.
 * The carp_if lock must be held.
 */
static int
carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
{
	struct carp_softc *vh;
	struct ifaddr *ifa;
	int count = 0;

	CARP_LOCK_ASSERT(cif);

	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
		if ((type == CARP_COUNT_RUNNING &&
		    (SC2IFP(vh)->if_flags & IFF_UP) &&
		    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
		    (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
			IF_ADDR_LOCK(SC2IFP(vh));
			TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
			    ifa_list) {
				if (ifa->ifa_addr->sa_family == AF_INET &&
				    ia->ia_addr.sin_addr.s_addr ==
				    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
					count++;
1141 } 1142 IF_ADDR_UNLOCK(SC2IFP(vh)); 1143 } 1144 } 1145 return (count); 1146 } 1147 1148 int 1149 carp_iamatch(void *v, struct in_ifaddr *ia, 1150 struct in_addr *isaddr, u_int8_t **enaddr) 1151 { 1152 struct carp_if *cif = v; 1153 struct carp_softc *vh; 1154 int index, count = 0; 1155 struct ifaddr *ifa; 1156 1157 CARP_LOCK(cif); 1158 1159 if (carp_opts[CARPCTL_ARPBALANCE]) { 1160 /* 1161 * XXX proof of concept implementation. 1162 * We use the source ip to decide which virtual host should 1163 * handle the request. If we're master of that virtual host, 1164 * then we respond, otherwise, just drop the arp packet on 1165 * the floor. 1166 */ 1167 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1168 if (count == 0) { 1169 /* should never reach this */ 1170 CARP_UNLOCK(cif); 1171 return (0); 1172 } 1173 1174 /* this should be a hash, like pf_hash() */ 1175 index = ntohl(isaddr->s_addr) % count; 1176 count = 0; 1177 1178 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1179 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1180 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) { 1181 IF_ADDR_LOCK(SC2IFP(vh)); 1182 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1183 ifa_list) { 1184 if (ifa->ifa_addr->sa_family == 1185 AF_INET && 1186 ia->ia_addr.sin_addr.s_addr == 1187 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1188 if (count == index) { 1189 if (vh->sc_state == 1190 MASTER) { 1191 *enaddr = IF_LLADDR(vh->sc_ifp); 1192 IF_ADDR_UNLOCK(SC2IFP(vh)); 1193 CARP_UNLOCK(cif); 1194 return (1); 1195 } else { 1196 IF_ADDR_UNLOCK(SC2IFP(vh)); 1197 CARP_UNLOCK(cif); 1198 return (0); 1199 } 1200 } 1201 count++; 1202 } 1203 } 1204 IF_ADDR_UNLOCK(SC2IFP(vh)); 1205 } 1206 } 1207 } else { 1208 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1209 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1210 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1211 ia->ia_ifp == SC2IFP(vh) && 1212 vh->sc_state == MASTER) { 1213 *enaddr = IF_LLADDR(vh->sc_ifp); 1214 CARP_UNLOCK(cif); 1215 return (1); 1216 } 1217 } 1218 } 1219 
CARP_UNLOCK(cif); 1220 return (0); 1221 } 1222 1223 #ifdef INET6 1224 struct ifaddr * 1225 carp_iamatch6(void *v, struct in6_addr *taddr) 1226 { 1227 struct carp_if *cif = v; 1228 struct carp_softc *vh; 1229 struct ifaddr *ifa; 1230 1231 CARP_LOCK(cif); 1232 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1233 IF_ADDR_LOCK(SC2IFP(vh)); 1234 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { 1235 if (IN6_ARE_ADDR_EQUAL(taddr, 1236 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1237 (SC2IFP(vh)->if_flags & IFF_UP) && 1238 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1239 vh->sc_state == MASTER) { 1240 ifa_ref(ifa); 1241 IF_ADDR_UNLOCK(SC2IFP(vh)); 1242 CARP_UNLOCK(cif); 1243 return (ifa); 1244 } 1245 } 1246 IF_ADDR_UNLOCK(SC2IFP(vh)); 1247 } 1248 CARP_UNLOCK(cif); 1249 1250 return (NULL); 1251 } 1252 1253 void * 1254 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1255 { 1256 struct m_tag *mtag; 1257 struct carp_if *cif = v; 1258 struct carp_softc *sc; 1259 struct ifaddr *ifa; 1260 1261 CARP_LOCK(cif); 1262 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1263 IF_ADDR_LOCK(SC2IFP(sc)); 1264 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1265 if (IN6_ARE_ADDR_EQUAL(taddr, 1266 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1267 (SC2IFP(sc)->if_flags & IFF_UP) && 1268 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) { 1269 struct ifnet *ifp = SC2IFP(sc); 1270 mtag = m_tag_get(PACKET_TAG_CARP, 1271 sizeof(struct ifnet *), M_NOWAIT); 1272 if (mtag == NULL) { 1273 /* better a bit than nothing */ 1274 IF_ADDR_UNLOCK(SC2IFP(sc)); 1275 CARP_UNLOCK(cif); 1276 return (IF_LLADDR(sc->sc_ifp)); 1277 } 1278 bcopy(&ifp, (caddr_t)(mtag + 1), 1279 sizeof(struct ifnet *)); 1280 m_tag_prepend(m, mtag); 1281 1282 IF_ADDR_UNLOCK(SC2IFP(sc)); 1283 CARP_UNLOCK(cif); 1284 return (IF_LLADDR(sc->sc_ifp)); 1285 } 1286 } 1287 IF_ADDR_UNLOCK(SC2IFP(sc)); 1288 } 1289 CARP_UNLOCK(cif); 1290 1291 return (NULL); 1292 } 1293 #endif 1294 1295 struct ifnet * 1296 carp_forus(void *v, void 
*dhost) 1297 { 1298 struct carp_if *cif = v; 1299 struct carp_softc *vh; 1300 u_int8_t *ena = dhost; 1301 1302 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1303 return (NULL); 1304 1305 CARP_LOCK(cif); 1306 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1307 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1308 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1309 vh->sc_state == MASTER && 1310 !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { 1311 CARP_UNLOCK(cif); 1312 return (SC2IFP(vh)); 1313 } 1314 1315 CARP_UNLOCK(cif); 1316 return (NULL); 1317 } 1318 1319 static void 1320 carp_master_down(void *v) 1321 { 1322 struct carp_softc *sc = v; 1323 1324 CARP_SCLOCK(sc); 1325 carp_master_down_locked(sc); 1326 CARP_SCUNLOCK(sc); 1327 } 1328 1329 static void 1330 carp_master_down_locked(struct carp_softc *sc) 1331 { 1332 if (sc->sc_carpdev) 1333 CARP_SCLOCK_ASSERT(sc); 1334 1335 switch (sc->sc_state) { 1336 case INIT: 1337 printf("%s: master_down event in INIT state\n", 1338 SC2IFP(sc)->if_xname); 1339 break; 1340 case MASTER: 1341 break; 1342 case BACKUP: 1343 carp_set_state(sc, MASTER); 1344 carp_send_ad_locked(sc); 1345 carp_send_arp(sc); 1346 #ifdef INET6 1347 carp_send_na(sc); 1348 #endif /* INET6 */ 1349 carp_setrun(sc, 0); 1350 carp_setroute(sc, RTM_ADD); 1351 break; 1352 } 1353 } 1354 1355 /* 1356 * When in backup state, af indicates whether to reset the master down timer 1357 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1358 */ 1359 static void 1360 carp_setrun(struct carp_softc *sc, sa_family_t af) 1361 { 1362 struct timeval tv; 1363 1364 if (sc->sc_carpdev == NULL) { 1365 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1366 carp_set_state(sc, INIT); 1367 return; 1368 } else 1369 CARP_SCLOCK_ASSERT(sc); 1370 1371 if (SC2IFP(sc)->if_flags & IFF_UP && 1372 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) 1373 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 1374 else { 1375 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1376 carp_setroute(sc, RTM_DELETE); 1377 return; 1378 } 1379 1380 switch (sc->sc_state) { 1381 case INIT: 1382 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1383 carp_send_ad_locked(sc); 1384 carp_send_arp(sc); 1385 #ifdef INET6 1386 carp_send_na(sc); 1387 #endif /* INET6 */ 1388 CARP_LOG("%s: INIT -> MASTER (preempting)\n", 1389 SC2IFP(sc)->if_xname); 1390 carp_set_state(sc, MASTER); 1391 carp_setroute(sc, RTM_ADD); 1392 } else { 1393 CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); 1394 carp_set_state(sc, BACKUP); 1395 carp_setroute(sc, RTM_DELETE); 1396 carp_setrun(sc, 0); 1397 } 1398 break; 1399 case BACKUP: 1400 callout_stop(&sc->sc_ad_tmo); 1401 tv.tv_sec = 3 * sc->sc_advbase; 1402 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1403 switch (af) { 1404 #ifdef INET 1405 case AF_INET: 1406 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1407 carp_master_down, sc); 1408 break; 1409 #endif /* INET */ 1410 #ifdef INET6 1411 case AF_INET6: 1412 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1413 carp_master_down, sc); 1414 break; 1415 #endif /* INET6 */ 1416 default: 1417 if (sc->sc_naddrs) 1418 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1419 carp_master_down, sc); 1420 if (sc->sc_naddrs6) 1421 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1422 carp_master_down, sc); 1423 break; 1424 } 1425 break; 1426 case MASTER: 1427 tv.tv_sec = sc->sc_advbase; 1428 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1429 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1430 
carp_send_ad, sc); 1431 break; 1432 } 1433 } 1434 1435 static void 1436 carp_multicast_cleanup(struct carp_softc *sc) 1437 { 1438 struct ip_moptions *imo = &sc->sc_imo; 1439 u_int16_t n = imo->imo_num_memberships; 1440 1441 /* Clean up our own multicast memberships */ 1442 while (n-- > 0) { 1443 if (imo->imo_membership[n] != NULL) { 1444 in_delmulti(imo->imo_membership[n]); 1445 imo->imo_membership[n] = NULL; 1446 } 1447 } 1448 KASSERT(imo->imo_mfilters == NULL, 1449 ("%s: imo_mfilters != NULL", __func__)); 1450 imo->imo_num_memberships = 0; 1451 imo->imo_multicast_ifp = NULL; 1452 } 1453 1454 #ifdef INET6 1455 static void 1456 carp_multicast6_cleanup(struct carp_softc *sc) 1457 { 1458 struct ip6_moptions *im6o = &sc->sc_im6o; 1459 u_int16_t n = im6o->im6o_num_memberships; 1460 1461 while (n-- > 0) { 1462 if (im6o->im6o_membership[n] != NULL) { 1463 in6_mc_leave(im6o->im6o_membership[n], NULL); 1464 im6o->im6o_membership[n] = NULL; 1465 } 1466 } 1467 KASSERT(im6o->im6o_mfilters == NULL, 1468 ("%s: im6o_mfilters != NULL", __func__)); 1469 im6o->im6o_num_memberships = 0; 1470 im6o->im6o_multicast_ifp = NULL; 1471 } 1472 #endif 1473 1474 static int 1475 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1476 { 1477 struct ifnet *ifp; 1478 struct carp_if *cif; 1479 struct in_ifaddr *ia, *ia_if; 1480 struct ip_moptions *imo = &sc->sc_imo; 1481 struct in_addr addr; 1482 u_long iaddr = htonl(sin->sin_addr.s_addr); 1483 int own, error; 1484 1485 if (sin->sin_addr.s_addr == 0) { 1486 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1487 carp_set_state(sc, INIT); 1488 if (sc->sc_naddrs) 1489 SC2IFP(sc)->if_flags |= IFF_UP; 1490 if (sc->sc_carpdev) 1491 CARP_SCLOCK(sc); 1492 carp_setrun(sc, 0); 1493 if (sc->sc_carpdev) 1494 CARP_SCUNLOCK(sc); 1495 return (0); 1496 } 1497 1498 /* we have to do it by hands to check we won't match on us */ 1499 ia_if = NULL; own = 0; 1500 IN_IFADDR_RLOCK(); 1501 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1502 /* and, yeah, we need a 
multicast-capable iface too */ 1503 if (ia->ia_ifp != SC2IFP(sc) && 1504 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1505 (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1506 if (!ia_if) 1507 ia_if = ia; 1508 if (sin->sin_addr.s_addr == 1509 ia->ia_addr.sin_addr.s_addr) 1510 own++; 1511 } 1512 } 1513 1514 if (!ia_if) { 1515 IN_IFADDR_RUNLOCK(); 1516 return (EADDRNOTAVAIL); 1517 } 1518 1519 ia = ia_if; 1520 ifa_ref(&ia->ia_ifa); 1521 IN_IFADDR_RUNLOCK(); 1522 1523 ifp = ia->ia_ifp; 1524 1525 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1526 (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) { 1527 ifa_free(&ia->ia_ifa); 1528 return (EADDRNOTAVAIL); 1529 } 1530 1531 if (imo->imo_num_memberships == 0) { 1532 addr.s_addr = htonl(INADDR_CARP_GROUP); 1533 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == 1534 NULL) { 1535 ifa_free(&ia->ia_ifa); 1536 return (ENOBUFS); 1537 } 1538 imo->imo_num_memberships++; 1539 imo->imo_multicast_ifp = ifp; 1540 imo->imo_multicast_ttl = CARP_DFLTTL; 1541 imo->imo_multicast_loop = 0; 1542 } 1543 1544 if (!ifp->if_carp) { 1545 1546 cif = malloc(sizeof(*cif), M_CARP, 1547 M_WAITOK|M_ZERO); 1548 if (!cif) { 1549 error = ENOBUFS; 1550 goto cleanup; 1551 } 1552 if ((error = ifpromisc(ifp, 1))) { 1553 free(cif, M_CARP); 1554 goto cleanup; 1555 } 1556 1557 CARP_LOCK_INIT(cif); 1558 CARP_LOCK(cif); 1559 cif->vhif_ifp = ifp; 1560 TAILQ_INIT(&cif->vhif_vrs); 1561 ifp->if_carp = cif; 1562 1563 } else { 1564 struct carp_softc *vr; 1565 1566 cif = (struct carp_if *)ifp->if_carp; 1567 CARP_LOCK(cif); 1568 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1569 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1570 CARP_UNLOCK(cif); 1571 error = EEXIST; 1572 goto cleanup; 1573 } 1574 } 1575 sc->sc_ia = ia; 1576 sc->sc_carpdev = ifp; 1577 1578 { /* XXX prevent endless loop if already in queue */ 1579 struct carp_softc *vr, *after = NULL; 1580 int myself = 0; 1581 cif = (struct carp_if *)ifp->if_carp; 1582 1583 /* XXX: cif should not 
change, right? So we still hold the lock */ 1584 CARP_LOCK_ASSERT(cif); 1585 1586 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1587 if (vr == sc) 1588 myself = 1; 1589 if (vr->sc_vhid < sc->sc_vhid) 1590 after = vr; 1591 } 1592 1593 if (!myself) { 1594 /* We're trying to keep things in order */ 1595 if (after == NULL) { 1596 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1597 } else { 1598 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1599 } 1600 cif->vhif_nvrs++; 1601 } 1602 } 1603 1604 sc->sc_naddrs++; 1605 SC2IFP(sc)->if_flags |= IFF_UP; 1606 if (own) 1607 sc->sc_advskew = 0; 1608 carp_sc_state_locked(sc); 1609 carp_setrun(sc, 0); 1610 1611 CARP_UNLOCK(cif); 1612 ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ 1613 1614 return (0); 1615 1616 cleanup: 1617 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1618 ifa_free(&ia->ia_ifa); 1619 return (error); 1620 } 1621 1622 static int 1623 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1624 { 1625 int error = 0; 1626 1627 if (!--sc->sc_naddrs) { 1628 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1629 struct ip_moptions *imo = &sc->sc_imo; 1630 1631 CARP_LOCK(cif); 1632 callout_stop(&sc->sc_ad_tmo); 1633 SC2IFP(sc)->if_flags &= ~IFF_UP; 1634 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1635 sc->sc_vhid = -1; 1636 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1637 imo->imo_multicast_ifp = NULL; 1638 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1639 if (!--cif->vhif_nvrs) { 1640 sc->sc_carpdev->if_carp = NULL; 1641 CARP_LOCK_DESTROY(cif); 1642 free(cif, M_CARP); 1643 } else { 1644 CARP_UNLOCK(cif); 1645 } 1646 } 1647 1648 return (error); 1649 } 1650 1651 #ifdef INET6 1652 static int 1653 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1654 { 1655 struct ifnet *ifp; 1656 struct carp_if *cif; 1657 struct in6_ifaddr *ia, *ia_if; 1658 struct ip6_moptions *im6o = &sc->sc_im6o; 1659 struct in6_addr in6; 1660 int own, 
error; 1661 1662 error = 0; 1663 1664 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1665 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1666 carp_set_state(sc, INIT); 1667 if (sc->sc_naddrs6) 1668 SC2IFP(sc)->if_flags |= IFF_UP; 1669 if (sc->sc_carpdev) 1670 CARP_SCLOCK(sc); 1671 carp_setrun(sc, 0); 1672 if (sc->sc_carpdev) 1673 CARP_SCUNLOCK(sc); 1674 return (0); 1675 } 1676 1677 /* we have to do it by hands to check we won't match on us */ 1678 ia_if = NULL; own = 0; 1679 IN6_IFADDR_RLOCK(); 1680 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 1681 int i; 1682 1683 for (i = 0; i < 4; i++) { 1684 if ((sin6->sin6_addr.s6_addr32[i] & 1685 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1686 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1687 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1688 break; 1689 } 1690 /* and, yeah, we need a multicast-capable iface too */ 1691 if (ia->ia_ifp != SC2IFP(sc) && 1692 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1693 (i == 4)) { 1694 if (!ia_if) 1695 ia_if = ia; 1696 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1697 &ia->ia_addr.sin6_addr)) 1698 own++; 1699 } 1700 } 1701 1702 if (!ia_if) { 1703 IN6_IFADDR_RUNLOCK(); 1704 return (EADDRNOTAVAIL); 1705 } 1706 ia = ia_if; 1707 ifa_ref(&ia->ia_ifa); 1708 IN6_IFADDR_RUNLOCK(); 1709 ifp = ia->ia_ifp; 1710 1711 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1712 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) { 1713 ifa_free(&ia->ia_ifa); 1714 return (EADDRNOTAVAIL); 1715 } 1716 1717 if (!sc->sc_naddrs6) { 1718 struct in6_multi *in6m; 1719 1720 im6o->im6o_multicast_ifp = ifp; 1721 1722 /* join CARP multicast address */ 1723 bzero(&in6, sizeof(in6)); 1724 in6.s6_addr16[0] = htons(0xff02); 1725 in6.s6_addr8[15] = 0x12; 1726 if (in6_setscope(&in6, ifp, NULL) != 0) 1727 goto cleanup; 1728 in6m = NULL; 1729 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); 1730 if (error) 1731 goto cleanup; 1732 im6o->im6o_membership[0] = in6m; 1733 im6o->im6o_num_memberships++; 1734 1735 /* join solicited 
multicast address */ 1736 bzero(&in6, sizeof(in6)); 1737 in6.s6_addr16[0] = htons(0xff02); 1738 in6.s6_addr32[1] = 0; 1739 in6.s6_addr32[2] = htonl(1); 1740 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1741 in6.s6_addr8[12] = 0xff; 1742 if (in6_setscope(&in6, ifp, NULL) != 0) 1743 goto cleanup; 1744 in6m = NULL; 1745 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); 1746 if (error) 1747 goto cleanup; 1748 im6o->im6o_membership[1] = in6m; 1749 im6o->im6o_num_memberships++; 1750 } 1751 1752 if (!ifp->if_carp) { 1753 cif = malloc(sizeof(*cif), M_CARP, 1754 M_WAITOK|M_ZERO); 1755 if (!cif) { 1756 error = ENOBUFS; 1757 goto cleanup; 1758 } 1759 if ((error = ifpromisc(ifp, 1))) { 1760 free(cif, M_CARP); 1761 goto cleanup; 1762 } 1763 1764 CARP_LOCK_INIT(cif); 1765 CARP_LOCK(cif); 1766 cif->vhif_ifp = ifp; 1767 TAILQ_INIT(&cif->vhif_vrs); 1768 ifp->if_carp = cif; 1769 1770 } else { 1771 struct carp_softc *vr; 1772 1773 cif = (struct carp_if *)ifp->if_carp; 1774 CARP_LOCK(cif); 1775 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1776 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1777 CARP_UNLOCK(cif); 1778 error = EINVAL; 1779 goto cleanup; 1780 } 1781 } 1782 sc->sc_ia6 = ia; 1783 sc->sc_carpdev = ifp; 1784 1785 { /* XXX prevent endless loop if already in queue */ 1786 struct carp_softc *vr, *after = NULL; 1787 int myself = 0; 1788 cif = (struct carp_if *)ifp->if_carp; 1789 CARP_LOCK_ASSERT(cif); 1790 1791 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1792 if (vr == sc) 1793 myself = 1; 1794 if (vr->sc_vhid < sc->sc_vhid) 1795 after = vr; 1796 } 1797 1798 if (!myself) { 1799 /* We're trying to keep things in order */ 1800 if (after == NULL) { 1801 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1802 } else { 1803 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1804 } 1805 cif->vhif_nvrs++; 1806 } 1807 } 1808 1809 sc->sc_naddrs6++; 1810 SC2IFP(sc)->if_flags |= IFF_UP; 1811 if (own) 1812 sc->sc_advskew = 0; 1813 carp_sc_state_locked(sc); 1814 carp_setrun(sc, 0); 1815 
1816 CARP_UNLOCK(cif); 1817 ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ 1818 1819 return (0); 1820 1821 cleanup: 1822 if (!sc->sc_naddrs6) 1823 carp_multicast6_cleanup(sc); 1824 ifa_free(&ia->ia_ifa); 1825 return (error); 1826 } 1827 1828 static int 1829 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1830 { 1831 int error = 0; 1832 1833 if (!--sc->sc_naddrs6) { 1834 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1835 1836 CARP_LOCK(cif); 1837 callout_stop(&sc->sc_ad_tmo); 1838 SC2IFP(sc)->if_flags &= ~IFF_UP; 1839 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1840 sc->sc_vhid = -1; 1841 carp_multicast6_cleanup(sc); 1842 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1843 if (!--cif->vhif_nvrs) { 1844 CARP_LOCK_DESTROY(cif); 1845 sc->sc_carpdev->if_carp = NULL; 1846 free(cif, M_CARP); 1847 } else 1848 CARP_UNLOCK(cif); 1849 } 1850 1851 return (error); 1852 } 1853 #endif /* INET6 */ 1854 1855 static int 1856 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1857 { 1858 struct carp_softc *sc = ifp->if_softc, *vr; 1859 struct carpreq carpr; 1860 struct ifaddr *ifa; 1861 struct ifreq *ifr; 1862 struct ifaliasreq *ifra; 1863 int locked = 0, error = 0; 1864 1865 ifa = (struct ifaddr *)addr; 1866 ifra = (struct ifaliasreq *)addr; 1867 ifr = (struct ifreq *)addr; 1868 1869 switch (cmd) { 1870 case SIOCSIFADDR: 1871 switch (ifa->ifa_addr->sa_family) { 1872 #ifdef INET 1873 case AF_INET: 1874 SC2IFP(sc)->if_flags |= IFF_UP; 1875 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1876 sizeof(struct sockaddr)); 1877 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1878 break; 1879 #endif /* INET */ 1880 #ifdef INET6 1881 case AF_INET6: 1882 SC2IFP(sc)->if_flags |= IFF_UP; 1883 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1884 break; 1885 #endif /* INET6 */ 1886 default: 1887 error = EAFNOSUPPORT; 1888 break; 1889 } 1890 break; 1891 1892 case SIOCAIFADDR: 1893 switch (ifa->ifa_addr->sa_family) { 1894 #ifdef INET 1895 
case AF_INET: 1896 SC2IFP(sc)->if_flags |= IFF_UP; 1897 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1898 sizeof(struct sockaddr)); 1899 error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); 1900 break; 1901 #endif /* INET */ 1902 #ifdef INET6 1903 case AF_INET6: 1904 SC2IFP(sc)->if_flags |= IFF_UP; 1905 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1906 break; 1907 #endif /* INET6 */ 1908 default: 1909 error = EAFNOSUPPORT; 1910 break; 1911 } 1912 break; 1913 1914 case SIOCDIFADDR: 1915 switch (ifa->ifa_addr->sa_family) { 1916 #ifdef INET 1917 case AF_INET: 1918 error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); 1919 break; 1920 #endif /* INET */ 1921 #ifdef INET6 1922 case AF_INET6: 1923 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1924 break; 1925 #endif /* INET6 */ 1926 default: 1927 error = EAFNOSUPPORT; 1928 break; 1929 } 1930 break; 1931 1932 case SIOCSIFFLAGS: 1933 if (sc->sc_carpdev) { 1934 locked = 1; 1935 CARP_SCLOCK(sc); 1936 } 1937 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1938 callout_stop(&sc->sc_ad_tmo); 1939 callout_stop(&sc->sc_md_tmo); 1940 callout_stop(&sc->sc_md6_tmo); 1941 if (sc->sc_state == MASTER) 1942 carp_send_ad_locked(sc); 1943 carp_set_state(sc, INIT); 1944 carp_setrun(sc, 0); 1945 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 1946 SC2IFP(sc)->if_flags |= IFF_UP; 1947 carp_setrun(sc, 0); 1948 } 1949 break; 1950 1951 case SIOCSVH: 1952 error = priv_check(curthread, PRIV_NETINET_CARP); 1953 if (error) 1954 break; 1955 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1956 break; 1957 error = 1; 1958 if (sc->sc_carpdev) { 1959 locked = 1; 1960 CARP_SCLOCK(sc); 1961 } 1962 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1963 switch (carpr.carpr_state) { 1964 case BACKUP: 1965 callout_stop(&sc->sc_ad_tmo); 1966 carp_set_state(sc, BACKUP); 1967 carp_setrun(sc, 0); 1968 carp_setroute(sc, RTM_DELETE); 1969 break; 1970 case MASTER: 1971 carp_master_down_locked(sc); 
1972 break; 1973 default: 1974 break; 1975 } 1976 } 1977 if (carpr.carpr_vhid > 0) { 1978 if (carpr.carpr_vhid > 255) { 1979 error = EINVAL; 1980 break; 1981 } 1982 if (sc->sc_carpdev) { 1983 struct carp_if *cif; 1984 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1985 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1986 if (vr != sc && 1987 vr->sc_vhid == carpr.carpr_vhid) { 1988 error = EEXIST; 1989 break; 1990 } 1991 if (error == EEXIST) 1992 break; 1993 } 1994 sc->sc_vhid = carpr.carpr_vhid; 1995 IF_LLADDR(sc->sc_ifp)[0] = 0; 1996 IF_LLADDR(sc->sc_ifp)[1] = 0; 1997 IF_LLADDR(sc->sc_ifp)[2] = 0x5e; 1998 IF_LLADDR(sc->sc_ifp)[3] = 0; 1999 IF_LLADDR(sc->sc_ifp)[4] = 1; 2000 IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; 2001 error--; 2002 } 2003 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2004 if (carpr.carpr_advskew >= 255) { 2005 error = EINVAL; 2006 break; 2007 } 2008 if (carpr.carpr_advbase > 255) { 2009 error = EINVAL; 2010 break; 2011 } 2012 sc->sc_advbase = carpr.carpr_advbase; 2013 sc->sc_advskew = carpr.carpr_advskew; 2014 error--; 2015 } 2016 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2017 if (error > 0) 2018 error = EINVAL; 2019 else { 2020 error = 0; 2021 carp_setrun(sc, 0); 2022 } 2023 break; 2024 2025 case SIOCGVH: 2026 /* XXX: lockless read */ 2027 bzero(&carpr, sizeof(carpr)); 2028 carpr.carpr_state = sc->sc_state; 2029 carpr.carpr_vhid = sc->sc_vhid; 2030 carpr.carpr_advbase = sc->sc_advbase; 2031 carpr.carpr_advskew = sc->sc_advskew; 2032 error = priv_check(curthread, PRIV_NETINET_CARP); 2033 if (error == 0) 2034 bcopy(sc->sc_key, carpr.carpr_key, 2035 sizeof(carpr.carpr_key)); 2036 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2037 break; 2038 2039 default: 2040 error = EINVAL; 2041 } 2042 2043 if (locked) 2044 CARP_SCUNLOCK(sc); 2045 2046 carp_hmac_prepare(sc); 2047 2048 return (error); 2049 } 2050 2051 /* 2052 * XXX: this is looutput. We should eventually use it from there. 
2053 */ 2054 static int 2055 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 2056 struct route *ro) 2057 { 2058 u_int32_t af; 2059 struct rtentry *rt = NULL; 2060 2061 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 2062 2063 if (ro != NULL) 2064 rt = ro->ro_rt; 2065 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2066 m_freem(m); 2067 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 2068 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 2069 } 2070 2071 ifp->if_opackets++; 2072 ifp->if_obytes += m->m_pkthdr.len; 2073 2074 /* BPF writes need to be handled specially. */ 2075 if (dst->sa_family == AF_UNSPEC) { 2076 bcopy(dst->sa_data, &af, sizeof(af)); 2077 dst->sa_family = af; 2078 } 2079 2080 #if 1 /* XXX */ 2081 switch (dst->sa_family) { 2082 case AF_INET: 2083 case AF_INET6: 2084 case AF_IPX: 2085 case AF_APPLETALK: 2086 break; 2087 default: 2088 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 2089 m_freem(m); 2090 return (EAFNOSUPPORT); 2091 } 2092 #endif 2093 return(if_simloop(ifp, m, dst->sa_family, 0)); 2094 } 2095 2096 /* 2097 * Start output on carp interface. This function should never be called. 
2098 */ 2099 static void 2100 carp_start(struct ifnet *ifp) 2101 { 2102 #ifdef DEBUG 2103 printf("%s: start called\n", ifp->if_xname); 2104 #endif 2105 } 2106 2107 int 2108 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2109 struct rtentry *rt) 2110 { 2111 struct m_tag *mtag; 2112 struct carp_softc *sc; 2113 struct ifnet *carp_ifp; 2114 2115 if (!sa) 2116 return (0); 2117 2118 switch (sa->sa_family) { 2119 #ifdef INET 2120 case AF_INET: 2121 break; 2122 #endif /* INET */ 2123 #ifdef INET6 2124 case AF_INET6: 2125 break; 2126 #endif /* INET6 */ 2127 default: 2128 return (0); 2129 } 2130 2131 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2132 if (mtag == NULL) 2133 return (0); 2134 2135 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2136 sc = carp_ifp->if_softc; 2137 2138 /* Set the source MAC address to Virtual Router MAC Address */ 2139 switch (ifp->if_type) { 2140 case IFT_ETHER: 2141 case IFT_L2VLAN: { 2142 struct ether_header *eh; 2143 2144 eh = mtod(m, struct ether_header *); 2145 eh->ether_shost[0] = 0; 2146 eh->ether_shost[1] = 0; 2147 eh->ether_shost[2] = 0x5e; 2148 eh->ether_shost[3] = 0; 2149 eh->ether_shost[4] = 1; 2150 eh->ether_shost[5] = sc->sc_vhid; 2151 } 2152 break; 2153 case IFT_FDDI: { 2154 struct fddi_header *fh; 2155 2156 fh = mtod(m, struct fddi_header *); 2157 fh->fddi_shost[0] = 0; 2158 fh->fddi_shost[1] = 0; 2159 fh->fddi_shost[2] = 0x5e; 2160 fh->fddi_shost[3] = 0; 2161 fh->fddi_shost[4] = 1; 2162 fh->fddi_shost[5] = sc->sc_vhid; 2163 } 2164 break; 2165 case IFT_ISO88025: { 2166 struct iso88025_header *th; 2167 th = mtod(m, struct iso88025_header *); 2168 th->iso88025_shost[0] = 3; 2169 th->iso88025_shost[1] = 0; 2170 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 2171 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 2172 th->iso88025_shost[4] = 0; 2173 th->iso88025_shost[5] = 0; 2174 } 2175 break; 2176 default: 2177 printf("%s: carp is not supported for this interface type\n", 2178 ifp->if_xname); 2179 
return (EOPNOTSUPP); 2180 } 2181 2182 return (0); 2183 } 2184 2185 static void 2186 carp_set_state(struct carp_softc *sc, int state) 2187 { 2188 int link_state; 2189 2190 if (sc->sc_carpdev) 2191 CARP_SCLOCK_ASSERT(sc); 2192 2193 if (sc->sc_state == state) 2194 return; 2195 2196 sc->sc_state = state; 2197 switch (state) { 2198 case BACKUP: 2199 link_state = LINK_STATE_DOWN; 2200 break; 2201 case MASTER: 2202 link_state = LINK_STATE_UP; 2203 break; 2204 default: 2205 link_state = LINK_STATE_UNKNOWN; 2206 break; 2207 } 2208 if_link_state_change(SC2IFP(sc), link_state); 2209 } 2210 2211 void 2212 carp_carpdev_state(void *v) 2213 { 2214 struct carp_if *cif = v; 2215 2216 CARP_LOCK(cif); 2217 carp_carpdev_state_locked(cif); 2218 CARP_UNLOCK(cif); 2219 } 2220 2221 static void 2222 carp_carpdev_state_locked(struct carp_if *cif) 2223 { 2224 struct carp_softc *sc; 2225 2226 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2227 carp_sc_state_locked(sc); 2228 } 2229 2230 static void 2231 carp_sc_state_locked(struct carp_softc *sc) 2232 { 2233 CARP_SCLOCK_ASSERT(sc); 2234 2235 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2236 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2237 sc->sc_flags_backup = SC2IFP(sc)->if_flags; 2238 SC2IFP(sc)->if_flags &= ~IFF_UP; 2239 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 2240 callout_stop(&sc->sc_ad_tmo); 2241 callout_stop(&sc->sc_md_tmo); 2242 callout_stop(&sc->sc_md6_tmo); 2243 carp_set_state(sc, INIT); 2244 carp_setrun(sc, 0); 2245 if (!sc->sc_suppress) { 2246 carp_suppress_preempt++; 2247 if (carp_suppress_preempt == 1) { 2248 CARP_SCUNLOCK(sc); 2249 carp_send_ad_all(); 2250 CARP_SCLOCK(sc); 2251 } 2252 } 2253 sc->sc_suppress = 1; 2254 } else { 2255 SC2IFP(sc)->if_flags |= sc->sc_flags_backup; 2256 carp_set_state(sc, INIT); 2257 carp_setrun(sc, 0); 2258 if (sc->sc_suppress) 2259 carp_suppress_preempt--; 2260 sc->sc_suppress = 0; 2261 } 2262 2263 return; 2264 } 2265 2266 static int 2267 carp_modevent(module_t mod, int type, void *data) 2268 
{ 2269 switch (type) { 2270 case MOD_LOAD: 2271 if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 2272 carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); 2273 if (if_detach_event_tag == NULL) 2274 return (ENOMEM); 2275 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2276 LIST_INIT(&carpif_list); 2277 if_clone_attach(&carp_cloner); 2278 break; 2279 2280 case MOD_UNLOAD: 2281 EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); 2282 if_clone_detach(&carp_cloner); 2283 mtx_destroy(&carp_mtx); 2284 break; 2285 2286 default: 2287 return (EINVAL); 2288 } 2289 2290 return (0); 2291 } 2292 2293 static moduledata_t carp_mod = { 2294 "carp", 2295 carp_modevent, 2296 0 2297 }; 2298 2299 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2300