1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>

#include <sys/socket.h>
#include <sys/vnode.h>

#include <machine/stdarg.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/fddi.h>
#include <net/iso88025.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip.h>

#include <machine/in_cksum.h>
#endif

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#endif

#include <crypto/sha1.h>

#define	CARP_IFNAME	"carp"
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
SYSCTL_DECL(_net_inet_carp);

/*
 * Per-virtual-host state.  One carp_softc backs each cloned carp(4)
 * pseudo-interface (see carp_clone_create()).
 */
struct carp_softc {
	struct ifnet	 	*sc_ifp;	/* the carp pseudo-interface itself */
	struct ifnet		*sc_carpdev;	/* parent interface; NULL while unattached */
	struct in_ifaddr 	*sc_ia;		/* primary iface address */
#ifdef INET
	struct ip_moptions 	 sc_imo;	/* IPv4 multicast options used for output */
#endif
#ifdef INET6
	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
	struct ip6_moptions 	 sc_im6o;	/* IPv6 multicast options used for output */
#endif /* INET6 */
	TAILQ_ENTRY(carp_softc)	 sc_list;	/* linkage on the parent's carp_if.vhif_vrs */

	enum { INIT = 0, BACKUP, MASTER }	sc_state;

	int			 sc_flags_backup;
	int			 sc_suppress;	/* nonzero: counted in carp_suppress_preempt */

	/*
	 * Advertisement send bookkeeping.  Reaching CARP_SENDAD_MAX_ERRORS
	 * failures bumps carp_suppress_preempt; once at or above that
	 * threshold, CARP_SENDAD_MIN_SUCCESS good sends undo the bump
	 * (see carp_send_ad_locked()).
	 */
	int			 sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	int			 sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS 3

	int			 sc_vhid;	/* virtual host id; -1 until configured */
	int			 sc_advskew;
	int			 sc_naddrs;
	int			 sc_naddrs6;
	int			 sc_advbase;	/* seconds */
	int			 sc_init_counter; /* nonzero: seed sc_counter randomly */
	u_int64_t		 sc_counter;	/* 64-bit advertisement counter */

	/* authentication */
#define CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];	/* HMAC key */
	unsigned char sc_pad[CARP_HMAC_PAD];	/* holds the opad after carp_hmac_prepare() */
	SHA1_CTX sc_sha1;			/* precomputed prefix of the inner hash */

	struct callout		 sc_ad_tmo;	/* advertisement timeout */
	struct callout		 sc_md_tmo;	/* master down timeout */
	struct callout 		 sc_md6_tmo;	/* master down timeout */

	LIST_ENTRY(carp_softc)	 sc_next;	/* linkage on the global carpif_list */
};
#define	SC2IFP(sc)	((sc)->sc_ifp)

/*
 * Number of reasons preemption is currently suppressed; exported
 * read-only through the suppress_preempt sysctl below.
 */
int carp_suppress_preempt = 0;
/* Tunables indexed by the CARPCTL_* ids used in the sysctls below. */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };	/* XXX for now */
SYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp, CTLFLAG_RW, 0, "CARP");
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");

/*
 * Per-parent-interface state, reached through ifnet.if_carp.  It is
 * freed by carpdetach() when the last virtual host leaves the parent.
 */
struct carp_if {
	TAILQ_HEAD(, carp_softc) vhif_vrs;	/* virtual hosts on this parent */
	int vhif_nvrs;				/* number of entries on vhif_vrs */

	struct ifnet 	*vhif_ifp;
	struct mtx	 vhif_mtx;		/* protects this carp_if and its softcs */
};

/* Indices into proto_reg[]. */
#define	CARP_INET	0
#define	CARP_INET6	1
static int proto_reg[] = {-1, -1};

/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
#define	SC2CIF(sc)		((struct carp_if *)(sc)->sc_carpdev->if_carp)

/* lock per carp_if queue */
#define	CARP_LOCK_INIT(cif)	mtx_init(&(cif)->vhif_mtx, "carp_if", 	\
	NULL, MTX_DEF)
#define	CARP_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->vhif_mtx)
#define	CARP_LOCK_ASSERT(cif)	mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
#define	CARP_LOCK(cif)		mtx_lock(&(cif)->vhif_mtx)
#define	CARP_UNLOCK(cif)	mtx_unlock(&(cif)->vhif_mtx)

/*
 * Same lock, reached from a softc.  These dereference sc_carpdev, so
 * they may only be used while the softc is attached to a parent.
 */
#define	CARP_SCLOCK(sc)		mtx_lock(&SC2CIF(sc)->vhif_mtx)
#define	CARP_SCUNLOCK(sc)	mtx_unlock(&SC2CIF(sc)->vhif_mtx)
#define	CARP_SCLOCK_ASSERT(sc)	mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)

/* Emitted at LOG_INFO when the log tunable is greater than 0. */
#define	CARP_LOG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 0)			\
		log(LOG_INFO, __VA_ARGS__);		\
} while (0)

/* Emitted at LOG_DEBUG when the log tunable is greater than 1. */
#define	CARP_DEBUG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 1)			\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)

static void	carp_hmac_prepare(struct carp_softc *);
static void	carp_hmac_generate(struct carp_softc *, u_int32_t *,
		    unsigned char *);
static int	carp_hmac_verify(struct carp_softc *, u_int32_t *,
		    unsigned char *);
static void	carp_setroute(struct carp_softc *, int);
static void	carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
static int 	carp_clone_create(struct if_clone *, int, caddr_t);
static void 	carp_clone_destroy(struct ifnet *);
static void	carpdetach(struct carp_softc *, int);
static int	carp_prepare_ad(struct mbuf *, struct carp_softc *,
		    struct carp_header *);
static void	carp_send_ad_all(void);
static void	carp_send_ad(void *);
static void	carp_send_ad_locked(struct carp_softc *);
#ifdef INET
static void	carp_send_arp(struct carp_softc *);
#endif
static void	carp_master_down(void *);
static void	carp_master_down_locked(struct carp_softc *);
static int	carp_ioctl(struct ifnet *, u_long, caddr_t);
static int	carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct route *);
static void	carp_start(struct ifnet *);
static void	carp_setrun(struct carp_softc *, sa_family_t);
static void	carp_set_state(struct carp_softc *, int);
#ifdef INET
static int	carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
#endif
/* Selector for the "type" argument of carp_addrcount(). */
enum	{ CARP_COUNT_MASTER, CARP_COUNT_RUNNING };

#ifdef INET
static void	carp_multicast_cleanup(struct carp_softc *, int dofree);
static int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
static int	carp_del_addr(struct carp_softc *, struct sockaddr_in *);
#endif
static void	carp_carpdev_state_locked(struct carp_if *);
static void	carp_sc_state_locked(struct carp_softc *);
#ifdef INET6
static void	carp_send_na(struct carp_softc *);
static int
carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 242 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 243 static void carp_multicast6_cleanup(struct carp_softc *, int dofree); 244 #endif 245 246 static LIST_HEAD(, carp_softc) carpif_list; 247 static struct mtx carp_mtx; 248 IFC_SIMPLE_DECLARE(carp, 0); 249 250 static eventhandler_tag if_detach_event_tag; 251 252 static __inline u_int16_t 253 carp_cksum(struct mbuf *m, int len) 254 { 255 return (in_cksum(m, len)); 256 } 257 258 static void 259 carp_hmac_prepare(struct carp_softc *sc) 260 { 261 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 262 u_int8_t vhid = sc->sc_vhid & 0xff; 263 struct ifaddr *ifa; 264 int i, found; 265 #ifdef INET 266 struct in_addr last, cur, in; 267 #endif 268 #ifdef INET6 269 struct in6_addr last6, cur6, in6; 270 #endif 271 272 if (sc->sc_carpdev) 273 CARP_SCLOCK(sc); 274 275 /* XXX: possible race here */ 276 277 /* compute ipad from key */ 278 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 279 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 280 for (i = 0; i < sizeof(sc->sc_pad); i++) 281 sc->sc_pad[i] ^= 0x36; 282 283 /* precompute first part of inner hash */ 284 SHA1Init(&sc->sc_sha1); 285 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 286 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 287 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 288 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 289 #ifdef INET 290 cur.s_addr = 0; 291 do { 292 found = 0; 293 last = cur; 294 cur.s_addr = 0xffffffff; 295 IF_ADDR_LOCK(SC2IFP(sc)); 296 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 297 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 298 if (ifa->ifa_addr->sa_family == AF_INET && 299 ntohl(in.s_addr) > ntohl(last.s_addr) && 300 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 301 cur.s_addr = in.s_addr; 302 found++; 303 } 304 } 305 IF_ADDR_UNLOCK(SC2IFP(sc)); 306 if (found) 307 SHA1Update(&sc->sc_sha1, (void *)&cur, 
sizeof(cur)); 308 } while (found); 309 #endif /* INET */ 310 #ifdef INET6 311 memset(&cur6, 0, sizeof(cur6)); 312 do { 313 found = 0; 314 last6 = cur6; 315 memset(&cur6, 0xff, sizeof(cur6)); 316 IF_ADDR_LOCK(SC2IFP(sc)); 317 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 318 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 319 if (IN6_IS_SCOPE_EMBED(&in6)) 320 in6.s6_addr16[1] = 0; 321 if (ifa->ifa_addr->sa_family == AF_INET6 && 322 memcmp(&in6, &last6, sizeof(in6)) > 0 && 323 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 324 cur6 = in6; 325 found++; 326 } 327 } 328 IF_ADDR_UNLOCK(SC2IFP(sc)); 329 if (found) 330 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 331 } while (found); 332 #endif /* INET6 */ 333 334 /* convert ipad to opad */ 335 for (i = 0; i < sizeof(sc->sc_pad); i++) 336 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 337 338 if (sc->sc_carpdev) 339 CARP_SCUNLOCK(sc); 340 } 341 342 static void 343 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 344 unsigned char md[20]) 345 { 346 SHA1_CTX sha1ctx; 347 348 /* fetch first half of inner hash */ 349 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 350 351 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 352 SHA1Final(md, &sha1ctx); 353 354 /* outer hash */ 355 SHA1Init(&sha1ctx); 356 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 357 SHA1Update(&sha1ctx, md, 20); 358 SHA1Final(md, &sha1ctx); 359 } 360 361 static int 362 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 363 unsigned char md[20]) 364 { 365 unsigned char md2[20]; 366 367 CARP_SCLOCK_ASSERT(sc); 368 369 carp_hmac_generate(sc, counter, md2); 370 371 return (bcmp(md, md2, sizeof(md2))); 372 } 373 374 static void 375 carp_setroute(struct carp_softc *sc, int cmd) 376 { 377 struct ifaddr *ifa; 378 int s; 379 380 if (sc->sc_carpdev) 381 CARP_SCLOCK_ASSERT(sc); 382 383 s = splnet(); 384 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 385 #ifdef INET 386 if (ifa->ifa_addr->sa_family == AF_INET && 387 
sc->sc_carpdev != NULL) { 388 int count = carp_addrcount( 389 (struct carp_if *)sc->sc_carpdev->if_carp, 390 ifatoia(ifa), CARP_COUNT_MASTER); 391 392 if ((cmd == RTM_ADD && count == 1) || 393 (cmd == RTM_DELETE && count == 0)) 394 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 395 } 396 #endif 397 } 398 splx(s); 399 } 400 401 static int 402 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params) 403 { 404 405 struct carp_softc *sc; 406 struct ifnet *ifp; 407 408 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 409 ifp = SC2IFP(sc) = if_alloc(IFT_ETHER); 410 if (ifp == NULL) { 411 free(sc, M_CARP); 412 return (ENOSPC); 413 } 414 415 sc->sc_flags_backup = 0; 416 sc->sc_suppress = 0; 417 sc->sc_advbase = CARP_DFLTINTV; 418 sc->sc_vhid = -1; /* required setting */ 419 sc->sc_advskew = 0; 420 sc->sc_init_counter = 1; 421 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ 422 #ifdef INET 423 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 424 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 425 M_WAITOK); 426 sc->sc_imo.imo_mfilters = NULL; 427 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 428 sc->sc_imo.imo_multicast_vif = -1; 429 #endif 430 #ifdef INET6 431 sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc( 432 (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, 433 M_WAITOK); 434 sc->sc_im6o.im6o_mfilters = NULL; 435 sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; 436 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 437 #endif 438 439 callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE); 440 callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE); 441 callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE); 442 443 ifp->if_softc = sc; 444 if_initname(ifp, CARP_IFNAME, unit); 445 ifp->if_mtu = ETHERMTU; 446 ifp->if_flags = IFF_LOOPBACK; 447 ifp->if_ioctl = carp_ioctl; 448 ifp->if_output = carp_looutput; 449 ifp->if_start = carp_start; 450 ifp->if_type = IFT_CARP; 451 ifp->if_snd.ifq_maxlen = ifqmaxlen; 452 ifp->if_hdrlen = 0; 453 if_attach(ifp); 
454 bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 455 mtx_lock(&carp_mtx); 456 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 457 mtx_unlock(&carp_mtx); 458 return (0); 459 } 460 461 static void 462 carp_clone_destroy(struct ifnet *ifp) 463 { 464 struct carp_softc *sc = ifp->if_softc; 465 466 if (sc->sc_carpdev) 467 CARP_SCLOCK(sc); 468 carpdetach(sc, 1); /* Returns unlocked. */ 469 470 mtx_lock(&carp_mtx); 471 LIST_REMOVE(sc, sc_next); 472 mtx_unlock(&carp_mtx); 473 bpfdetach(ifp); 474 if_detach(ifp); 475 if_free_type(ifp, IFT_ETHER); 476 #ifdef INET 477 free(sc->sc_imo.imo_membership, M_CARP); 478 #endif 479 #ifdef INET6 480 free(sc->sc_im6o.im6o_membership, M_CARP); 481 #endif 482 free(sc, M_CARP); 483 } 484 485 /* 486 * This function can be called on CARP interface destroy path, 487 * and in case of the removal of the underlying interface as 488 * well. We differentiate these two cases: in case of destruction 489 * of the underlying interface, we do not cleanup our multicast 490 * memberships, since they are already freed. But we purge pointers 491 * to multicast structures, since they are no longer valid, to 492 * avoid panic in future calls to carpdetach(). Also, we do not 493 * release the lock on return, because the function will be 494 * called once more, for another CARP instance on the same 495 * interface. 
496 */ 497 static void 498 carpdetach(struct carp_softc *sc, int unlock) 499 { 500 struct carp_if *cif; 501 502 callout_stop(&sc->sc_ad_tmo); 503 callout_stop(&sc->sc_md_tmo); 504 callout_stop(&sc->sc_md6_tmo); 505 506 if (sc->sc_suppress) 507 carp_suppress_preempt--; 508 sc->sc_suppress = 0; 509 510 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 511 carp_suppress_preempt--; 512 sc->sc_sendad_errors = 0; 513 514 carp_set_state(sc, INIT); 515 SC2IFP(sc)->if_flags &= ~IFF_UP; 516 carp_setrun(sc, 0); 517 #ifdef INET 518 carp_multicast_cleanup(sc, unlock); 519 #endif 520 #ifdef INET6 521 carp_multicast6_cleanup(sc, unlock); 522 #endif 523 524 if (sc->sc_carpdev != NULL) { 525 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 526 CARP_LOCK_ASSERT(cif); 527 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 528 if (!--cif->vhif_nvrs) { 529 ifpromisc(sc->sc_carpdev, 0); 530 sc->sc_carpdev->if_carp = NULL; 531 CARP_LOCK_DESTROY(cif); 532 free(cif, M_CARP); 533 } else if (unlock) 534 CARP_UNLOCK(cif); 535 sc->sc_carpdev = NULL; 536 } 537 } 538 539 /* Detach an interface from the carp. */ 540 static void 541 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 542 { 543 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 544 struct carp_softc *sc, *nextsc; 545 546 if (cif == NULL) 547 return; 548 549 /* 550 * XXX: At the end of for() cycle the lock will be destroyed. 551 */ 552 CARP_LOCK(cif); 553 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 554 nextsc = TAILQ_NEXT(sc, sc_list); 555 carpdetach(sc, 0); 556 } 557 } 558 559 /* 560 * process input packet. 561 * we have rearranged checks order compared to the rfc, 562 * but it seems more efficient this way or not possible otherwise. 
563 */ 564 #ifdef INET 565 void 566 carp_input(struct mbuf *m, int hlen) 567 { 568 struct ip *ip = mtod(m, struct ip *); 569 struct carp_header *ch; 570 int iplen, len; 571 572 CARPSTATS_INC(carps_ipackets); 573 574 if (!carp_opts[CARPCTL_ALLOW]) { 575 m_freem(m); 576 return; 577 } 578 579 /* check if received on a valid carp interface */ 580 if (m->m_pkthdr.rcvif->if_carp == NULL) { 581 CARPSTATS_INC(carps_badif); 582 CARP_DEBUG("carp_input: packet received on non-carp " 583 "interface: %s\n", 584 m->m_pkthdr.rcvif->if_xname); 585 m_freem(m); 586 return; 587 } 588 589 /* verify that the IP TTL is 255. */ 590 if (ip->ip_ttl != CARP_DFLTTL) { 591 CARPSTATS_INC(carps_badttl); 592 CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n", 593 ip->ip_ttl, 594 m->m_pkthdr.rcvif->if_xname); 595 m_freem(m); 596 return; 597 } 598 599 iplen = ip->ip_hl << 2; 600 601 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 602 CARPSTATS_INC(carps_badlen); 603 CARP_DEBUG("carp_input: received len %zd < " 604 "sizeof(struct carp_header) on %s\n", 605 m->m_len - sizeof(struct ip), 606 m->m_pkthdr.rcvif->if_xname); 607 m_freem(m); 608 return; 609 } 610 611 if (iplen + sizeof(*ch) < m->m_len) { 612 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 613 CARPSTATS_INC(carps_hdrops); 614 CARP_DEBUG("carp_input: pullup failed\n"); 615 return; 616 } 617 ip = mtod(m, struct ip *); 618 } 619 ch = (struct carp_header *)((char *)ip + iplen); 620 621 /* 622 * verify that the received packet length is 623 * equal to the CARP header 624 */ 625 len = iplen + sizeof(*ch); 626 if (len > m->m_pkthdr.len) { 627 CARPSTATS_INC(carps_badlen); 628 CARP_DEBUG("carp_input: packet too short %d on %s\n", 629 m->m_pkthdr.len, 630 m->m_pkthdr.rcvif->if_xname); 631 m_freem(m); 632 return; 633 } 634 635 if ((m = m_pullup(m, len)) == NULL) { 636 CARPSTATS_INC(carps_hdrops); 637 return; 638 } 639 ip = mtod(m, struct ip *); 640 ch = (struct carp_header *)((char *)ip + iplen); 641 642 /* verify the CARP checksum */ 643 
m->m_data += iplen; 644 if (carp_cksum(m, len - iplen)) { 645 CARPSTATS_INC(carps_badsum); 646 CARP_DEBUG("carp_input: checksum failed on %s\n", 647 m->m_pkthdr.rcvif->if_xname); 648 m_freem(m); 649 return; 650 } 651 m->m_data -= iplen; 652 653 carp_input_c(m, ch, AF_INET); 654 } 655 #endif 656 657 #ifdef INET6 658 int 659 carp6_input(struct mbuf **mp, int *offp, int proto) 660 { 661 struct mbuf *m = *mp; 662 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 663 struct carp_header *ch; 664 u_int len; 665 666 CARPSTATS_INC(carps_ipackets6); 667 668 if (!carp_opts[CARPCTL_ALLOW]) { 669 m_freem(m); 670 return (IPPROTO_DONE); 671 } 672 673 /* check if received on a valid carp interface */ 674 if (m->m_pkthdr.rcvif->if_carp == NULL) { 675 CARPSTATS_INC(carps_badif); 676 CARP_DEBUG("carp6_input: packet received on non-carp " 677 "interface: %s\n", 678 m->m_pkthdr.rcvif->if_xname); 679 m_freem(m); 680 return (IPPROTO_DONE); 681 } 682 683 /* verify that the IP TTL is 255 */ 684 if (ip6->ip6_hlim != CARP_DFLTTL) { 685 CARPSTATS_INC(carps_badttl); 686 CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n", 687 ip6->ip6_hlim, 688 m->m_pkthdr.rcvif->if_xname); 689 m_freem(m); 690 return (IPPROTO_DONE); 691 } 692 693 /* verify that we have a complete carp packet */ 694 len = m->m_len; 695 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 696 if (ch == NULL) { 697 CARPSTATS_INC(carps_badlen); 698 CARP_DEBUG("carp6_input: packet size %u too small\n", len); 699 return (IPPROTO_DONE); 700 } 701 702 703 /* verify the CARP checksum */ 704 m->m_data += *offp; 705 if (carp_cksum(m, sizeof(*ch))) { 706 CARPSTATS_INC(carps_badsum); 707 CARP_DEBUG("carp6_input: checksum failed, on %s\n", 708 m->m_pkthdr.rcvif->if_xname); 709 m_freem(m); 710 return (IPPROTO_DONE); 711 } 712 m->m_data -= *offp; 713 714 carp_input_c(m, ch, AF_INET6); 715 return (IPPROTO_DONE); 716 } 717 #endif /* INET6 */ 718 719 static void 720 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t 
af) 721 { 722 struct ifnet *ifp = m->m_pkthdr.rcvif; 723 struct carp_softc *sc; 724 u_int64_t tmp_counter; 725 struct timeval sc_tv, ch_tv; 726 727 /* verify that the VHID is valid on the receiving interface */ 728 CARP_LOCK(ifp->if_carp); 729 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 730 if (sc->sc_vhid == ch->carp_vhid) 731 break; 732 733 if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && 734 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 735 CARPSTATS_INC(carps_badvhid); 736 CARP_UNLOCK(ifp->if_carp); 737 m_freem(m); 738 return; 739 } 740 741 getmicrotime(&SC2IFP(sc)->if_lastchange); 742 SC2IFP(sc)->if_ipackets++; 743 SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; 744 745 if (bpf_peers_present(SC2IFP(sc)->if_bpf)) { 746 uint32_t af1 = af; 747 #ifdef INET 748 struct ip *ip = mtod(m, struct ip *); 749 750 /* BPF wants net byte order */ 751 if (af == AF_INET) { 752 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 753 ip->ip_off = htons(ip->ip_off); 754 } 755 #endif 756 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m); 757 } 758 759 /* verify the CARP version. 
*/ 760 if (ch->carp_version != CARP_VERSION) { 761 CARPSTATS_INC(carps_badver); 762 SC2IFP(sc)->if_ierrors++; 763 CARP_UNLOCK(ifp->if_carp); 764 CARP_DEBUG("%s; invalid version %d\n", 765 SC2IFP(sc)->if_xname, 766 ch->carp_version); 767 m_freem(m); 768 return; 769 } 770 771 /* verify the hash */ 772 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 773 CARPSTATS_INC(carps_badauth); 774 SC2IFP(sc)->if_ierrors++; 775 CARP_UNLOCK(ifp->if_carp); 776 CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); 777 m_freem(m); 778 return; 779 } 780 781 tmp_counter = ntohl(ch->carp_counter[0]); 782 tmp_counter = tmp_counter<<32; 783 tmp_counter += ntohl(ch->carp_counter[1]); 784 785 /* XXX Replay protection goes here */ 786 787 sc->sc_init_counter = 0; 788 sc->sc_counter = tmp_counter; 789 790 sc_tv.tv_sec = sc->sc_advbase; 791 if (carp_suppress_preempt && sc->sc_advskew < 240) 792 sc_tv.tv_usec = 240 * 1000000 / 256; 793 else 794 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 795 ch_tv.tv_sec = ch->carp_advbase; 796 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 797 798 switch (sc->sc_state) { 799 case INIT: 800 break; 801 case MASTER: 802 /* 803 * If we receive an advertisement from a master who's going to 804 * be more frequent than us, go into BACKUP state. 805 */ 806 if (timevalcmp(&sc_tv, &ch_tv, >) || 807 timevalcmp(&sc_tv, &ch_tv, ==)) { 808 callout_stop(&sc->sc_ad_tmo); 809 CARP_LOG("%s: MASTER -> BACKUP " 810 "(more frequent advertisement received)\n", 811 SC2IFP(sc)->if_xname); 812 carp_set_state(sc, BACKUP); 813 carp_setrun(sc, 0); 814 carp_setroute(sc, RTM_DELETE); 815 } 816 break; 817 case BACKUP: 818 /* 819 * If we're pre-empting masters who advertise slower than us, 820 * and this one claims to be slower, treat him as down. 
821 */ 822 if (carp_opts[CARPCTL_PREEMPT] && 823 timevalcmp(&sc_tv, &ch_tv, <)) { 824 CARP_LOG("%s: BACKUP -> MASTER " 825 "(preempting a slower master)\n", 826 SC2IFP(sc)->if_xname); 827 carp_master_down_locked(sc); 828 break; 829 } 830 831 /* 832 * If the master is going to advertise at such a low frequency 833 * that he's guaranteed to time out, we'd might as well just 834 * treat him as timed out now. 835 */ 836 sc_tv.tv_sec = sc->sc_advbase * 3; 837 if (timevalcmp(&sc_tv, &ch_tv, <)) { 838 CARP_LOG("%s: BACKUP -> MASTER " 839 "(master timed out)\n", 840 SC2IFP(sc)->if_xname); 841 carp_master_down_locked(sc); 842 break; 843 } 844 845 /* 846 * Otherwise, we reset the counter and wait for the next 847 * advertisement. 848 */ 849 carp_setrun(sc, af); 850 break; 851 } 852 853 CARP_UNLOCK(ifp->if_carp); 854 855 m_freem(m); 856 return; 857 } 858 859 static int 860 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 861 { 862 struct m_tag *mtag; 863 struct ifnet *ifp = SC2IFP(sc); 864 865 if (sc->sc_init_counter) { 866 /* this could also be seconds since unix epoch */ 867 sc->sc_counter = arc4random(); 868 sc->sc_counter = sc->sc_counter << 32; 869 sc->sc_counter += arc4random(); 870 } else 871 sc->sc_counter++; 872 873 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 874 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 875 876 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 877 878 /* Tag packet for carp_output */ 879 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 880 if (mtag == NULL) { 881 m_freem(m); 882 SC2IFP(sc)->if_oerrors++; 883 return (ENOMEM); 884 } 885 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 886 m_tag_prepend(m, mtag); 887 888 return (0); 889 } 890 891 static void 892 carp_send_ad_all(void) 893 { 894 struct carp_softc *sc; 895 896 mtx_lock(&carp_mtx); 897 LIST_FOREACH(sc, &carpif_list, sc_next) { 898 if (sc->sc_carpdev == NULL) 899 continue; 900 CARP_SCLOCK(sc); 
901 if ((SC2IFP(sc)->if_flags & IFF_UP) && 902 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) && 903 sc->sc_state == MASTER) 904 carp_send_ad_locked(sc); 905 CARP_SCUNLOCK(sc); 906 } 907 mtx_unlock(&carp_mtx); 908 } 909 910 static void 911 carp_send_ad(void *v) 912 { 913 struct carp_softc *sc = v; 914 915 CARP_SCLOCK(sc); 916 carp_send_ad_locked(sc); 917 CARP_SCUNLOCK(sc); 918 } 919 920 static void 921 carp_send_ad_locked(struct carp_softc *sc) 922 { 923 struct carp_header ch; 924 struct timeval tv; 925 struct carp_header *ch_ptr; 926 struct mbuf *m; 927 int len, advbase, advskew; 928 929 CARP_SCLOCK_ASSERT(sc); 930 931 /* bow out if we've lost our UPness or RUNNINGuiness */ 932 if (!((SC2IFP(sc)->if_flags & IFF_UP) && 933 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 934 advbase = 255; 935 advskew = 255; 936 } else { 937 advbase = sc->sc_advbase; 938 if (!carp_suppress_preempt || sc->sc_advskew > 240) 939 advskew = sc->sc_advskew; 940 else 941 advskew = 240; 942 tv.tv_sec = advbase; 943 tv.tv_usec = advskew * 1000000 / 256; 944 } 945 946 ch.carp_version = CARP_VERSION; 947 ch.carp_type = CARP_ADVERTISEMENT; 948 ch.carp_vhid = sc->sc_vhid; 949 ch.carp_advbase = advbase; 950 ch.carp_advskew = advskew; 951 ch.carp_authlen = 7; /* XXX DEFINE */ 952 ch.carp_pad1 = 0; /* must be zero */ 953 ch.carp_cksum = 0; 954 955 #ifdef INET 956 if (sc->sc_ia) { 957 struct ip *ip; 958 959 MGETHDR(m, M_DONTWAIT, MT_HEADER); 960 if (m == NULL) { 961 SC2IFP(sc)->if_oerrors++; 962 CARPSTATS_INC(carps_onomem); 963 /* XXX maybe less ? 
*/ 964 if (advbase != 255 || advskew != 255) 965 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 966 carp_send_ad, sc); 967 return; 968 } 969 len = sizeof(*ip) + sizeof(ch); 970 m->m_pkthdr.len = len; 971 m->m_pkthdr.rcvif = NULL; 972 m->m_len = len; 973 MH_ALIGN(m, m->m_len); 974 m->m_flags |= M_MCAST; 975 ip = mtod(m, struct ip *); 976 ip->ip_v = IPVERSION; 977 ip->ip_hl = sizeof(*ip) >> 2; 978 ip->ip_tos = IPTOS_LOWDELAY; 979 ip->ip_len = len; 980 ip->ip_id = ip_newid(); 981 ip->ip_off = IP_DF; 982 ip->ip_ttl = CARP_DFLTTL; 983 ip->ip_p = IPPROTO_CARP; 984 ip->ip_sum = 0; 985 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; 986 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 987 988 ch_ptr = (struct carp_header *)(&ip[1]); 989 bcopy(&ch, ch_ptr, sizeof(ch)); 990 if (carp_prepare_ad(m, sc, ch_ptr)) 991 return; 992 993 m->m_data += sizeof(*ip); 994 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 995 m->m_data -= sizeof(*ip); 996 997 getmicrotime(&SC2IFP(sc)->if_lastchange); 998 SC2IFP(sc)->if_opackets++; 999 SC2IFP(sc)->if_obytes += len; 1000 CARPSTATS_INC(carps_opackets); 1001 1002 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 1003 SC2IFP(sc)->if_oerrors++; 1004 if (sc->sc_sendad_errors < INT_MAX) 1005 sc->sc_sendad_errors++; 1006 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1007 carp_suppress_preempt++; 1008 if (carp_suppress_preempt == 1) { 1009 CARP_SCUNLOCK(sc); 1010 carp_send_ad_all(); 1011 CARP_SCLOCK(sc); 1012 } 1013 } 1014 sc->sc_sendad_success = 0; 1015 } else { 1016 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1017 if (++sc->sc_sendad_success >= 1018 CARP_SENDAD_MIN_SUCCESS) { 1019 carp_suppress_preempt--; 1020 sc->sc_sendad_errors = 0; 1021 } 1022 } else 1023 sc->sc_sendad_errors = 0; 1024 } 1025 } 1026 #endif /* INET */ 1027 #ifdef INET6 1028 if (sc->sc_ia6) { 1029 struct ip6_hdr *ip6; 1030 1031 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1032 if (m == NULL) { 1033 SC2IFP(sc)->if_oerrors++; 1034 
CARPSTATS_INC(carps_onomem); 1035 /* XXX maybe less ? */ 1036 if (advbase != 255 || advskew != 255) 1037 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1038 carp_send_ad, sc); 1039 return; 1040 } 1041 len = sizeof(*ip6) + sizeof(ch); 1042 m->m_pkthdr.len = len; 1043 m->m_pkthdr.rcvif = NULL; 1044 m->m_len = len; 1045 MH_ALIGN(m, m->m_len); 1046 m->m_flags |= M_MCAST; 1047 ip6 = mtod(m, struct ip6_hdr *); 1048 bzero(ip6, sizeof(*ip6)); 1049 ip6->ip6_vfc |= IPV6_VERSION; 1050 ip6->ip6_hlim = CARP_DFLTTL; 1051 ip6->ip6_nxt = IPPROTO_CARP; 1052 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 1053 sizeof(struct in6_addr)); 1054 /* set the multicast destination */ 1055 1056 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1057 ip6->ip6_dst.s6_addr8[15] = 0x12; 1058 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1059 SC2IFP(sc)->if_oerrors++; 1060 m_freem(m); 1061 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1062 return; 1063 } 1064 1065 ch_ptr = (struct carp_header *)(&ip6[1]); 1066 bcopy(&ch, ch_ptr, sizeof(ch)); 1067 if (carp_prepare_ad(m, sc, ch_ptr)) 1068 return; 1069 1070 m->m_data += sizeof(*ip6); 1071 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1072 m->m_data -= sizeof(*ip6); 1073 1074 getmicrotime(&SC2IFP(sc)->if_lastchange); 1075 SC2IFP(sc)->if_opackets++; 1076 SC2IFP(sc)->if_obytes += len; 1077 CARPSTATS_INC(carps_opackets6); 1078 1079 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 1080 SC2IFP(sc)->if_oerrors++; 1081 if (sc->sc_sendad_errors < INT_MAX) 1082 sc->sc_sendad_errors++; 1083 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1084 carp_suppress_preempt++; 1085 if (carp_suppress_preempt == 1) { 1086 CARP_SCUNLOCK(sc); 1087 carp_send_ad_all(); 1088 CARP_SCLOCK(sc); 1089 } 1090 } 1091 sc->sc_sendad_success = 0; 1092 } else { 1093 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1094 if (++sc->sc_sendad_success >= 1095 CARP_SENDAD_MIN_SUCCESS) { 1096 carp_suppress_preempt--; 1097 sc->sc_sendad_errors = 0; 
1098 } 1099 } else 1100 sc->sc_sendad_errors = 0; 1101 } 1102 } 1103 #endif /* INET6 */ 1104 1105 if (advbase != 255 || advskew != 255) 1106 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1107 carp_send_ad, sc); 1108 1109 } 1110 1111 #ifdef INET 1112 /* 1113 * Broadcast a gratuitous ARP request containing 1114 * the virtual router MAC address for each IP address 1115 * associated with the virtual router. 1116 */ 1117 static void 1118 carp_send_arp(struct carp_softc *sc) 1119 { 1120 struct ifaddr *ifa; 1121 1122 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1123 1124 if (ifa->ifa_addr->sa_family != AF_INET) 1125 continue; 1126 1127 /* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */ 1128 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp)); 1129 1130 DELAY(1000); /* XXX */ 1131 } 1132 } 1133 #endif 1134 1135 #ifdef INET6 1136 static void 1137 carp_send_na(struct carp_softc *sc) 1138 { 1139 struct ifaddr *ifa; 1140 struct in6_addr *in6; 1141 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1142 1143 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1144 1145 if (ifa->ifa_addr->sa_family != AF_INET6) 1146 continue; 1147 1148 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1149 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1150 ND_NA_FLAG_OVERRIDE, 1, NULL); 1151 DELAY(1000); /* XXX */ 1152 } 1153 } 1154 #endif /* INET6 */ 1155 1156 #ifdef INET 1157 static int 1158 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1159 { 1160 struct carp_softc *vh; 1161 struct ifaddr *ifa; 1162 int count = 0; 1163 1164 CARP_LOCK_ASSERT(cif); 1165 1166 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1167 if ((type == CARP_COUNT_RUNNING && 1168 (SC2IFP(vh)->if_flags & IFF_UP) && 1169 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) || 1170 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1171 IF_ADDR_LOCK(SC2IFP(vh)); 1172 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1173 ifa_list) { 1174 if (ifa->ifa_addr->sa_family == AF_INET && 
1175 ia->ia_addr.sin_addr.s_addr == 1176 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1177 count++; 1178 } 1179 IF_ADDR_UNLOCK(SC2IFP(vh)); 1180 } 1181 } 1182 return (count); 1183 } 1184 1185 int 1186 carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia, 1187 struct in_addr *isaddr, u_int8_t **enaddr) 1188 { 1189 struct carp_if *cif; 1190 struct carp_softc *vh; 1191 int index, count = 0; 1192 struct ifaddr *ifa; 1193 1194 cif = ifp->if_carp; 1195 CARP_LOCK(cif); 1196 1197 if (carp_opts[CARPCTL_ARPBALANCE]) { 1198 /* 1199 * XXX proof of concept implementation. 1200 * We use the source ip to decide which virtual host should 1201 * handle the request. If we're master of that virtual host, 1202 * then we respond, otherwise, just drop the arp packet on 1203 * the floor. 1204 */ 1205 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1206 if (count == 0) { 1207 /* should never reach this */ 1208 CARP_UNLOCK(cif); 1209 return (0); 1210 } 1211 1212 /* this should be a hash, like pf_hash() */ 1213 index = ntohl(isaddr->s_addr) % count; 1214 count = 0; 1215 1216 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1217 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1218 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) { 1219 IF_ADDR_LOCK(SC2IFP(vh)); 1220 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1221 ifa_list) { 1222 if (ifa->ifa_addr->sa_family == 1223 AF_INET && 1224 ia->ia_addr.sin_addr.s_addr == 1225 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1226 if (count == index) { 1227 if (vh->sc_state == 1228 MASTER) { 1229 *enaddr = IF_LLADDR(vh->sc_ifp); 1230 IF_ADDR_UNLOCK(SC2IFP(vh)); 1231 CARP_UNLOCK(cif); 1232 return (1); 1233 } else { 1234 IF_ADDR_UNLOCK(SC2IFP(vh)); 1235 CARP_UNLOCK(cif); 1236 return (0); 1237 } 1238 } 1239 count++; 1240 } 1241 } 1242 IF_ADDR_UNLOCK(SC2IFP(vh)); 1243 } 1244 } 1245 } else { 1246 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1247 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1248 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1249 ia->ia_ifp == SC2IFP(vh) && 1250 
vh->sc_state == MASTER) { 1251 *enaddr = IF_LLADDR(vh->sc_ifp); 1252 CARP_UNLOCK(cif); 1253 return (1); 1254 } 1255 } 1256 } 1257 CARP_UNLOCK(cif); 1258 return (0); 1259 } 1260 #endif 1261 1262 #ifdef INET6 1263 struct ifaddr * 1264 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1265 { 1266 struct carp_if *cif; 1267 struct carp_softc *vh; 1268 struct ifaddr *ifa; 1269 1270 cif = ifp->if_carp; 1271 CARP_LOCK(cif); 1272 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1273 IF_ADDR_LOCK(SC2IFP(vh)); 1274 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { 1275 if (IN6_ARE_ADDR_EQUAL(taddr, 1276 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1277 (SC2IFP(vh)->if_flags & IFF_UP) && 1278 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1279 vh->sc_state == MASTER) { 1280 ifa_ref(ifa); 1281 IF_ADDR_UNLOCK(SC2IFP(vh)); 1282 CARP_UNLOCK(cif); 1283 return (ifa); 1284 } 1285 } 1286 IF_ADDR_UNLOCK(SC2IFP(vh)); 1287 } 1288 CARP_UNLOCK(cif); 1289 1290 return (NULL); 1291 } 1292 1293 caddr_t 1294 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1295 { 1296 struct m_tag *mtag; 1297 struct carp_if *cif; 1298 struct carp_softc *sc; 1299 struct ifaddr *ifa; 1300 1301 cif = ifp->if_carp; 1302 CARP_LOCK(cif); 1303 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1304 IF_ADDR_LOCK(SC2IFP(sc)); 1305 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1306 if (IN6_ARE_ADDR_EQUAL(taddr, 1307 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1308 (SC2IFP(sc)->if_flags & IFF_UP) && 1309 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) { 1310 struct ifnet *ifp = SC2IFP(sc); 1311 mtag = m_tag_get(PACKET_TAG_CARP, 1312 sizeof(struct ifnet *), M_NOWAIT); 1313 if (mtag == NULL) { 1314 /* better a bit than nothing */ 1315 IF_ADDR_UNLOCK(SC2IFP(sc)); 1316 CARP_UNLOCK(cif); 1317 return (IF_LLADDR(sc->sc_ifp)); 1318 } 1319 bcopy(&ifp, (caddr_t)(mtag + 1), 1320 sizeof(struct ifnet *)); 1321 m_tag_prepend(m, mtag); 1322 1323 IF_ADDR_UNLOCK(SC2IFP(sc)); 1324 CARP_UNLOCK(cif); 1325 
return (IF_LLADDR(sc->sc_ifp)); 1326 } 1327 } 1328 IF_ADDR_UNLOCK(SC2IFP(sc)); 1329 } 1330 CARP_UNLOCK(cif); 1331 1332 return (NULL); 1333 } 1334 #endif 1335 1336 struct ifnet * 1337 carp_forus(struct ifnet *ifp, u_char *dhost) 1338 { 1339 struct carp_if *cif; 1340 struct carp_softc *vh; 1341 u_int8_t *ena = dhost; 1342 1343 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1344 return (NULL); 1345 1346 cif = ifp->if_carp; 1347 CARP_LOCK(cif); 1348 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1349 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1350 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1351 vh->sc_state == MASTER && 1352 !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { 1353 CARP_UNLOCK(cif); 1354 return (SC2IFP(vh)); 1355 } 1356 1357 CARP_UNLOCK(cif); 1358 return (NULL); 1359 } 1360 1361 static void 1362 carp_master_down(void *v) 1363 { 1364 struct carp_softc *sc = v; 1365 1366 CARP_SCLOCK(sc); 1367 carp_master_down_locked(sc); 1368 CARP_SCUNLOCK(sc); 1369 } 1370 1371 static void 1372 carp_master_down_locked(struct carp_softc *sc) 1373 { 1374 if (sc->sc_carpdev) 1375 CARP_SCLOCK_ASSERT(sc); 1376 1377 switch (sc->sc_state) { 1378 case INIT: 1379 printf("%s: master_down event in INIT state\n", 1380 SC2IFP(sc)->if_xname); 1381 break; 1382 case MASTER: 1383 break; 1384 case BACKUP: 1385 carp_set_state(sc, MASTER); 1386 carp_send_ad_locked(sc); 1387 #ifdef INET 1388 carp_send_arp(sc); 1389 #endif 1390 #ifdef INET6 1391 carp_send_na(sc); 1392 #endif /* INET6 */ 1393 carp_setrun(sc, 0); 1394 carp_setroute(sc, RTM_ADD); 1395 break; 1396 } 1397 } 1398 1399 /* 1400 * When in backup state, af indicates whether to reset the master down timer 1401 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1402 */ 1403 static void 1404 carp_setrun(struct carp_softc *sc, sa_family_t af) 1405 { 1406 struct timeval tv; 1407 1408 if (sc->sc_carpdev == NULL) { 1409 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1410 carp_set_state(sc, INIT); 1411 return; 1412 } else 1413 CARP_SCLOCK_ASSERT(sc); 1414 1415 if (SC2IFP(sc)->if_flags & IFF_UP && 1416 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) && 1417 sc->sc_carpdev->if_link_state == LINK_STATE_UP) 1418 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 1419 else { 1420 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1421 carp_setroute(sc, RTM_DELETE); 1422 return; 1423 } 1424 1425 switch (sc->sc_state) { 1426 case INIT: 1427 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1428 carp_send_ad_locked(sc); 1429 #ifdef INET 1430 carp_send_arp(sc); 1431 #endif 1432 #ifdef INET6 1433 carp_send_na(sc); 1434 #endif /* INET6 */ 1435 CARP_LOG("%s: INIT -> MASTER (preempting)\n", 1436 SC2IFP(sc)->if_xname); 1437 carp_set_state(sc, MASTER); 1438 carp_setroute(sc, RTM_ADD); 1439 } else { 1440 CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); 1441 carp_set_state(sc, BACKUP); 1442 carp_setroute(sc, RTM_DELETE); 1443 carp_setrun(sc, 0); 1444 } 1445 break; 1446 case BACKUP: 1447 callout_stop(&sc->sc_ad_tmo); 1448 tv.tv_sec = 3 * sc->sc_advbase; 1449 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1450 switch (af) { 1451 #ifdef INET 1452 case AF_INET: 1453 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1454 carp_master_down, sc); 1455 break; 1456 #endif /* INET */ 1457 #ifdef INET6 1458 case AF_INET6: 1459 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1460 carp_master_down, sc); 1461 break; 1462 #endif /* INET6 */ 1463 default: 1464 if (sc->sc_naddrs) 1465 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1466 carp_master_down, sc); 1467 if (sc->sc_naddrs6) 1468 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1469 carp_master_down, sc); 1470 break; 1471 } 1472 break; 1473 case MASTER: 1474 tv.tv_sec = sc->sc_advbase; 1475 tv.tv_usec = 
sc->sc_advskew * 1000000 / 256; 1476 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1477 carp_send_ad, sc); 1478 break; 1479 } 1480 } 1481 1482 #ifdef INET 1483 static void 1484 carp_multicast_cleanup(struct carp_softc *sc, int dofree) 1485 { 1486 struct ip_moptions *imo = &sc->sc_imo; 1487 u_int16_t n = imo->imo_num_memberships; 1488 1489 /* Clean up our own multicast memberships */ 1490 while (n-- > 0) { 1491 if (imo->imo_membership[n] != NULL) { 1492 if (dofree) 1493 in_delmulti(imo->imo_membership[n]); 1494 imo->imo_membership[n] = NULL; 1495 } 1496 } 1497 KASSERT(imo->imo_mfilters == NULL, 1498 ("%s: imo_mfilters != NULL", __func__)); 1499 imo->imo_num_memberships = 0; 1500 imo->imo_multicast_ifp = NULL; 1501 } 1502 #endif 1503 1504 #ifdef INET6 1505 static void 1506 carp_multicast6_cleanup(struct carp_softc *sc, int dofree) 1507 { 1508 struct ip6_moptions *im6o = &sc->sc_im6o; 1509 u_int16_t n = im6o->im6o_num_memberships; 1510 1511 while (n-- > 0) { 1512 if (im6o->im6o_membership[n] != NULL) { 1513 if (dofree) 1514 in6_mc_leave(im6o->im6o_membership[n], NULL); 1515 im6o->im6o_membership[n] = NULL; 1516 } 1517 } 1518 KASSERT(im6o->im6o_mfilters == NULL, 1519 ("%s: im6o_mfilters != NULL", __func__)); 1520 im6o->im6o_num_memberships = 0; 1521 im6o->im6o_multicast_ifp = NULL; 1522 } 1523 #endif 1524 1525 #ifdef INET 1526 static int 1527 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1528 { 1529 struct ifnet *ifp; 1530 struct carp_if *cif; 1531 struct in_ifaddr *ia, *ia_if; 1532 struct ip_moptions *imo = &sc->sc_imo; 1533 struct in_addr addr; 1534 u_long iaddr = htonl(sin->sin_addr.s_addr); 1535 int own, error; 1536 1537 if (sin->sin_addr.s_addr == 0) { 1538 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1539 carp_set_state(sc, INIT); 1540 if (sc->sc_naddrs) 1541 SC2IFP(sc)->if_flags |= IFF_UP; 1542 if (sc->sc_carpdev) 1543 CARP_SCLOCK(sc); 1544 carp_setrun(sc, 0); 1545 if (sc->sc_carpdev) 1546 CARP_SCUNLOCK(sc); 1547 return (0); 1548 } 1549 1550 /* we have to 
do it by hands to check we won't match on us */ 1551 ia_if = NULL; own = 0; 1552 IN_IFADDR_RLOCK(); 1553 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1554 /* and, yeah, we need a multicast-capable iface too */ 1555 if (ia->ia_ifp != SC2IFP(sc) && 1556 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1557 (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1558 if (!ia_if) 1559 ia_if = ia; 1560 if (sin->sin_addr.s_addr == 1561 ia->ia_addr.sin_addr.s_addr) 1562 own++; 1563 } 1564 } 1565 1566 if (!ia_if) { 1567 IN_IFADDR_RUNLOCK(); 1568 return (EADDRNOTAVAIL); 1569 } 1570 1571 ia = ia_if; 1572 ifa_ref(&ia->ia_ifa); 1573 IN_IFADDR_RUNLOCK(); 1574 1575 ifp = ia->ia_ifp; 1576 1577 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1578 (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) { 1579 ifa_free(&ia->ia_ifa); 1580 return (EADDRNOTAVAIL); 1581 } 1582 1583 if (imo->imo_num_memberships == 0) { 1584 addr.s_addr = htonl(INADDR_CARP_GROUP); 1585 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == 1586 NULL) { 1587 ifa_free(&ia->ia_ifa); 1588 return (ENOBUFS); 1589 } 1590 imo->imo_num_memberships++; 1591 imo->imo_multicast_ifp = ifp; 1592 imo->imo_multicast_ttl = CARP_DFLTTL; 1593 imo->imo_multicast_loop = 0; 1594 } 1595 1596 if (!ifp->if_carp) { 1597 1598 cif = malloc(sizeof(*cif), M_CARP, 1599 M_WAITOK|M_ZERO); 1600 if (!cif) { 1601 error = ENOBUFS; 1602 goto cleanup; 1603 } 1604 if ((error = ifpromisc(ifp, 1))) { 1605 free(cif, M_CARP); 1606 goto cleanup; 1607 } 1608 1609 CARP_LOCK_INIT(cif); 1610 CARP_LOCK(cif); 1611 cif->vhif_ifp = ifp; 1612 TAILQ_INIT(&cif->vhif_vrs); 1613 ifp->if_carp = cif; 1614 1615 } else { 1616 struct carp_softc *vr; 1617 1618 cif = (struct carp_if *)ifp->if_carp; 1619 CARP_LOCK(cif); 1620 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1621 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1622 CARP_UNLOCK(cif); 1623 error = EEXIST; 1624 goto cleanup; 1625 } 1626 } 1627 sc->sc_ia = ia; 1628 sc->sc_carpdev = ifp; 1629 1630 { /* XXX prevent 
endless loop if already in queue */ 1631 struct carp_softc *vr, *after = NULL; 1632 int myself = 0; 1633 cif = (struct carp_if *)ifp->if_carp; 1634 1635 /* XXX: cif should not change, right? So we still hold the lock */ 1636 CARP_LOCK_ASSERT(cif); 1637 1638 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1639 if (vr == sc) 1640 myself = 1; 1641 if (vr->sc_vhid < sc->sc_vhid) 1642 after = vr; 1643 } 1644 1645 if (!myself) { 1646 /* We're trying to keep things in order */ 1647 if (after == NULL) { 1648 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1649 } else { 1650 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1651 } 1652 cif->vhif_nvrs++; 1653 } 1654 } 1655 1656 sc->sc_naddrs++; 1657 SC2IFP(sc)->if_flags |= IFF_UP; 1658 if (own) 1659 sc->sc_advskew = 0; 1660 carp_sc_state_locked(sc); 1661 carp_setrun(sc, 0); 1662 1663 CARP_UNLOCK(cif); 1664 ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ 1665 1666 return (0); 1667 1668 cleanup: 1669 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1670 ifa_free(&ia->ia_ifa); 1671 return (error); 1672 } 1673 1674 static int 1675 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1676 { 1677 int error = 0; 1678 1679 if (!--sc->sc_naddrs) { 1680 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1681 struct ip_moptions *imo = &sc->sc_imo; 1682 1683 CARP_LOCK(cif); 1684 callout_stop(&sc->sc_ad_tmo); 1685 SC2IFP(sc)->if_flags &= ~IFF_UP; 1686 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1687 sc->sc_vhid = -1; 1688 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1689 imo->imo_multicast_ifp = NULL; 1690 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1691 if (!--cif->vhif_nvrs) { 1692 sc->sc_carpdev->if_carp = NULL; 1693 CARP_LOCK_DESTROY(cif); 1694 free(cif, M_CARP); 1695 } else { 1696 CARP_UNLOCK(cif); 1697 } 1698 } 1699 1700 return (error); 1701 } 1702 #endif 1703 1704 #ifdef INET6 1705 static int 1706 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 
*sin6) 1707 { 1708 struct ifnet *ifp; 1709 struct carp_if *cif; 1710 struct in6_ifaddr *ia, *ia_if; 1711 struct ip6_moptions *im6o = &sc->sc_im6o; 1712 struct in6_addr in6; 1713 int own, error; 1714 1715 error = 0; 1716 1717 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1718 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1719 carp_set_state(sc, INIT); 1720 if (sc->sc_naddrs6) 1721 SC2IFP(sc)->if_flags |= IFF_UP; 1722 if (sc->sc_carpdev) 1723 CARP_SCLOCK(sc); 1724 carp_setrun(sc, 0); 1725 if (sc->sc_carpdev) 1726 CARP_SCUNLOCK(sc); 1727 return (0); 1728 } 1729 1730 /* we have to do it by hands to check we won't match on us */ 1731 ia_if = NULL; own = 0; 1732 IN6_IFADDR_RLOCK(); 1733 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 1734 int i; 1735 1736 for (i = 0; i < 4; i++) { 1737 if ((sin6->sin6_addr.s6_addr32[i] & 1738 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1739 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1740 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1741 break; 1742 } 1743 /* and, yeah, we need a multicast-capable iface too */ 1744 if (ia->ia_ifp != SC2IFP(sc) && 1745 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1746 (i == 4)) { 1747 if (!ia_if) 1748 ia_if = ia; 1749 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1750 &ia->ia_addr.sin6_addr)) 1751 own++; 1752 } 1753 } 1754 1755 if (!ia_if) { 1756 IN6_IFADDR_RUNLOCK(); 1757 return (EADDRNOTAVAIL); 1758 } 1759 ia = ia_if; 1760 ifa_ref(&ia->ia_ifa); 1761 IN6_IFADDR_RUNLOCK(); 1762 ifp = ia->ia_ifp; 1763 1764 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1765 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) { 1766 ifa_free(&ia->ia_ifa); 1767 return (EADDRNOTAVAIL); 1768 } 1769 1770 if (!sc->sc_naddrs6) { 1771 struct in6_multi *in6m; 1772 1773 im6o->im6o_multicast_ifp = ifp; 1774 1775 /* join CARP multicast address */ 1776 bzero(&in6, sizeof(in6)); 1777 in6.s6_addr16[0] = htons(0xff02); 1778 in6.s6_addr8[15] = 0x12; 1779 if (in6_setscope(&in6, ifp, NULL) != 0) 1780 goto cleanup; 1781 in6m = NULL; 1782 
error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); 1783 if (error) 1784 goto cleanup; 1785 im6o->im6o_membership[0] = in6m; 1786 im6o->im6o_num_memberships++; 1787 1788 /* join solicited multicast address */ 1789 bzero(&in6, sizeof(in6)); 1790 in6.s6_addr16[0] = htons(0xff02); 1791 in6.s6_addr32[1] = 0; 1792 in6.s6_addr32[2] = htonl(1); 1793 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1794 in6.s6_addr8[12] = 0xff; 1795 if (in6_setscope(&in6, ifp, NULL) != 0) 1796 goto cleanup; 1797 in6m = NULL; 1798 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); 1799 if (error) 1800 goto cleanup; 1801 im6o->im6o_membership[1] = in6m; 1802 im6o->im6o_num_memberships++; 1803 } 1804 1805 if (!ifp->if_carp) { 1806 cif = malloc(sizeof(*cif), M_CARP, 1807 M_WAITOK|M_ZERO); 1808 if (!cif) { 1809 error = ENOBUFS; 1810 goto cleanup; 1811 } 1812 if ((error = ifpromisc(ifp, 1))) { 1813 free(cif, M_CARP); 1814 goto cleanup; 1815 } 1816 1817 CARP_LOCK_INIT(cif); 1818 CARP_LOCK(cif); 1819 cif->vhif_ifp = ifp; 1820 TAILQ_INIT(&cif->vhif_vrs); 1821 ifp->if_carp = cif; 1822 1823 } else { 1824 struct carp_softc *vr; 1825 1826 cif = (struct carp_if *)ifp->if_carp; 1827 CARP_LOCK(cif); 1828 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1829 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1830 CARP_UNLOCK(cif); 1831 error = EINVAL; 1832 goto cleanup; 1833 } 1834 } 1835 sc->sc_ia6 = ia; 1836 sc->sc_carpdev = ifp; 1837 1838 { /* XXX prevent endless loop if already in queue */ 1839 struct carp_softc *vr, *after = NULL; 1840 int myself = 0; 1841 cif = (struct carp_if *)ifp->if_carp; 1842 CARP_LOCK_ASSERT(cif); 1843 1844 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1845 if (vr == sc) 1846 myself = 1; 1847 if (vr->sc_vhid < sc->sc_vhid) 1848 after = vr; 1849 } 1850 1851 if (!myself) { 1852 /* We're trying to keep things in order */ 1853 if (after == NULL) { 1854 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1855 } else { 1856 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1857 } 1858 
cif->vhif_nvrs++; 1859 } 1860 } 1861 1862 sc->sc_naddrs6++; 1863 SC2IFP(sc)->if_flags |= IFF_UP; 1864 if (own) 1865 sc->sc_advskew = 0; 1866 carp_sc_state_locked(sc); 1867 carp_setrun(sc, 0); 1868 1869 CARP_UNLOCK(cif); 1870 ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ 1871 1872 return (0); 1873 1874 cleanup: 1875 if (!sc->sc_naddrs6) 1876 carp_multicast6_cleanup(sc, 1); 1877 ifa_free(&ia->ia_ifa); 1878 return (error); 1879 } 1880 1881 static int 1882 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1883 { 1884 int error = 0; 1885 1886 if (!--sc->sc_naddrs6) { 1887 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1888 1889 CARP_LOCK(cif); 1890 callout_stop(&sc->sc_ad_tmo); 1891 SC2IFP(sc)->if_flags &= ~IFF_UP; 1892 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1893 sc->sc_vhid = -1; 1894 carp_multicast6_cleanup(sc, 1); 1895 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1896 if (!--cif->vhif_nvrs) { 1897 CARP_LOCK_DESTROY(cif); 1898 sc->sc_carpdev->if_carp = NULL; 1899 free(cif, M_CARP); 1900 } else 1901 CARP_UNLOCK(cif); 1902 } 1903 1904 return (error); 1905 } 1906 #endif /* INET6 */ 1907 1908 static int 1909 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1910 { 1911 struct carp_softc *sc = ifp->if_softc, *vr; 1912 struct carpreq carpr; 1913 struct ifaddr *ifa; 1914 struct ifreq *ifr; 1915 struct ifaliasreq *ifra; 1916 int locked = 0, error = 0; 1917 1918 ifa = (struct ifaddr *)addr; 1919 ifra = (struct ifaliasreq *)addr; 1920 ifr = (struct ifreq *)addr; 1921 1922 switch (cmd) { 1923 case SIOCSIFADDR: 1924 switch (ifa->ifa_addr->sa_family) { 1925 #ifdef INET 1926 case AF_INET: 1927 SC2IFP(sc)->if_flags |= IFF_UP; 1928 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1929 sizeof(struct sockaddr)); 1930 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1931 break; 1932 #endif /* INET */ 1933 #ifdef INET6 1934 case AF_INET6: 1935 SC2IFP(sc)->if_flags |= IFF_UP; 1936 error = carp_set_addr6(sc, 
satosin6(ifa->ifa_addr)); 1937 break; 1938 #endif /* INET6 */ 1939 default: 1940 error = EAFNOSUPPORT; 1941 break; 1942 } 1943 break; 1944 1945 case SIOCAIFADDR: 1946 switch (ifa->ifa_addr->sa_family) { 1947 #ifdef INET 1948 case AF_INET: 1949 SC2IFP(sc)->if_flags |= IFF_UP; 1950 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1951 sizeof(struct sockaddr)); 1952 error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); 1953 break; 1954 #endif /* INET */ 1955 #ifdef INET6 1956 case AF_INET6: 1957 SC2IFP(sc)->if_flags |= IFF_UP; 1958 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1959 break; 1960 #endif /* INET6 */ 1961 default: 1962 error = EAFNOSUPPORT; 1963 break; 1964 } 1965 break; 1966 1967 case SIOCDIFADDR: 1968 switch (ifa->ifa_addr->sa_family) { 1969 #ifdef INET 1970 case AF_INET: 1971 error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); 1972 break; 1973 #endif /* INET */ 1974 #ifdef INET6 1975 case AF_INET6: 1976 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1977 break; 1978 #endif /* INET6 */ 1979 default: 1980 error = EAFNOSUPPORT; 1981 break; 1982 } 1983 break; 1984 1985 case SIOCSIFFLAGS: 1986 if (sc->sc_carpdev) { 1987 locked = 1; 1988 CARP_SCLOCK(sc); 1989 } 1990 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1991 callout_stop(&sc->sc_ad_tmo); 1992 callout_stop(&sc->sc_md_tmo); 1993 callout_stop(&sc->sc_md6_tmo); 1994 if (sc->sc_state == MASTER) 1995 carp_send_ad_locked(sc); 1996 carp_set_state(sc, INIT); 1997 carp_setrun(sc, 0); 1998 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 1999 SC2IFP(sc)->if_flags |= IFF_UP; 2000 carp_setrun(sc, 0); 2001 } 2002 break; 2003 2004 case SIOCSVH: 2005 error = priv_check(curthread, PRIV_NETINET_CARP); 2006 if (error) 2007 break; 2008 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2009 break; 2010 error = 1; 2011 if (sc->sc_carpdev) { 2012 locked = 1; 2013 CARP_SCLOCK(sc); 2014 } 2015 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 2016 switch 
(carpr.carpr_state) { 2017 case BACKUP: 2018 callout_stop(&sc->sc_ad_tmo); 2019 carp_set_state(sc, BACKUP); 2020 carp_setrun(sc, 0); 2021 carp_setroute(sc, RTM_DELETE); 2022 break; 2023 case MASTER: 2024 carp_master_down_locked(sc); 2025 break; 2026 default: 2027 break; 2028 } 2029 } 2030 if (carpr.carpr_vhid > 0) { 2031 if (carpr.carpr_vhid > 255) { 2032 error = EINVAL; 2033 break; 2034 } 2035 if (sc->sc_carpdev) { 2036 struct carp_if *cif; 2037 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2038 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 2039 if (vr != sc && 2040 vr->sc_vhid == carpr.carpr_vhid) { 2041 error = EEXIST; 2042 break; 2043 } 2044 if (error == EEXIST) 2045 break; 2046 } 2047 sc->sc_vhid = carpr.carpr_vhid; 2048 IF_LLADDR(sc->sc_ifp)[0] = 0; 2049 IF_LLADDR(sc->sc_ifp)[1] = 0; 2050 IF_LLADDR(sc->sc_ifp)[2] = 0x5e; 2051 IF_LLADDR(sc->sc_ifp)[3] = 0; 2052 IF_LLADDR(sc->sc_ifp)[4] = 1; 2053 IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; 2054 error--; 2055 } 2056 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2057 if (carpr.carpr_advskew >= 255) { 2058 error = EINVAL; 2059 break; 2060 } 2061 if (carpr.carpr_advbase > 255) { 2062 error = EINVAL; 2063 break; 2064 } 2065 sc->sc_advbase = carpr.carpr_advbase; 2066 sc->sc_advskew = carpr.carpr_advskew; 2067 error--; 2068 } 2069 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2070 if (error > 0) 2071 error = EINVAL; 2072 else { 2073 error = 0; 2074 carp_setrun(sc, 0); 2075 } 2076 break; 2077 2078 case SIOCGVH: 2079 /* XXX: lockless read */ 2080 bzero(&carpr, sizeof(carpr)); 2081 carpr.carpr_state = sc->sc_state; 2082 carpr.carpr_vhid = sc->sc_vhid; 2083 carpr.carpr_advbase = sc->sc_advbase; 2084 carpr.carpr_advskew = sc->sc_advskew; 2085 error = priv_check(curthread, PRIV_NETINET_CARP); 2086 if (error == 0) 2087 bcopy(sc->sc_key, carpr.carpr_key, 2088 sizeof(carpr.carpr_key)); 2089 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2090 break; 2091 2092 default: 2093 error = EINVAL; 2094 } 2095 
2096 if (locked) 2097 CARP_SCUNLOCK(sc); 2098 2099 carp_hmac_prepare(sc); 2100 2101 return (error); 2102 } 2103 2104 /* 2105 * XXX: this is looutput. We should eventually use it from there. 2106 */ 2107 static int 2108 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 2109 struct route *ro) 2110 { 2111 u_int32_t af; 2112 struct rtentry *rt = NULL; 2113 2114 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 2115 2116 if (ro != NULL) 2117 rt = ro->ro_rt; 2118 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2119 m_freem(m); 2120 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 2121 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 2122 } 2123 2124 ifp->if_opackets++; 2125 ifp->if_obytes += m->m_pkthdr.len; 2126 2127 /* BPF writes need to be handled specially. */ 2128 if (dst->sa_family == AF_UNSPEC) { 2129 bcopy(dst->sa_data, &af, sizeof(af)); 2130 dst->sa_family = af; 2131 } 2132 2133 #if 1 /* XXX */ 2134 switch (dst->sa_family) { 2135 case AF_INET: 2136 case AF_INET6: 2137 case AF_IPX: 2138 case AF_APPLETALK: 2139 break; 2140 default: 2141 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 2142 m_freem(m); 2143 return (EAFNOSUPPORT); 2144 } 2145 #endif 2146 return(if_simloop(ifp, m, dst->sa_family, 0)); 2147 } 2148 2149 /* 2150 * Start output on carp interface. This function should never be called. 
2151 */ 2152 static void 2153 carp_start(struct ifnet *ifp) 2154 { 2155 #ifdef DEBUG 2156 printf("%s: start called\n", ifp->if_xname); 2157 #endif 2158 } 2159 2160 int 2161 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2162 struct rtentry *rt) 2163 { 2164 struct m_tag *mtag; 2165 struct carp_softc *sc; 2166 struct ifnet *carp_ifp; 2167 2168 if (!sa) 2169 return (0); 2170 2171 switch (sa->sa_family) { 2172 #ifdef INET 2173 case AF_INET: 2174 break; 2175 #endif /* INET */ 2176 #ifdef INET6 2177 case AF_INET6: 2178 break; 2179 #endif /* INET6 */ 2180 default: 2181 return (0); 2182 } 2183 2184 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2185 if (mtag == NULL) 2186 return (0); 2187 2188 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2189 sc = carp_ifp->if_softc; 2190 2191 /* Set the source MAC address to Virtual Router MAC Address */ 2192 switch (ifp->if_type) { 2193 case IFT_ETHER: 2194 case IFT_L2VLAN: { 2195 struct ether_header *eh; 2196 2197 eh = mtod(m, struct ether_header *); 2198 eh->ether_shost[0] = 0; 2199 eh->ether_shost[1] = 0; 2200 eh->ether_shost[2] = 0x5e; 2201 eh->ether_shost[3] = 0; 2202 eh->ether_shost[4] = 1; 2203 eh->ether_shost[5] = sc->sc_vhid; 2204 } 2205 break; 2206 case IFT_FDDI: { 2207 struct fddi_header *fh; 2208 2209 fh = mtod(m, struct fddi_header *); 2210 fh->fddi_shost[0] = 0; 2211 fh->fddi_shost[1] = 0; 2212 fh->fddi_shost[2] = 0x5e; 2213 fh->fddi_shost[3] = 0; 2214 fh->fddi_shost[4] = 1; 2215 fh->fddi_shost[5] = sc->sc_vhid; 2216 } 2217 break; 2218 case IFT_ISO88025: { 2219 struct iso88025_header *th; 2220 th = mtod(m, struct iso88025_header *); 2221 th->iso88025_shost[0] = 3; 2222 th->iso88025_shost[1] = 0; 2223 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 2224 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 2225 th->iso88025_shost[4] = 0; 2226 th->iso88025_shost[5] = 0; 2227 } 2228 break; 2229 default: 2230 printf("%s: carp is not supported for this interface type\n", 2231 ifp->if_xname); 2232 
return (EOPNOTSUPP); 2233 } 2234 2235 return (0); 2236 } 2237 2238 static void 2239 carp_set_state(struct carp_softc *sc, int state) 2240 { 2241 int link_state; 2242 2243 if (sc->sc_carpdev) 2244 CARP_SCLOCK_ASSERT(sc); 2245 2246 if (sc->sc_state == state) 2247 return; 2248 2249 sc->sc_state = state; 2250 switch (state) { 2251 case BACKUP: 2252 link_state = LINK_STATE_DOWN; 2253 break; 2254 case MASTER: 2255 link_state = LINK_STATE_UP; 2256 break; 2257 default: 2258 link_state = LINK_STATE_UNKNOWN; 2259 break; 2260 } 2261 if_link_state_change(SC2IFP(sc), link_state); 2262 } 2263 2264 void 2265 carp_carpdev_state(struct ifnet *ifp) 2266 { 2267 struct carp_if *cif; 2268 2269 cif = ifp->if_carp; 2270 CARP_LOCK(cif); 2271 carp_carpdev_state_locked(cif); 2272 CARP_UNLOCK(cif); 2273 } 2274 2275 static void 2276 carp_carpdev_state_locked(struct carp_if *cif) 2277 { 2278 struct carp_softc *sc; 2279 2280 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2281 carp_sc_state_locked(sc); 2282 } 2283 2284 static void 2285 carp_sc_state_locked(struct carp_softc *sc) 2286 { 2287 CARP_SCLOCK_ASSERT(sc); 2288 2289 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2290 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2291 sc->sc_flags_backup = SC2IFP(sc)->if_flags; 2292 SC2IFP(sc)->if_flags &= ~IFF_UP; 2293 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 2294 callout_stop(&sc->sc_ad_tmo); 2295 callout_stop(&sc->sc_md_tmo); 2296 callout_stop(&sc->sc_md6_tmo); 2297 carp_set_state(sc, INIT); 2298 carp_setrun(sc, 0); 2299 if (!sc->sc_suppress) { 2300 carp_suppress_preempt++; 2301 if (carp_suppress_preempt == 1) { 2302 CARP_SCUNLOCK(sc); 2303 carp_send_ad_all(); 2304 CARP_SCLOCK(sc); 2305 } 2306 } 2307 sc->sc_suppress = 1; 2308 } else { 2309 SC2IFP(sc)->if_flags |= sc->sc_flags_backup; 2310 carp_set_state(sc, INIT); 2311 carp_setrun(sc, 0); 2312 if (sc->sc_suppress) 2313 carp_suppress_preempt--; 2314 sc->sc_suppress = 0; 2315 } 2316 2317 return; 2318 } 2319 2320 #ifdef INET 2321 extern struct domain 
inetdomain; 2322 static struct protosw in_carp_protosw = { 2323 .pr_type = SOCK_RAW, 2324 .pr_domain = &inetdomain, 2325 .pr_protocol = IPPROTO_CARP, 2326 .pr_flags = PR_ATOMIC|PR_ADDR, 2327 .pr_input = carp_input, 2328 .pr_output = (pr_output_t *)rip_output, 2329 .pr_ctloutput = rip_ctloutput, 2330 .pr_usrreqs = &rip_usrreqs 2331 }; 2332 #endif 2333 2334 #ifdef INET6 2335 extern struct domain inet6domain; 2336 static struct ip6protosw in6_carp_protosw = { 2337 .pr_type = SOCK_RAW, 2338 .pr_domain = &inet6domain, 2339 .pr_protocol = IPPROTO_CARP, 2340 .pr_flags = PR_ATOMIC|PR_ADDR, 2341 .pr_input = carp6_input, 2342 .pr_output = rip6_output, 2343 .pr_ctloutput = rip6_ctloutput, 2344 .pr_usrreqs = &rip6_usrreqs 2345 }; 2346 #endif 2347 2348 static void 2349 carp_mod_cleanup(void) 2350 { 2351 2352 if (if_detach_event_tag == NULL) 2353 return; 2354 EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); 2355 if_clone_detach(&carp_cloner); 2356 #ifdef INET 2357 if (proto_reg[CARP_INET] == 0) { 2358 (void)ipproto_unregister(IPPROTO_CARP); 2359 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2360 proto_reg[CARP_INET] = -1; 2361 } 2362 carp_iamatch_p = NULL; 2363 #endif 2364 #ifdef INET6 2365 if (proto_reg[CARP_INET6] == 0) { 2366 (void)ip6proto_unregister(IPPROTO_CARP); 2367 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2368 proto_reg[CARP_INET6] = -1; 2369 } 2370 carp_iamatch6_p = NULL; 2371 carp_macmatch6_p = NULL; 2372 #endif 2373 carp_linkstate_p = NULL; 2374 carp_forus_p = NULL; 2375 carp_output_p = NULL; 2376 mtx_destroy(&carp_mtx); 2377 } 2378 2379 static int 2380 carp_mod_load(void) 2381 { 2382 int err; 2383 2384 if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 2385 carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); 2386 if (if_detach_event_tag == NULL) 2387 return (ENOMEM); 2388 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2389 LIST_INIT(&carpif_list); 2390 if_clone_attach(&carp_cloner); 2391 carp_linkstate_p = 
carp_carpdev_state; 2392 carp_forus_p = carp_forus; 2393 carp_output_p = carp_output; 2394 #ifdef INET6 2395 carp_iamatch6_p = carp_iamatch6; 2396 carp_macmatch6_p = carp_macmatch6; 2397 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2398 (struct protosw *)&in6_carp_protosw); 2399 if (proto_reg[CARP_INET6] != 0) { 2400 printf("carp: error %d attaching to PF_INET6\n", 2401 proto_reg[CARP_INET6]); 2402 carp_mod_cleanup(); 2403 return (proto_reg[CARP_INET6]); 2404 } 2405 err = ip6proto_register(IPPROTO_CARP); 2406 if (err) { 2407 printf("carp: error %d registering with INET6\n", err); 2408 carp_mod_cleanup(); 2409 return (err); 2410 } 2411 #endif 2412 #ifdef INET 2413 carp_iamatch_p = carp_iamatch; 2414 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2415 if (proto_reg[CARP_INET] != 0) { 2416 printf("carp: error %d attaching to PF_INET\n", 2417 proto_reg[CARP_INET]); 2418 carp_mod_cleanup(); 2419 return (proto_reg[CARP_INET]); 2420 } 2421 err = ipproto_register(IPPROTO_CARP); 2422 if (err) { 2423 printf("carp: error %d registering with INET\n", err); 2424 carp_mod_cleanup(); 2425 return (err); 2426 } 2427 #endif 2428 return 0; 2429 } 2430 2431 static int 2432 carp_modevent(module_t mod, int type, void *data) 2433 { 2434 switch (type) { 2435 case MOD_LOAD: 2436 return carp_mod_load(); 2437 /* NOTREACHED */ 2438 case MOD_UNLOAD: 2439 /* 2440 * XXX: For now, disallow module unloading by default due to 2441 * a race condition where a thread may dereference one of the 2442 * function pointer hooks after the module has been 2443 * unloaded, during processing of a packet, causing a panic. 
2444 */ 2445 #ifdef CARPMOD_CAN_UNLOAD 2446 carp_mod_cleanup(); 2447 #else 2448 return (EBUSY); 2449 #endif 2450 break; 2451 2452 default: 2453 return (EINVAL); 2454 } 2455 2456 return (0); 2457 } 2458 2459 static moduledata_t carp_mod = { 2460 "carp", 2461 carp_modevent, 2462 0 2463 }; 2464 2465 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2466