1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include "opt_bpf.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/conf.h> 38 #include <sys/kernel.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/time.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/protosw.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/signalvar.h> 50 #include <sys/filio.h> 51 #include <sys/sockio.h> 52 53 #include <sys/socket.h> 54 #include <sys/vnode.h> 55 56 #include <machine/stdarg.h> 57 58 #include <net/bpf.h> 59 #include <net/ethernet.h> 60 #include <net/fddi.h> 61 #include <net/iso88025.h> 62 #include <net/if.h> 63 #include <net/if_clone.h> 64 #include <net/if_dl.h> 65 #include <net/if_types.h> 66 #include <net/route.h> 67 #include <net/vnet.h> 68 69 #if defined(INET) || defined(INET6) 70 #include <netinet/in.h> 71 #include <netinet/in_var.h> 72 #include <netinet/ip_carp.h> 73 #include <netinet/ip.h> 74 75 #include <machine/in_cksum.h> 76 #endif 77 78 #ifdef INET 79 #include <netinet/in_systm.h> 80 #include <netinet/ip_var.h> 81 #include <netinet/if_ether.h> 82 #endif 83 84 #ifdef INET6 85 #include <netinet/icmp6.h> 86 #include <netinet/ip6.h> 87 #include <netinet6/ip6protosw.h> 88 #include <netinet6/ip6_var.h> 89 #include <netinet6/scope6_var.h> 90 #include <netinet6/in6_var.h> 91 #include <netinet6/nd6.h> 92 #endif 93 94 #include <crypto/sha1.h> 95 96 #define CARP_IFNAME "carp" 97 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 98 SYSCTL_DECL(_net_inet_carp); 99 100 struct carp_softc { 101 struct ifnet *sc_ifp; /* Interface clue */ 102 struct ifnet *sc_carpdev; /* Pointer to parent interface */ 103 struct in_ifaddr *sc_ia; /* primary iface address */ 104 #ifdef INET 105 struct ip_moptions sc_imo; 106 #endif 107 #ifdef INET6 108 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 109 struct ip6_moptions sc_im6o; 110 #endif /* INET6 */ 111 TAILQ_ENTRY(carp_softc) sc_list; 112 113 enum { INIT = 0, BACKUP, MASTER } sc_state; 114 115 int sc_flags_backup; 116 int sc_suppress; 117 118 int sc_sendad_errors; 119 #define CARP_SENDAD_MAX_ERRORS 3 120 int sc_sendad_success; 121 #define CARP_SENDAD_MIN_SUCCESS 3 122 123 int sc_vhid; 124 int sc_advskew; 125 int sc_naddrs; 126 int sc_naddrs6; 127 int sc_advbase; /* seconds */ 128 int sc_init_counter; 129 u_int64_t sc_counter; 130 131 /* authentication */ 132 #define CARP_HMAC_PAD 64 133 unsigned char sc_key[CARP_KEY_LEN]; 134 unsigned char sc_pad[CARP_HMAC_PAD]; 135 SHA1_CTX sc_sha1; 136 137 struct callout sc_ad_tmo; /* advertisement timeout */ 138 struct callout sc_md_tmo; /* master down timeout */ 139 struct callout sc_md6_tmo; /* master down timeout */ 140 141 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 142 }; 143 #define SC2IFP(sc) ((sc)->sc_ifp) 144 145 int carp_suppress_preempt = 0; 146 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 147 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); 148 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 149 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 150 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 151 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 152 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 153 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 154 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 155 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 156 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, 157 &carp_suppress_preempt, 0, "Preemption is suppressed"); 158 159 struct carpstats carpstats; 160 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 161 &carpstats, carpstats, 162 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 163 164 struct carp_if { 165 TAILQ_HEAD(, carp_softc) vhif_vrs; 166 int vhif_nvrs; 167 168 struct ifnet *vhif_ifp; 169 struct mtx vhif_mtx; 170 }; 171 172 #define CARP_INET 0 173 #define CARP_INET6 1 174 static int proto_reg[] = {-1, -1}; 175 176 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */ 177 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) 178 179 /* lock per carp_if queue */ 180 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ 181 NULL, MTX_DEF) 182 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) 183 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) 184 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) 185 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) 186 187 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) 188 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) 189 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) 190 191 #define CARP_LOG(...) do { \ 192 if (carp_opts[CARPCTL_LOG] > 0) \ 193 log(LOG_INFO, __VA_ARGS__); \ 194 } while (0) 195 196 #define CARP_DEBUG(...) do { \ 197 if (carp_opts[CARPCTL_LOG] > 1) \ 198 log(LOG_DEBUG, __VA_ARGS__); \ 199 } while (0) 200 201 static void carp_hmac_prepare(struct carp_softc *); 202 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 203 unsigned char *); 204 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 205 unsigned char *); 206 static void carp_setroute(struct carp_softc *, int); 207 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 208 static int carp_clone_create(struct if_clone *, int, caddr_t); 209 static void carp_clone_destroy(struct ifnet *); 210 static void carpdetach(struct carp_softc *, int); 211 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 212 struct carp_header *); 213 static void carp_send_ad_all(void); 214 static void carp_send_ad(void *); 215 static void carp_send_ad_locked(struct carp_softc *); 216 #ifdef INET 217 static void carp_send_arp(struct carp_softc *); 218 #endif 219 static void carp_master_down(void *); 220 static void carp_master_down_locked(struct carp_softc *); 221 static int carp_ioctl(struct ifnet *, u_long, caddr_t); 222 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 223 struct route *); 224 static void carp_start(struct ifnet *); 225 static void carp_setrun(struct carp_softc *, sa_family_t); 226 static void carp_set_state(struct carp_softc *, int); 227 #ifdef INET 228 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 229 #endif 230 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 231 232 #ifdef INET 233 static void carp_multicast_cleanup(struct carp_softc *, int dofree); 234 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 235 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); 236 #endif 237 static void carp_carpdev_state_locked(struct carp_if *); 238 static void carp_sc_state_locked(struct carp_softc *); 239 #ifdef INET6 240 static void carp_send_na(struct carp_softc *); 241 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 242 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 243 static void carp_multicast6_cleanup(struct carp_softc *, int dofree); 244 #endif 245 246 static LIST_HEAD(, carp_softc) carpif_list; 247 static struct mtx carp_mtx; 248 IFC_SIMPLE_DECLARE(carp, 0); 249 250 static eventhandler_tag if_detach_event_tag; 251 252 static __inline u_int16_t 253 carp_cksum(struct mbuf *m, int len) 254 { 255 return (in_cksum(m, len)); 256 } 257 258 static void 259 carp_hmac_prepare(struct carp_softc *sc) 260 { 261 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 262 u_int8_t vhid = sc->sc_vhid & 0xff; 263 struct ifaddr *ifa; 264 int i, found; 265 #ifdef INET 266 struct in_addr last, cur, in; 267 #endif 268 #ifdef INET6 269 struct in6_addr last6, cur6, in6; 270 #endif 271 272 if (sc->sc_carpdev) 273 CARP_SCLOCK(sc); 274 275 /* XXX: possible race here */ 276 277 /* compute ipad from key */ 278 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 279 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 280 for (i = 0; i < sizeof(sc->sc_pad); i++) 281 sc->sc_pad[i] ^= 0x36; 282 283 /* precompute first part of inner hash */ 284 SHA1Init(&sc->sc_sha1); 285 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 286 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 287 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 288 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 289 #ifdef INET 290 cur.s_addr = 0; 291 do { 292 found = 0; 293 last = cur; 294 cur.s_addr = 0xffffffff; 295 IF_ADDR_LOCK(SC2IFP(sc)); 296 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 297 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 298 if (ifa->ifa_addr->sa_family == AF_INET && 299 ntohl(in.s_addr) > ntohl(last.s_addr) && 300 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 301 cur.s_addr = in.s_addr; 302 found++; 303 } 304 } 305 IF_ADDR_UNLOCK(SC2IFP(sc)); 306 if (found) 307 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 308 } while (found); 309 #endif /* INET */ 310 #ifdef INET6 311 memset(&cur6, 0, sizeof(cur6)); 312 do { 313 found = 0; 314 last6 = cur6; 315 memset(&cur6, 0xff, sizeof(cur6)); 316 IF_ADDR_LOCK(SC2IFP(sc)); 317 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 318 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 319 if (IN6_IS_SCOPE_EMBED(&in6)) 320 in6.s6_addr16[1] = 0; 321 if (ifa->ifa_addr->sa_family == AF_INET6 && 322 memcmp(&in6, &last6, sizeof(in6)) > 0 && 323 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 324 cur6 = in6; 325 found++; 326 } 327 } 328 IF_ADDR_UNLOCK(SC2IFP(sc)); 329 if (found) 330 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 331 } while (found); 332 #endif /* INET6 */ 333 334 /* convert ipad to opad */ 335 for (i = 0; i < sizeof(sc->sc_pad); i++) 336 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 337 338 if (sc->sc_carpdev) 339 CARP_SCUNLOCK(sc); 340 } 341 342 static void 343 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 344 unsigned char md[20]) 345 { 346 SHA1_CTX sha1ctx; 347 348 /* fetch first half of inner hash */ 349 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 350 351 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 352 SHA1Final(md, &sha1ctx); 353 354 /* outer hash */ 355 SHA1Init(&sha1ctx); 356 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 357 SHA1Update(&sha1ctx, md, 20); 358 SHA1Final(md, &sha1ctx); 359 } 360 361 static int 362 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 363 unsigned char md[20]) 364 { 365 unsigned char md2[20]; 366 367 CARP_SCLOCK_ASSERT(sc); 368 369 carp_hmac_generate(sc, counter, md2); 370 371 return (bcmp(md, md2, sizeof(md2))); 372 } 373 374 static void 375 carp_setroute(struct carp_softc *sc, int cmd) 376 { 377 struct ifaddr *ifa; 378 int s; 379 380 if (sc->sc_carpdev) 381 CARP_SCLOCK_ASSERT(sc); 382 383 s = splnet(); 384 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 385 #ifdef INET 386 if (ifa->ifa_addr->sa_family == AF_INET && 387 sc->sc_carpdev != NULL) { 388 int count = carp_addrcount( 389 (struct carp_if *)sc->sc_carpdev->if_carp, 390 ifatoia(ifa), CARP_COUNT_MASTER); 391 392 if ((cmd == RTM_ADD && count == 1) || 393 (cmd == RTM_DELETE && count == 0)) 394 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 395 } 396 #endif 397 } 398 splx(s); 399 } 400 401 static int 402 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params) 403 { 404 405 struct carp_softc *sc; 406 struct ifnet *ifp; 407 408 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 409 ifp = SC2IFP(sc) = if_alloc(IFT_ETHER); 410 if (ifp == NULL) { 411 free(sc, M_CARP); 412 return (ENOSPC); 413 } 414 415 sc->sc_flags_backup = 0; 416 sc->sc_suppress = 0; 417 sc->sc_advbase = CARP_DFLTINTV; 418 sc->sc_vhid = -1; /* required setting */ 419 sc->sc_advskew = 0; 420 sc->sc_init_counter = 1; 421 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ 422 #ifdef INET 423 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 424 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 425 M_WAITOK); 426 sc->sc_imo.imo_mfilters = NULL; 427 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 428 sc->sc_imo.imo_multicast_vif = -1; 429 #endif 430 #ifdef INET6 431 sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc( 432 (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, 433 M_WAITOK); 434 sc->sc_im6o.im6o_mfilters = NULL; 435 sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; 436 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 437 #endif 438 439 callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE); 440 callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE); 441 callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE); 442 443 ifp->if_softc = sc; 444 if_initname(ifp, CARP_IFNAME, unit); 445 ifp->if_mtu = ETHERMTU; 446 ifp->if_flags = IFF_LOOPBACK; 447 ifp->if_ioctl = carp_ioctl; 448 ifp->if_output = carp_looutput; 449 ifp->if_start = carp_start; 450 ifp->if_type = IFT_CARP; 451 ifp->if_snd.ifq_maxlen = ifqmaxlen; 452 ifp->if_hdrlen = 0; 453 if_attach(ifp); 454 bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 455 mtx_lock(&carp_mtx); 456 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 457 mtx_unlock(&carp_mtx); 458 return (0); 459 } 460 461 static void 462 carp_clone_destroy(struct ifnet *ifp) 463 { 464 struct carp_softc *sc = ifp->if_softc; 465 466 if (sc->sc_carpdev) 467 CARP_SCLOCK(sc); 468 carpdetach(sc, 1); /* Returns unlocked. */ 469 470 mtx_lock(&carp_mtx); 471 LIST_REMOVE(sc, sc_next); 472 mtx_unlock(&carp_mtx); 473 bpfdetach(ifp); 474 if_detach(ifp); 475 if_free(ifp); 476 #ifdef INET 477 free(sc->sc_imo.imo_membership, M_CARP); 478 #endif 479 #ifdef INET6 480 free(sc->sc_im6o.im6o_membership, M_CARP); 481 #endif 482 free(sc, M_CARP); 483 } 484 485 /* 486 * This function can be called on CARP interface destroy path, 487 * and in case of the removal of the underlying interface as 488 * well. We differentiate these two cases: in case of destruction 489 * of the underlying interface, we do not cleanup our multicast 490 * memberships, since they are already freed. But we purge pointers 491 * to multicast structures, since they are no longer valid, to 492 * avoid panic in future calls to carpdetach(). Also, we do not 493 * release the lock on return, because the function will be 494 * called once more, for another CARP instance on the same 495 * interface. 496 */ 497 static void 498 carpdetach(struct carp_softc *sc, int unlock) 499 { 500 struct carp_if *cif; 501 502 callout_stop(&sc->sc_ad_tmo); 503 callout_stop(&sc->sc_md_tmo); 504 callout_stop(&sc->sc_md6_tmo); 505 506 if (sc->sc_suppress) 507 carp_suppress_preempt--; 508 sc->sc_suppress = 0; 509 510 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 511 carp_suppress_preempt--; 512 sc->sc_sendad_errors = 0; 513 514 carp_set_state(sc, INIT); 515 SC2IFP(sc)->if_flags &= ~IFF_UP; 516 carp_setrun(sc, 0); 517 #ifdef INET 518 carp_multicast_cleanup(sc, unlock); 519 #endif 520 #ifdef INET6 521 carp_multicast6_cleanup(sc, unlock); 522 #endif 523 524 if (sc->sc_carpdev != NULL) { 525 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 526 CARP_LOCK_ASSERT(cif); 527 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 528 if (!--cif->vhif_nvrs) { 529 ifpromisc(sc->sc_carpdev, 0); 530 sc->sc_carpdev->if_carp = NULL; 531 CARP_LOCK_DESTROY(cif); 532 free(cif, M_CARP); 533 } else if (unlock) 534 CARP_UNLOCK(cif); 535 sc->sc_carpdev = NULL; 536 } 537 } 538 539 /* Detach an interface from the carp. */ 540 static void 541 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 542 { 543 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 544 struct carp_softc *sc, *nextsc; 545 546 if (cif == NULL) 547 return; 548 549 /* 550 * XXX: At the end of for() cycle the lock will be destroyed. 551 */ 552 CARP_LOCK(cif); 553 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 554 nextsc = TAILQ_NEXT(sc, sc_list); 555 carpdetach(sc, 0); 556 } 557 } 558 559 /* 560 * process input packet. 561 * we have rearranged checks order compared to the rfc, 562 * but it seems more efficient this way or not possible otherwise. 563 */ 564 #ifdef INET 565 void 566 carp_input(struct mbuf *m, int hlen) 567 { 568 struct ip *ip = mtod(m, struct ip *); 569 struct carp_header *ch; 570 int iplen, len; 571 572 CARPSTATS_INC(carps_ipackets); 573 574 if (!carp_opts[CARPCTL_ALLOW]) { 575 m_freem(m); 576 return; 577 } 578 579 /* check if received on a valid carp interface */ 580 if (m->m_pkthdr.rcvif->if_carp == NULL) { 581 CARPSTATS_INC(carps_badif); 582 CARP_DEBUG("carp_input: packet received on non-carp " 583 "interface: %s\n", 584 m->m_pkthdr.rcvif->if_xname); 585 m_freem(m); 586 return; 587 } 588 589 /* verify that the IP TTL is 255. */ 590 if (ip->ip_ttl != CARP_DFLTTL) { 591 CARPSTATS_INC(carps_badttl); 592 CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n", 593 ip->ip_ttl, 594 m->m_pkthdr.rcvif->if_xname); 595 m_freem(m); 596 return; 597 } 598 599 iplen = ip->ip_hl << 2; 600 601 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 602 CARPSTATS_INC(carps_badlen); 603 CARP_DEBUG("carp_input: received len %zd < " 604 "sizeof(struct carp_header) on %s\n", 605 m->m_len - sizeof(struct ip), 606 m->m_pkthdr.rcvif->if_xname); 607 m_freem(m); 608 return; 609 } 610 611 if (iplen + sizeof(*ch) < m->m_len) { 612 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 613 CARPSTATS_INC(carps_hdrops); 614 CARP_DEBUG("carp_input: pullup failed\n"); 615 return; 616 } 617 ip = mtod(m, struct ip *); 618 } 619 ch = (struct carp_header *)((char *)ip + iplen); 620 621 /* 622 * verify that the received packet length is 623 * equal to the CARP header 624 */ 625 len = iplen + sizeof(*ch); 626 if (len > m->m_pkthdr.len) { 627 CARPSTATS_INC(carps_badlen); 628 CARP_DEBUG("carp_input: packet too short %d on %s\n", 629 m->m_pkthdr.len, 630 m->m_pkthdr.rcvif->if_xname); 631 m_freem(m); 632 return; 633 } 634 635 if ((m = m_pullup(m, len)) == NULL) { 636 CARPSTATS_INC(carps_hdrops); 637 return; 638 } 639 ip = mtod(m, struct ip *); 640 ch = (struct carp_header *)((char *)ip + iplen); 641 642 /* verify the CARP checksum */ 643 m->m_data += iplen; 644 if (carp_cksum(m, len - iplen)) { 645 CARPSTATS_INC(carps_badsum); 646 CARP_DEBUG("carp_input: checksum failed on %s\n", 647 m->m_pkthdr.rcvif->if_xname); 648 m_freem(m); 649 return; 650 } 651 m->m_data -= iplen; 652 653 carp_input_c(m, ch, AF_INET); 654 } 655 #endif 656 657 #ifdef INET6 658 int 659 carp6_input(struct mbuf **mp, int *offp, int proto) 660 { 661 struct mbuf *m = *mp; 662 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 663 struct carp_header *ch; 664 u_int len; 665 666 CARPSTATS_INC(carps_ipackets6); 667 668 if (!carp_opts[CARPCTL_ALLOW]) { 669 m_freem(m); 670 return (IPPROTO_DONE); 671 } 672 673 /* check if received on a valid carp interface */ 674 if (m->m_pkthdr.rcvif->if_carp == NULL) { 675 CARPSTATS_INC(carps_badif); 676 CARP_DEBUG("carp6_input: packet received on non-carp " 677 "interface: %s\n", 678 m->m_pkthdr.rcvif->if_xname); 679 m_freem(m); 680 return (IPPROTO_DONE); 681 } 682 683 /* verify that the IP TTL is 255 */ 684 if (ip6->ip6_hlim != CARP_DFLTTL) { 685 CARPSTATS_INC(carps_badttl); 686 CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n", 687 ip6->ip6_hlim, 688 m->m_pkthdr.rcvif->if_xname); 689 m_freem(m); 690 return (IPPROTO_DONE); 691 } 692 693 /* verify that we have a complete carp packet */ 694 len = m->m_len; 695 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 696 if (ch == NULL) { 697 CARPSTATS_INC(carps_badlen); 698 CARP_DEBUG("carp6_input: packet size %u too small\n", len); 699 return (IPPROTO_DONE); 700 } 701 702 703 /* verify the CARP checksum */ 704 m->m_data += *offp; 705 if (carp_cksum(m, sizeof(*ch))) { 706 CARPSTATS_INC(carps_badsum); 707 CARP_DEBUG("carp6_input: checksum failed, on %s\n", 708 m->m_pkthdr.rcvif->if_xname); 709 m_freem(m); 710 return (IPPROTO_DONE); 711 } 712 m->m_data -= *offp; 713 714 carp_input_c(m, ch, AF_INET6); 715 return (IPPROTO_DONE); 716 } 717 #endif /* INET6 */ 718 719 static void 720 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 721 { 722 struct ifnet *ifp = m->m_pkthdr.rcvif; 723 struct carp_softc *sc; 724 u_int64_t tmp_counter; 725 struct timeval sc_tv, ch_tv; 726 727 /* verify that the VHID is valid on the receiving interface */ 728 CARP_LOCK(ifp->if_carp); 729 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 730 if (sc->sc_vhid == ch->carp_vhid) 731 break; 732 733 if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && 734 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 735 CARPSTATS_INC(carps_badvhid); 736 CARP_UNLOCK(ifp->if_carp); 737 m_freem(m); 738 return; 739 } 740 741 getmicrotime(&SC2IFP(sc)->if_lastchange); 742 SC2IFP(sc)->if_ipackets++; 743 SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; 744 745 if (bpf_peers_present(SC2IFP(sc)->if_bpf)) { 746 uint32_t af1 = af; 747 #ifdef INET 748 struct ip *ip = mtod(m, struct ip *); 749 750 /* BPF wants net byte order */ 751 if (af == AF_INET) { 752 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 753 ip->ip_off = htons(ip->ip_off); 754 } 755 #endif 756 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m); 757 } 758 759 /* verify the CARP version. */ 760 if (ch->carp_version != CARP_VERSION) { 761 CARPSTATS_INC(carps_badver); 762 SC2IFP(sc)->if_ierrors++; 763 CARP_UNLOCK(ifp->if_carp); 764 CARP_DEBUG("%s; invalid version %d\n", 765 SC2IFP(sc)->if_xname, 766 ch->carp_version); 767 m_freem(m); 768 return; 769 } 770 771 /* verify the hash */ 772 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 773 CARPSTATS_INC(carps_badauth); 774 SC2IFP(sc)->if_ierrors++; 775 CARP_UNLOCK(ifp->if_carp); 776 CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); 777 m_freem(m); 778 return; 779 } 780 781 tmp_counter = ntohl(ch->carp_counter[0]); 782 tmp_counter = tmp_counter<<32; 783 tmp_counter += ntohl(ch->carp_counter[1]); 784 785 /* XXX Replay protection goes here */ 786 787 sc->sc_init_counter = 0; 788 sc->sc_counter = tmp_counter; 789 790 sc_tv.tv_sec = sc->sc_advbase; 791 if (carp_suppress_preempt && sc->sc_advskew < 240) 792 sc_tv.tv_usec = 240 * 1000000 / 256; 793 else 794 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 795 ch_tv.tv_sec = ch->carp_advbase; 796 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 797 798 switch (sc->sc_state) { 799 case INIT: 800 break; 801 case MASTER: 802 /* 803 * If we receive an advertisement from a master who's going to 804 * be more frequent than us, go into BACKUP state. 805 */ 806 if (timevalcmp(&sc_tv, &ch_tv, >) || 807 timevalcmp(&sc_tv, &ch_tv, ==)) { 808 callout_stop(&sc->sc_ad_tmo); 809 CARP_LOG("%s: MASTER -> BACKUP " 810 "(more frequent advertisement received)\n", 811 SC2IFP(sc)->if_xname); 812 carp_set_state(sc, BACKUP); 813 carp_setrun(sc, 0); 814 carp_setroute(sc, RTM_DELETE); 815 } 816 break; 817 case BACKUP: 818 /* 819 * If we're pre-empting masters who advertise slower than us, 820 * and this one claims to be slower, treat him as down. 821 */ 822 if (carp_opts[CARPCTL_PREEMPT] && 823 timevalcmp(&sc_tv, &ch_tv, <)) { 824 CARP_LOG("%s: BACKUP -> MASTER " 825 "(preempting a slower master)\n", 826 SC2IFP(sc)->if_xname); 827 carp_master_down_locked(sc); 828 break; 829 } 830 831 /* 832 * If the master is going to advertise at such a low frequency 833 * that he's guaranteed to time out, we'd might as well just 834 * treat him as timed out now. 835 */ 836 sc_tv.tv_sec = sc->sc_advbase * 3; 837 if (timevalcmp(&sc_tv, &ch_tv, <)) { 838 CARP_LOG("%s: BACKUP -> MASTER " 839 "(master timed out)\n", 840 SC2IFP(sc)->if_xname); 841 carp_master_down_locked(sc); 842 break; 843 } 844 845 /* 846 * Otherwise, we reset the counter and wait for the next 847 * advertisement. 848 */ 849 carp_setrun(sc, af); 850 break; 851 } 852 853 CARP_UNLOCK(ifp->if_carp); 854 855 m_freem(m); 856 return; 857 } 858 859 static int 860 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 861 { 862 struct m_tag *mtag; 863 struct ifnet *ifp = SC2IFP(sc); 864 865 if (sc->sc_init_counter) { 866 /* this could also be seconds since unix epoch */ 867 sc->sc_counter = arc4random(); 868 sc->sc_counter = sc->sc_counter << 32; 869 sc->sc_counter += arc4random(); 870 } else 871 sc->sc_counter++; 872 873 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 874 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 875 876 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 877 878 /* Tag packet for carp_output */ 879 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 880 if (mtag == NULL) { 881 m_freem(m); 882 SC2IFP(sc)->if_oerrors++; 883 return (ENOMEM); 884 } 885 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 886 m_tag_prepend(m, mtag); 887 888 return (0); 889 } 890 891 static void 892 carp_send_ad_all(void) 893 { 894 struct carp_softc *sc; 895 896 mtx_lock(&carp_mtx); 897 LIST_FOREACH(sc, &carpif_list, sc_next) { 898 if (sc->sc_carpdev == NULL) 899 continue; 900 CARP_SCLOCK(sc); 901 if ((SC2IFP(sc)->if_flags & IFF_UP) && 902 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) && 903 sc->sc_state == MASTER) 904 carp_send_ad_locked(sc); 905 CARP_SCUNLOCK(sc); 906 } 907 mtx_unlock(&carp_mtx); 908 } 909 910 static void 911 carp_send_ad(void *v) 912 { 913 struct carp_softc *sc = v; 914 915 CARP_SCLOCK(sc); 916 carp_send_ad_locked(sc); 917 CARP_SCUNLOCK(sc); 918 } 919 920 static void 921 carp_send_ad_locked(struct carp_softc *sc) 922 { 923 struct carp_header ch; 924 struct timeval tv; 925 struct carp_header *ch_ptr; 926 struct mbuf *m; 927 int len, advbase, advskew; 928 929 CARP_SCLOCK_ASSERT(sc); 930 931 /* bow out if we've lost our UPness or RUNNINGuiness */ 932 if (!((SC2IFP(sc)->if_flags & IFF_UP) && 933 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 934 advbase = 255; 935 advskew = 255; 936 } else { 937 advbase = sc->sc_advbase; 938 if (!carp_suppress_preempt || sc->sc_advskew > 240) 939 advskew = sc->sc_advskew; 940 else 941 advskew = 240; 942 tv.tv_sec = advbase; 943 tv.tv_usec = advskew * 1000000 / 256; 944 } 945 946 ch.carp_version = CARP_VERSION; 947 ch.carp_type = CARP_ADVERTISEMENT; 948 ch.carp_vhid = sc->sc_vhid; 949 ch.carp_advbase = advbase; 950 ch.carp_advskew = advskew; 951 ch.carp_authlen = 7; /* XXX DEFINE */ 952 ch.carp_pad1 = 0; /* must be zero */ 953 ch.carp_cksum = 0; 954 955 #ifdef INET 956 if (sc->sc_ia) { 957 struct ip *ip; 958 959 MGETHDR(m, M_DONTWAIT, MT_HEADER); 960 if (m == NULL) { 961 SC2IFP(sc)->if_oerrors++; 962 CARPSTATS_INC(carps_onomem); 963 /* XXX maybe less ? */ 964 if (advbase != 255 || advskew != 255) 965 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 966 carp_send_ad, sc); 967 return; 968 } 969 len = sizeof(*ip) + sizeof(ch); 970 m->m_pkthdr.len = len; 971 m->m_pkthdr.rcvif = NULL; 972 m->m_len = len; 973 MH_ALIGN(m, m->m_len); 974 m->m_flags |= M_MCAST; 975 ip = mtod(m, struct ip *); 976 ip->ip_v = IPVERSION; 977 ip->ip_hl = sizeof(*ip) >> 2; 978 ip->ip_tos = IPTOS_LOWDELAY; 979 ip->ip_len = len; 980 ip->ip_id = ip_newid(); 981 ip->ip_off = IP_DF; 982 ip->ip_ttl = CARP_DFLTTL; 983 ip->ip_p = IPPROTO_CARP; 984 ip->ip_sum = 0; 985 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; 986 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 987 988 ch_ptr = (struct carp_header *)(&ip[1]); 989 bcopy(&ch, ch_ptr, sizeof(ch)); 990 if (carp_prepare_ad(m, sc, ch_ptr)) 991 return; 992 993 m->m_data += sizeof(*ip); 994 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 995 m->m_data -= sizeof(*ip); 996 997 getmicrotime(&SC2IFP(sc)->if_lastchange); 998 SC2IFP(sc)->if_opackets++; 999 SC2IFP(sc)->if_obytes += len; 1000 CARPSTATS_INC(carps_opackets); 1001 1002 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 1003 SC2IFP(sc)->if_oerrors++; 1004 if (sc->sc_sendad_errors < INT_MAX) 1005 sc->sc_sendad_errors++; 1006 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1007 carp_suppress_preempt++; 1008 if (carp_suppress_preempt == 1) { 1009 CARP_SCUNLOCK(sc); 1010 carp_send_ad_all(); 1011 CARP_SCLOCK(sc); 1012 } 1013 } 1014 sc->sc_sendad_success = 0; 1015 } else { 1016 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1017 if (++sc->sc_sendad_success >= 1018 CARP_SENDAD_MIN_SUCCESS) { 1019 carp_suppress_preempt--; 1020 sc->sc_sendad_errors = 0; 1021 } 1022 } else 1023 sc->sc_sendad_errors = 0; 1024 } 1025 } 1026 #endif /* INET */ 1027 #ifdef INET6 1028 if (sc->sc_ia6) { 1029 struct ip6_hdr *ip6; 1030 1031 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1032 if (m == NULL) { 1033 SC2IFP(sc)->if_oerrors++; 1034 CARPSTATS_INC(carps_onomem); 1035 /* XXX maybe less ? */ 1036 if (advbase != 255 || advskew != 255) 1037 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1038 carp_send_ad, sc); 1039 return; 1040 } 1041 len = sizeof(*ip6) + sizeof(ch); 1042 m->m_pkthdr.len = len; 1043 m->m_pkthdr.rcvif = NULL; 1044 m->m_len = len; 1045 MH_ALIGN(m, m->m_len); 1046 m->m_flags |= M_MCAST; 1047 ip6 = mtod(m, struct ip6_hdr *); 1048 bzero(ip6, sizeof(*ip6)); 1049 ip6->ip6_vfc |= IPV6_VERSION; 1050 ip6->ip6_hlim = CARP_DFLTTL; 1051 ip6->ip6_nxt = IPPROTO_CARP; 1052 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 1053 sizeof(struct in6_addr)); 1054 /* set the multicast destination */ 1055 1056 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1057 ip6->ip6_dst.s6_addr8[15] = 0x12; 1058 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1059 SC2IFP(sc)->if_oerrors++; 1060 m_freem(m); 1061 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1062 return; 1063 } 1064 1065 ch_ptr = (struct carp_header *)(&ip6[1]); 1066 bcopy(&ch, ch_ptr, sizeof(ch)); 1067 if (carp_prepare_ad(m, sc, ch_ptr)) 1068 return; 1069 1070 m->m_data += sizeof(*ip6); 1071 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1072 m->m_data -= sizeof(*ip6); 1073 1074 getmicrotime(&SC2IFP(sc)->if_lastchange); 1075 SC2IFP(sc)->if_opackets++; 1076 SC2IFP(sc)->if_obytes += len; 1077 CARPSTATS_INC(carps_opackets6); 1078 1079 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 1080 SC2IFP(sc)->if_oerrors++; 1081 if (sc->sc_sendad_errors < INT_MAX) 1082 sc->sc_sendad_errors++; 1083 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1084 carp_suppress_preempt++; 1085 if (carp_suppress_preempt == 1) { 1086 CARP_SCUNLOCK(sc); 1087 carp_send_ad_all(); 1088 CARP_SCLOCK(sc); 1089 } 1090 } 1091 sc->sc_sendad_success = 0; 1092 } else { 1093 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1094 if (++sc->sc_sendad_success >= 1095 CARP_SENDAD_MIN_SUCCESS) { 1096 carp_suppress_preempt--; 1097 sc->sc_sendad_errors = 0; 1098 } 1099 } else 1100 sc->sc_sendad_errors = 0; 1101 } 1102 } 1103 #endif /* INET6 */ 1104 1105 if (advbase != 255 || advskew != 255) 1106 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1107 carp_send_ad, sc); 1108 1109 } 1110 1111 #ifdef INET 1112 /* 1113 * Broadcast a gratuitous ARP request containing 1114 * the virtual router MAC address for each IP address 1115 * associated with the virtual router. 1116 */ 1117 static void 1118 carp_send_arp(struct carp_softc *sc) 1119 { 1120 struct ifaddr *ifa; 1121 1122 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1123 1124 if (ifa->ifa_addr->sa_family != AF_INET) 1125 continue; 1126 1127 /* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */ 1128 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp)); 1129 1130 DELAY(1000); /* XXX */ 1131 } 1132 } 1133 #endif 1134 1135 #ifdef INET6 1136 static void 1137 carp_send_na(struct carp_softc *sc) 1138 { 1139 struct ifaddr *ifa; 1140 struct in6_addr *in6; 1141 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1142 1143 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1144 1145 if (ifa->ifa_addr->sa_family != AF_INET6) 1146 continue; 1147 1148 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1149 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1150 ND_NA_FLAG_OVERRIDE, 1, NULL); 1151 DELAY(1000); /* XXX */ 1152 } 1153 } 1154 #endif /* INET6 */ 1155 1156 #ifdef INET 1157 static int 1158 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1159 { 1160 struct carp_softc *vh; 1161 struct ifaddr *ifa; 1162 int count = 0; 1163 1164 CARP_LOCK_ASSERT(cif); 1165 1166 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1167 if ((type == CARP_COUNT_RUNNING && 1168 (SC2IFP(vh)->if_flags & IFF_UP) && 1169 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) || 1170 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1171 IF_ADDR_LOCK(SC2IFP(vh)); 1172 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1173 ifa_list) { 1174 if (ifa->ifa_addr->sa_family == AF_INET && 1175 ia->ia_addr.sin_addr.s_addr == 1176 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1177 count++; 1178 } 1179 IF_ADDR_UNLOCK(SC2IFP(vh)); 1180 } 1181 } 1182 return (count); 1183 } 1184 1185 int 1186 carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia, 1187 struct in_addr *isaddr, u_int8_t **enaddr) 1188 { 1189 struct carp_if *cif; 1190 struct carp_softc *vh; 1191 int index, count = 0; 1192 struct ifaddr *ifa; 1193 1194 cif = ifp->if_carp; 1195 CARP_LOCK(cif); 1196 1197 if (carp_opts[CARPCTL_ARPBALANCE]) { 1198 /* 1199 * XXX proof of concept implementation. 1200 * We use the source ip to decide which virtual host should 1201 * handle the request. If we're master of that virtual host, 1202 * then we respond, otherwise, just drop the arp packet on 1203 * the floor. 1204 */ 1205 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1206 if (count == 0) { 1207 /* should never reach this */ 1208 CARP_UNLOCK(cif); 1209 return (0); 1210 } 1211 1212 /* this should be a hash, like pf_hash() */ 1213 index = ntohl(isaddr->s_addr) % count; 1214 count = 0; 1215 1216 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1217 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1218 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) { 1219 IF_ADDR_LOCK(SC2IFP(vh)); 1220 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1221 ifa_list) { 1222 if (ifa->ifa_addr->sa_family == 1223 AF_INET && 1224 ia->ia_addr.sin_addr.s_addr == 1225 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1226 if (count == index) { 1227 if (vh->sc_state == 1228 MASTER) { 1229 *enaddr = IF_LLADDR(vh->sc_ifp); 1230 IF_ADDR_UNLOCK(SC2IFP(vh)); 1231 CARP_UNLOCK(cif); 1232 return (1); 1233 } else { 1234 IF_ADDR_UNLOCK(SC2IFP(vh)); 1235 CARP_UNLOCK(cif); 1236 return (0); 1237 } 1238 } 1239 count++; 1240 } 1241 } 1242 IF_ADDR_UNLOCK(SC2IFP(vh)); 1243 } 1244 } 1245 } else { 1246 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1247 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1248 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1249 ia->ia_ifp == SC2IFP(vh) && 1250 vh->sc_state == MASTER) { 1251 *enaddr = IF_LLADDR(vh->sc_ifp); 1252 CARP_UNLOCK(cif); 1253 return (1); 1254 } 1255 } 1256 } 1257 CARP_UNLOCK(cif); 1258 return (0); 1259 } 1260 #endif 1261 1262 #ifdef INET6 1263 struct ifaddr * 1264 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1265 { 1266 struct carp_if *cif; 1267 struct carp_softc *vh; 1268 struct ifaddr *ifa; 1269 1270 cif = ifp->if_carp; 1271 CARP_LOCK(cif); 1272 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1273 IF_ADDR_LOCK(SC2IFP(vh)); 1274 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { 1275 if (IN6_ARE_ADDR_EQUAL(taddr, 1276 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1277 (SC2IFP(vh)->if_flags & IFF_UP) && 1278 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1279 vh->sc_state == MASTER) { 1280 ifa_ref(ifa); 1281 IF_ADDR_UNLOCK(SC2IFP(vh)); 1282 CARP_UNLOCK(cif); 1283 return (ifa); 1284 } 1285 } 1286 IF_ADDR_UNLOCK(SC2IFP(vh)); 1287 } 1288 CARP_UNLOCK(cif); 1289 1290 return (NULL); 1291 } 1292 1293 caddr_t 1294 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1295 { 1296 struct m_tag *mtag; 1297 struct carp_if *cif; 1298 struct carp_softc *sc; 1299 struct ifaddr *ifa; 1300 1301 cif = ifp->if_carp; 1302 CARP_LOCK(cif); 1303 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1304 IF_ADDR_LOCK(SC2IFP(sc)); 1305 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1306 if (IN6_ARE_ADDR_EQUAL(taddr, 1307 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1308 (SC2IFP(sc)->if_flags & IFF_UP) && 1309 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) { 1310 struct ifnet *ifp = SC2IFP(sc); 1311 mtag = m_tag_get(PACKET_TAG_CARP, 1312 sizeof(struct ifnet *), M_NOWAIT); 1313 if (mtag == NULL) { 1314 /* better a bit than nothing */ 1315 IF_ADDR_UNLOCK(SC2IFP(sc)); 1316 CARP_UNLOCK(cif); 1317 return (IF_LLADDR(sc->sc_ifp)); 1318 } 1319 bcopy(&ifp, (caddr_t)(mtag + 1), 1320 sizeof(struct ifnet *)); 1321 m_tag_prepend(m, mtag); 1322 1323 IF_ADDR_UNLOCK(SC2IFP(sc)); 1324 CARP_UNLOCK(cif); 1325 return (IF_LLADDR(sc->sc_ifp)); 1326 } 1327 } 1328 IF_ADDR_UNLOCK(SC2IFP(sc)); 1329 } 1330 CARP_UNLOCK(cif); 1331 1332 return (NULL); 1333 } 1334 #endif 1335 1336 struct ifnet * 1337 carp_forus(struct ifnet *ifp, u_char *dhost) 1338 { 1339 struct carp_if *cif; 1340 struct carp_softc *vh; 1341 u_int8_t *ena = dhost; 1342 1343 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1344 return (NULL); 1345 1346 cif = ifp->if_carp; 1347 CARP_LOCK(cif); 1348 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1349 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1350 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1351 vh->sc_state == MASTER && 1352 !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { 1353 CARP_UNLOCK(cif); 1354 return (SC2IFP(vh)); 1355 } 1356 1357 CARP_UNLOCK(cif); 1358 return (NULL); 1359 } 1360 1361 static void 1362 carp_master_down(void *v) 1363 { 1364 struct carp_softc *sc = v; 1365 1366 CARP_SCLOCK(sc); 1367 carp_master_down_locked(sc); 1368 CARP_SCUNLOCK(sc); 1369 } 1370 1371 static void 1372 carp_master_down_locked(struct carp_softc *sc) 1373 { 1374 if (sc->sc_carpdev) 1375 CARP_SCLOCK_ASSERT(sc); 1376 1377 switch (sc->sc_state) { 1378 case INIT: 1379 printf("%s: master_down event in INIT state\n", 1380 SC2IFP(sc)->if_xname); 1381 break; 1382 case MASTER: 1383 break; 1384 case BACKUP: 1385 carp_set_state(sc, MASTER); 1386 carp_send_ad_locked(sc); 1387 #ifdef INET 1388 carp_send_arp(sc); 1389 #endif 1390 #ifdef INET6 1391 carp_send_na(sc); 1392 #endif /* INET6 */ 1393 carp_setrun(sc, 0); 1394 carp_setroute(sc, RTM_ADD); 1395 break; 1396 } 1397 } 1398 1399 /* 1400 * When in backup state, af indicates whether to reset the master down timer 1401 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1402 */ 1403 static void 1404 carp_setrun(struct carp_softc *sc, sa_family_t af) 1405 { 1406 struct timeval tv; 1407 1408 if (sc->sc_carpdev == NULL) { 1409 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1410 carp_set_state(sc, INIT); 1411 return; 1412 } else 1413 CARP_SCLOCK_ASSERT(sc); 1414 1415 if (SC2IFP(sc)->if_flags & IFF_UP && 1416 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) && 1417 sc->sc_carpdev->if_link_state == LINK_STATE_UP) 1418 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 1419 else { 1420 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1421 carp_setroute(sc, RTM_DELETE); 1422 return; 1423 } 1424 1425 switch (sc->sc_state) { 1426 case INIT: 1427 CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); 1428 carp_set_state(sc, BACKUP); 1429 carp_setroute(sc, RTM_DELETE); 1430 carp_setrun(sc, 0); 1431 break; 1432 case BACKUP: 1433 callout_stop(&sc->sc_ad_tmo); 1434 tv.tv_sec = 3 * sc->sc_advbase; 1435 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1436 switch (af) { 1437 #ifdef INET 1438 case AF_INET: 1439 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1440 carp_master_down, sc); 1441 break; 1442 #endif /* INET */ 1443 #ifdef INET6 1444 case AF_INET6: 1445 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1446 carp_master_down, sc); 1447 break; 1448 #endif /* INET6 */ 1449 default: 1450 if (sc->sc_naddrs) 1451 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1452 carp_master_down, sc); 1453 if (sc->sc_naddrs6) 1454 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1455 carp_master_down, sc); 1456 break; 1457 } 1458 break; 1459 case MASTER: 1460 tv.tv_sec = sc->sc_advbase; 1461 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1462 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1463 carp_send_ad, sc); 1464 break; 1465 } 1466 } 1467 1468 #ifdef INET 1469 static void 1470 carp_multicast_cleanup(struct carp_softc *sc, int dofree) 1471 { 1472 struct ip_moptions *imo = &sc->sc_imo; 1473 u_int16_t n = imo->imo_num_memberships; 1474 1475 /* Clean up our own multicast memberships */ 1476 while (n-- > 0) { 1477 if (imo->imo_membership[n] != NULL) { 1478 if (dofree) 1479 in_delmulti(imo->imo_membership[n]); 1480 imo->imo_membership[n] = NULL; 1481 } 1482 } 1483 KASSERT(imo->imo_mfilters == NULL, 1484 ("%s: imo_mfilters != NULL", __func__)); 1485 imo->imo_num_memberships = 0; 1486 imo->imo_multicast_ifp = NULL; 1487 } 1488 #endif 1489 1490 #ifdef INET6 1491 static void 1492 carp_multicast6_cleanup(struct carp_softc *sc, int dofree) 1493 { 1494 struct ip6_moptions *im6o = &sc->sc_im6o; 1495 u_int16_t n = im6o->im6o_num_memberships; 1496 1497 while (n-- > 0) { 1498 if (im6o->im6o_membership[n] != NULL) { 1499 if (dofree) 1500 in6_mc_leave(im6o->im6o_membership[n], NULL); 1501 im6o->im6o_membership[n] = NULL; 1502 } 1503 } 1504 KASSERT(im6o->im6o_mfilters == NULL, 1505 ("%s: im6o_mfilters != NULL", __func__)); 1506 im6o->im6o_num_memberships = 0; 1507 im6o->im6o_multicast_ifp = NULL; 1508 } 1509 #endif 1510 1511 #ifdef INET 1512 static int 1513 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1514 { 1515 struct ifnet *ifp; 1516 struct carp_if *cif; 1517 struct in_ifaddr *ia, *ia_if; 1518 struct ip_moptions *imo = &sc->sc_imo; 1519 struct in_addr addr; 1520 u_long iaddr = htonl(sin->sin_addr.s_addr); 1521 int own, error; 1522 1523 if (sin->sin_addr.s_addr == 0) { 1524 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1525 carp_set_state(sc, INIT); 1526 if (sc->sc_naddrs) 1527 SC2IFP(sc)->if_flags |= IFF_UP; 1528 if (sc->sc_carpdev) 1529 CARP_SCLOCK(sc); 1530 carp_setrun(sc, 0); 1531 if (sc->sc_carpdev) 1532 CARP_SCUNLOCK(sc); 1533 return (0); 1534 } 1535 1536 /* we have to do it by hands to check we won't match on us */ 1537 ia_if = NULL; own = 0; 1538 IN_IFADDR_RLOCK(); 1539 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1540 /* and, yeah, we need a multicast-capable iface too */ 1541 if (ia->ia_ifp != SC2IFP(sc) && 1542 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1543 (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1544 if (!ia_if) 1545 ia_if = ia; 1546 if (sin->sin_addr.s_addr == 1547 ia->ia_addr.sin_addr.s_addr) 1548 own++; 1549 } 1550 } 1551 1552 if (!ia_if) { 1553 IN_IFADDR_RUNLOCK(); 1554 return (EADDRNOTAVAIL); 1555 } 1556 1557 ia = ia_if; 1558 ifa_ref(&ia->ia_ifa); 1559 IN_IFADDR_RUNLOCK(); 1560 1561 ifp = ia->ia_ifp; 1562 1563 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1564 (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) { 1565 ifa_free(&ia->ia_ifa); 1566 return (EADDRNOTAVAIL); 1567 } 1568 1569 if (imo->imo_num_memberships == 0) { 1570 addr.s_addr = htonl(INADDR_CARP_GROUP); 1571 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == 1572 NULL) { 1573 ifa_free(&ia->ia_ifa); 1574 return (ENOBUFS); 1575 } 1576 imo->imo_num_memberships++; 1577 imo->imo_multicast_ifp = ifp; 1578 imo->imo_multicast_ttl = CARP_DFLTTL; 1579 imo->imo_multicast_loop = 0; 1580 } 1581 1582 if (!ifp->if_carp) { 1583 1584 cif = malloc(sizeof(*cif), M_CARP, 1585 M_WAITOK|M_ZERO); 1586 if (!cif) { 1587 error = ENOBUFS; 1588 goto cleanup; 1589 } 1590 if ((error = ifpromisc(ifp, 1))) { 1591 free(cif, M_CARP); 1592 goto cleanup; 1593 } 1594 1595 CARP_LOCK_INIT(cif); 1596 CARP_LOCK(cif); 1597 cif->vhif_ifp = ifp; 1598 TAILQ_INIT(&cif->vhif_vrs); 1599 ifp->if_carp = cif; 1600 1601 } else { 1602 struct carp_softc *vr; 1603 1604 cif = (struct carp_if *)ifp->if_carp; 1605 CARP_LOCK(cif); 1606 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1607 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1608 CARP_UNLOCK(cif); 1609 error = EEXIST; 1610 goto cleanup; 1611 } 1612 } 1613 sc->sc_ia = ia; 1614 sc->sc_carpdev = ifp; 1615 1616 { /* XXX prevent endless loop if already in queue */ 1617 struct carp_softc *vr, *after = NULL; 1618 int myself = 0; 1619 cif = (struct carp_if *)ifp->if_carp; 1620 1621 /* XXX: cif should not change, right? So we still hold the lock */ 1622 CARP_LOCK_ASSERT(cif); 1623 1624 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1625 if (vr == sc) 1626 myself = 1; 1627 if (vr->sc_vhid < sc->sc_vhid) 1628 after = vr; 1629 } 1630 1631 if (!myself) { 1632 /* We're trying to keep things in order */ 1633 if (after == NULL) { 1634 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1635 } else { 1636 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1637 } 1638 cif->vhif_nvrs++; 1639 } 1640 } 1641 1642 sc->sc_naddrs++; 1643 SC2IFP(sc)->if_flags |= IFF_UP; 1644 if (own) 1645 sc->sc_advskew = 0; 1646 carp_sc_state_locked(sc); 1647 carp_setrun(sc, 0); 1648 1649 CARP_UNLOCK(cif); 1650 ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ 1651 1652 return (0); 1653 1654 cleanup: 1655 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1656 ifa_free(&ia->ia_ifa); 1657 return (error); 1658 } 1659 1660 static int 1661 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1662 { 1663 int error = 0; 1664 1665 if (!--sc->sc_naddrs) { 1666 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1667 struct ip_moptions *imo = &sc->sc_imo; 1668 1669 CARP_LOCK(cif); 1670 callout_stop(&sc->sc_ad_tmo); 1671 SC2IFP(sc)->if_flags &= ~IFF_UP; 1672 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1673 sc->sc_vhid = -1; 1674 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1675 imo->imo_multicast_ifp = NULL; 1676 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1677 if (!--cif->vhif_nvrs) { 1678 sc->sc_carpdev->if_carp = NULL; 1679 CARP_LOCK_DESTROY(cif); 1680 free(cif, M_CARP); 1681 } else { 1682 CARP_UNLOCK(cif); 1683 } 1684 } 1685 1686 return (error); 1687 } 1688 #endif 1689 1690 #ifdef INET6 1691 static int 1692 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1693 { 1694 struct ifnet *ifp; 1695 struct carp_if *cif; 1696 struct in6_ifaddr *ia, *ia_if; 1697 struct ip6_moptions *im6o = &sc->sc_im6o; 1698 struct in6_addr in6; 1699 int own, error; 1700 1701 error = 0; 1702 1703 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1704 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1705 carp_set_state(sc, INIT); 1706 if (sc->sc_naddrs6) 1707 SC2IFP(sc)->if_flags |= IFF_UP; 1708 if (sc->sc_carpdev) 1709 CARP_SCLOCK(sc); 1710 carp_setrun(sc, 0); 1711 if (sc->sc_carpdev) 1712 CARP_SCUNLOCK(sc); 1713 return (0); 1714 } 1715 1716 /* we have to do it by hands to check we won't match on us */ 1717 ia_if = NULL; own = 0; 1718 IN6_IFADDR_RLOCK(); 1719 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 1720 int i; 1721 1722 for (i = 0; i < 4; i++) { 1723 if ((sin6->sin6_addr.s6_addr32[i] & 1724 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1725 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1726 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1727 break; 1728 } 1729 /* and, yeah, we need a multicast-capable iface too */ 1730 if (ia->ia_ifp != SC2IFP(sc) && 1731 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1732 (i == 4)) { 1733 if (!ia_if) 1734 ia_if = ia; 1735 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1736 &ia->ia_addr.sin6_addr)) 1737 own++; 1738 } 1739 } 1740 1741 if (!ia_if) { 1742 IN6_IFADDR_RUNLOCK(); 1743 return (EADDRNOTAVAIL); 1744 } 1745 ia = ia_if; 1746 ifa_ref(&ia->ia_ifa); 1747 IN6_IFADDR_RUNLOCK(); 1748 ifp = ia->ia_ifp; 1749 1750 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1751 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) { 1752 ifa_free(&ia->ia_ifa); 1753 return (EADDRNOTAVAIL); 1754 } 1755 1756 if (!sc->sc_naddrs6) { 1757 struct in6_multi *in6m; 1758 1759 im6o->im6o_multicast_ifp = ifp; 1760 1761 /* join CARP multicast address */ 1762 bzero(&in6, sizeof(in6)); 1763 in6.s6_addr16[0] = htons(0xff02); 1764 in6.s6_addr8[15] = 0x12; 1765 if (in6_setscope(&in6, ifp, NULL) != 0) 1766 goto cleanup; 1767 in6m = NULL; 1768 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); 1769 if (error) 1770 goto cleanup; 1771 im6o->im6o_membership[0] = in6m; 1772 im6o->im6o_num_memberships++; 1773 1774 /* join solicited multicast address */ 1775 bzero(&in6, sizeof(in6)); 1776 in6.s6_addr16[0] = htons(0xff02); 1777 in6.s6_addr32[1] = 0; 1778 in6.s6_addr32[2] = htonl(1); 1779 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1780 in6.s6_addr8[12] = 0xff; 1781 if (in6_setscope(&in6, ifp, NULL) != 0) 1782 goto cleanup; 1783 in6m = NULL; 1784 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); 1785 if (error) 1786 goto cleanup; 1787 im6o->im6o_membership[1] = in6m; 1788 im6o->im6o_num_memberships++; 1789 } 1790 1791 if (!ifp->if_carp) { 1792 cif = malloc(sizeof(*cif), M_CARP, 1793 M_WAITOK|M_ZERO); 1794 if (!cif) { 1795 error = ENOBUFS; 1796 goto cleanup; 1797 } 1798 if ((error = ifpromisc(ifp, 1))) { 1799 free(cif, M_CARP); 1800 goto cleanup; 1801 } 1802 1803 CARP_LOCK_INIT(cif); 1804 CARP_LOCK(cif); 1805 cif->vhif_ifp = ifp; 1806 TAILQ_INIT(&cif->vhif_vrs); 1807 ifp->if_carp = cif; 1808 1809 } else { 1810 struct carp_softc *vr; 1811 1812 cif = (struct carp_if *)ifp->if_carp; 1813 CARP_LOCK(cif); 1814 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1815 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1816 CARP_UNLOCK(cif); 1817 error = EINVAL; 1818 goto cleanup; 1819 } 1820 } 1821 sc->sc_ia6 = ia; 1822 sc->sc_carpdev = ifp; 1823 1824 { /* XXX prevent endless loop if already in queue */ 1825 struct carp_softc *vr, *after = NULL; 1826 int myself = 0; 1827 cif = (struct carp_if *)ifp->if_carp; 1828 CARP_LOCK_ASSERT(cif); 1829 1830 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1831 if (vr == sc) 1832 myself = 1; 1833 if (vr->sc_vhid < sc->sc_vhid) 1834 after = vr; 1835 } 1836 1837 if (!myself) { 1838 /* We're trying to keep things in order */ 1839 if (after == NULL) { 1840 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1841 } else { 1842 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1843 } 1844 cif->vhif_nvrs++; 1845 } 1846 } 1847 1848 sc->sc_naddrs6++; 1849 SC2IFP(sc)->if_flags |= IFF_UP; 1850 if (own) 1851 sc->sc_advskew = 0; 1852 carp_sc_state_locked(sc); 1853 carp_setrun(sc, 0); 1854 1855 CARP_UNLOCK(cif); 1856 ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ 1857 1858 return (0); 1859 1860 cleanup: 1861 if (!sc->sc_naddrs6) 1862 carp_multicast6_cleanup(sc, 1); 1863 ifa_free(&ia->ia_ifa); 1864 return (error); 1865 } 1866 1867 static int 1868 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1869 { 1870 int error = 0; 1871 1872 if (!--sc->sc_naddrs6) { 1873 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1874 1875 CARP_LOCK(cif); 1876 callout_stop(&sc->sc_ad_tmo); 1877 SC2IFP(sc)->if_flags &= ~IFF_UP; 1878 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1879 sc->sc_vhid = -1; 1880 carp_multicast6_cleanup(sc, 1); 1881 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1882 if (!--cif->vhif_nvrs) { 1883 CARP_LOCK_DESTROY(cif); 1884 sc->sc_carpdev->if_carp = NULL; 1885 free(cif, M_CARP); 1886 } else 1887 CARP_UNLOCK(cif); 1888 } 1889 1890 return (error); 1891 } 1892 #endif /* INET6 */ 1893 1894 static int 1895 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1896 { 1897 struct carp_softc *sc = ifp->if_softc, *vr; 1898 struct carpreq carpr; 1899 struct ifaddr *ifa; 1900 struct ifreq *ifr; 1901 struct ifaliasreq *ifra; 1902 int locked = 0, error = 0; 1903 1904 ifa = (struct ifaddr *)addr; 1905 ifra = (struct ifaliasreq *)addr; 1906 ifr = (struct ifreq *)addr; 1907 1908 switch (cmd) { 1909 case SIOCSIFADDR: 1910 switch (ifa->ifa_addr->sa_family) { 1911 #ifdef INET 1912 case AF_INET: 1913 SC2IFP(sc)->if_flags |= IFF_UP; 1914 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1915 sizeof(struct sockaddr)); 1916 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1917 break; 1918 #endif /* INET */ 1919 #ifdef INET6 1920 case AF_INET6: 1921 SC2IFP(sc)->if_flags |= IFF_UP; 1922 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1923 break; 1924 #endif /* INET6 */ 1925 default: 1926 error = EAFNOSUPPORT; 1927 break; 1928 } 1929 break; 1930 1931 case SIOCAIFADDR: 1932 switch (ifa->ifa_addr->sa_family) { 1933 #ifdef INET 1934 case AF_INET: 1935 SC2IFP(sc)->if_flags |= IFF_UP; 1936 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1937 sizeof(struct sockaddr)); 1938 error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); 1939 break; 1940 #endif /* INET */ 1941 #ifdef INET6 1942 case AF_INET6: 1943 SC2IFP(sc)->if_flags |= IFF_UP; 1944 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1945 break; 1946 #endif /* INET6 */ 1947 default: 1948 error = EAFNOSUPPORT; 1949 break; 1950 } 1951 break; 1952 1953 case SIOCDIFADDR: 1954 switch (ifa->ifa_addr->sa_family) { 1955 #ifdef INET 1956 case AF_INET: 1957 error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); 1958 break; 1959 #endif /* INET */ 1960 #ifdef INET6 1961 case AF_INET6: 1962 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1963 break; 1964 #endif /* INET6 */ 1965 default: 1966 error = EAFNOSUPPORT; 1967 break; 1968 } 1969 break; 1970 1971 case SIOCSIFFLAGS: 1972 if (sc->sc_carpdev) { 1973 locked = 1; 1974 CARP_SCLOCK(sc); 1975 } 1976 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1977 callout_stop(&sc->sc_ad_tmo); 1978 callout_stop(&sc->sc_md_tmo); 1979 callout_stop(&sc->sc_md6_tmo); 1980 if (sc->sc_state == MASTER) 1981 carp_send_ad_locked(sc); 1982 carp_set_state(sc, INIT); 1983 carp_setrun(sc, 0); 1984 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 1985 SC2IFP(sc)->if_flags |= IFF_UP; 1986 carp_setrun(sc, 0); 1987 } 1988 break; 1989 1990 case SIOCSVH: 1991 error = priv_check(curthread, PRIV_NETINET_CARP); 1992 if (error) 1993 break; 1994 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1995 break; 1996 error = 1; 1997 if (sc->sc_carpdev) { 1998 locked = 1; 1999 CARP_SCLOCK(sc); 2000 } 2001 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 2002 switch (carpr.carpr_state) { 2003 case BACKUP: 2004 callout_stop(&sc->sc_ad_tmo); 2005 carp_set_state(sc, BACKUP); 2006 carp_setrun(sc, 0); 2007 carp_setroute(sc, RTM_DELETE); 2008 break; 2009 case MASTER: 2010 carp_master_down_locked(sc); 2011 break; 2012 default: 2013 break; 2014 } 2015 } 2016 if (carpr.carpr_vhid > 0) { 2017 if (carpr.carpr_vhid > 255) { 2018 error = EINVAL; 2019 break; 2020 } 2021 if (sc->sc_carpdev) { 2022 struct carp_if *cif; 2023 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2024 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 2025 if (vr != sc && 2026 vr->sc_vhid == carpr.carpr_vhid) { 2027 error = EEXIST; 2028 break; 2029 } 2030 if (error == EEXIST) 2031 break; 2032 } 2033 sc->sc_vhid = carpr.carpr_vhid; 2034 IF_LLADDR(sc->sc_ifp)[0] = 0; 2035 IF_LLADDR(sc->sc_ifp)[1] = 0; 2036 IF_LLADDR(sc->sc_ifp)[2] = 0x5e; 2037 IF_LLADDR(sc->sc_ifp)[3] = 0; 2038 IF_LLADDR(sc->sc_ifp)[4] = 1; 2039 IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; 2040 error--; 2041 } 2042 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2043 if (carpr.carpr_advskew >= 255) { 2044 error = EINVAL; 2045 break; 2046 } 2047 if (carpr.carpr_advbase > 255) { 2048 error = EINVAL; 2049 break; 2050 } 2051 sc->sc_advbase = carpr.carpr_advbase; 2052 sc->sc_advskew = carpr.carpr_advskew; 2053 error--; 2054 } 2055 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2056 if (error > 0) 2057 error = EINVAL; 2058 else { 2059 error = 0; 2060 carp_setrun(sc, 0); 2061 } 2062 break; 2063 2064 case SIOCGVH: 2065 /* XXX: lockless read */ 2066 bzero(&carpr, sizeof(carpr)); 2067 carpr.carpr_state = sc->sc_state; 2068 carpr.carpr_vhid = sc->sc_vhid; 2069 carpr.carpr_advbase = sc->sc_advbase; 2070 carpr.carpr_advskew = sc->sc_advskew; 2071 error = priv_check(curthread, PRIV_NETINET_CARP); 2072 if (error == 0) 2073 bcopy(sc->sc_key, carpr.carpr_key, 2074 sizeof(carpr.carpr_key)); 2075 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2076 break; 2077 2078 default: 2079 error = EINVAL; 2080 } 2081 2082 if (locked) 2083 CARP_SCUNLOCK(sc); 2084 2085 carp_hmac_prepare(sc); 2086 2087 return (error); 2088 } 2089 2090 /* 2091 * XXX: this is looutput. We should eventually use it from there. 2092 */ 2093 static int 2094 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 2095 struct route *ro) 2096 { 2097 u_int32_t af; 2098 struct rtentry *rt = NULL; 2099 2100 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 2101 2102 if (ro != NULL) 2103 rt = ro->ro_rt; 2104 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2105 m_freem(m); 2106 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 2107 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 2108 } 2109 2110 ifp->if_opackets++; 2111 ifp->if_obytes += m->m_pkthdr.len; 2112 2113 /* BPF writes need to be handled specially. */ 2114 if (dst->sa_family == AF_UNSPEC) { 2115 bcopy(dst->sa_data, &af, sizeof(af)); 2116 dst->sa_family = af; 2117 } 2118 2119 #if 1 /* XXX */ 2120 switch (dst->sa_family) { 2121 case AF_INET: 2122 case AF_INET6: 2123 case AF_IPX: 2124 case AF_APPLETALK: 2125 break; 2126 default: 2127 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 2128 m_freem(m); 2129 return (EAFNOSUPPORT); 2130 } 2131 #endif 2132 return(if_simloop(ifp, m, dst->sa_family, 0)); 2133 } 2134 2135 /* 2136 * Start output on carp interface. This function should never be called. 2137 */ 2138 static void 2139 carp_start(struct ifnet *ifp) 2140 { 2141 #ifdef DEBUG 2142 printf("%s: start called\n", ifp->if_xname); 2143 #endif 2144 } 2145 2146 int 2147 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2148 struct rtentry *rt) 2149 { 2150 struct m_tag *mtag; 2151 struct carp_softc *sc; 2152 struct ifnet *carp_ifp; 2153 2154 if (!sa) 2155 return (0); 2156 2157 switch (sa->sa_family) { 2158 #ifdef INET 2159 case AF_INET: 2160 break; 2161 #endif /* INET */ 2162 #ifdef INET6 2163 case AF_INET6: 2164 break; 2165 #endif /* INET6 */ 2166 default: 2167 return (0); 2168 } 2169 2170 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2171 if (mtag == NULL) 2172 return (0); 2173 2174 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2175 sc = carp_ifp->if_softc; 2176 2177 /* Set the source MAC address to Virtual Router MAC Address */ 2178 switch (ifp->if_type) { 2179 case IFT_ETHER: 2180 case IFT_L2VLAN: { 2181 struct ether_header *eh; 2182 2183 eh = mtod(m, struct ether_header *); 2184 eh->ether_shost[0] = 0; 2185 eh->ether_shost[1] = 0; 2186 eh->ether_shost[2] = 0x5e; 2187 eh->ether_shost[3] = 0; 2188 eh->ether_shost[4] = 1; 2189 eh->ether_shost[5] = sc->sc_vhid; 2190 } 2191 break; 2192 case IFT_FDDI: { 2193 struct fddi_header *fh; 2194 2195 fh = mtod(m, struct fddi_header *); 2196 fh->fddi_shost[0] = 0; 2197 fh->fddi_shost[1] = 0; 2198 fh->fddi_shost[2] = 0x5e; 2199 fh->fddi_shost[3] = 0; 2200 fh->fddi_shost[4] = 1; 2201 fh->fddi_shost[5] = sc->sc_vhid; 2202 } 2203 break; 2204 case IFT_ISO88025: { 2205 struct iso88025_header *th; 2206 th = mtod(m, struct iso88025_header *); 2207 th->iso88025_shost[0] = 3; 2208 th->iso88025_shost[1] = 0; 2209 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 2210 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 2211 th->iso88025_shost[4] = 0; 2212 th->iso88025_shost[5] = 0; 2213 } 2214 break; 2215 default: 2216 printf("%s: carp is not supported for this interface type\n", 2217 ifp->if_xname); 2218 return (EOPNOTSUPP); 2219 } 2220 2221 return (0); 2222 } 2223 2224 static void 2225 carp_set_state(struct carp_softc *sc, int state) 2226 { 2227 int link_state; 2228 2229 if (sc->sc_carpdev) 2230 CARP_SCLOCK_ASSERT(sc); 2231 2232 if (sc->sc_state == state) 2233 return; 2234 2235 sc->sc_state = state; 2236 switch (state) { 2237 case BACKUP: 2238 link_state = LINK_STATE_DOWN; 2239 break; 2240 case MASTER: 2241 link_state = LINK_STATE_UP; 2242 break; 2243 default: 2244 link_state = LINK_STATE_UNKNOWN; 2245 break; 2246 } 2247 if_link_state_change(SC2IFP(sc), link_state); 2248 } 2249 2250 void 2251 carp_carpdev_state(struct ifnet *ifp) 2252 { 2253 struct carp_if *cif; 2254 2255 cif = ifp->if_carp; 2256 CARP_LOCK(cif); 2257 carp_carpdev_state_locked(cif); 2258 CARP_UNLOCK(cif); 2259 } 2260 2261 static void 2262 carp_carpdev_state_locked(struct carp_if *cif) 2263 { 2264 struct carp_softc *sc; 2265 2266 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2267 carp_sc_state_locked(sc); 2268 } 2269 2270 static void 2271 carp_sc_state_locked(struct carp_softc *sc) 2272 { 2273 CARP_SCLOCK_ASSERT(sc); 2274 2275 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2276 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2277 sc->sc_flags_backup = SC2IFP(sc)->if_flags; 2278 SC2IFP(sc)->if_flags &= ~IFF_UP; 2279 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 2280 callout_stop(&sc->sc_ad_tmo); 2281 callout_stop(&sc->sc_md_tmo); 2282 callout_stop(&sc->sc_md6_tmo); 2283 carp_set_state(sc, INIT); 2284 carp_setrun(sc, 0); 2285 if (!sc->sc_suppress) { 2286 carp_suppress_preempt++; 2287 if (carp_suppress_preempt == 1) { 2288 CARP_SCUNLOCK(sc); 2289 carp_send_ad_all(); 2290 CARP_SCLOCK(sc); 2291 } 2292 } 2293 sc->sc_suppress = 1; 2294 } else { 2295 SC2IFP(sc)->if_flags |= sc->sc_flags_backup; 2296 carp_set_state(sc, INIT); 2297 carp_setrun(sc, 0); 2298 if (sc->sc_suppress) 2299 carp_suppress_preempt--; 2300 sc->sc_suppress = 0; 2301 } 2302 2303 return; 2304 } 2305 2306 #ifdef INET 2307 extern struct domain inetdomain; 2308 static struct protosw in_carp_protosw = { 2309 .pr_type = SOCK_RAW, 2310 .pr_domain = &inetdomain, 2311 .pr_protocol = IPPROTO_CARP, 2312 .pr_flags = PR_ATOMIC|PR_ADDR, 2313 .pr_input = carp_input, 2314 .pr_output = (pr_output_t *)rip_output, 2315 .pr_ctloutput = rip_ctloutput, 2316 .pr_usrreqs = &rip_usrreqs 2317 }; 2318 #endif 2319 2320 #ifdef INET6 2321 extern struct domain inet6domain; 2322 static struct ip6protosw in6_carp_protosw = { 2323 .pr_type = SOCK_RAW, 2324 .pr_domain = &inet6domain, 2325 .pr_protocol = IPPROTO_CARP, 2326 .pr_flags = PR_ATOMIC|PR_ADDR, 2327 .pr_input = carp6_input, 2328 .pr_output = rip6_output, 2329 .pr_ctloutput = rip6_ctloutput, 2330 .pr_usrreqs = &rip6_usrreqs 2331 }; 2332 #endif 2333 2334 static void 2335 carp_mod_cleanup(void) 2336 { 2337 2338 if (if_detach_event_tag == NULL) 2339 return; 2340 EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); 2341 if_clone_detach(&carp_cloner); 2342 #ifdef INET 2343 if (proto_reg[CARP_INET] == 0) { 2344 (void)ipproto_unregister(IPPROTO_CARP); 2345 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2346 proto_reg[CARP_INET] = -1; 2347 } 2348 carp_iamatch_p = NULL; 2349 #endif 2350 #ifdef INET6 2351 if (proto_reg[CARP_INET6] == 0) { 2352 (void)ip6proto_unregister(IPPROTO_CARP); 2353 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2354 proto_reg[CARP_INET6] = -1; 2355 } 2356 carp_iamatch6_p = NULL; 2357 carp_macmatch6_p = NULL; 2358 #endif 2359 carp_linkstate_p = NULL; 2360 carp_forus_p = NULL; 2361 carp_output_p = NULL; 2362 mtx_destroy(&carp_mtx); 2363 } 2364 2365 static int 2366 carp_mod_load(void) 2367 { 2368 int err; 2369 2370 if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 2371 carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); 2372 if (if_detach_event_tag == NULL) 2373 return (ENOMEM); 2374 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2375 LIST_INIT(&carpif_list); 2376 if_clone_attach(&carp_cloner); 2377 carp_linkstate_p = carp_carpdev_state; 2378 carp_forus_p = carp_forus; 2379 carp_output_p = carp_output; 2380 #ifdef INET6 2381 carp_iamatch6_p = carp_iamatch6; 2382 carp_macmatch6_p = carp_macmatch6; 2383 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2384 (struct protosw *)&in6_carp_protosw); 2385 if (proto_reg[CARP_INET6] != 0) { 2386 printf("carp: error %d attaching to PF_INET6\n", 2387 proto_reg[CARP_INET6]); 2388 carp_mod_cleanup(); 2389 return (proto_reg[CARP_INET6]); 2390 } 2391 err = ip6proto_register(IPPROTO_CARP); 2392 if (err) { 2393 printf("carp: error %d registering with INET6\n", err); 2394 carp_mod_cleanup(); 2395 return (err); 2396 } 2397 #endif 2398 #ifdef INET 2399 carp_iamatch_p = carp_iamatch; 2400 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2401 if (proto_reg[CARP_INET] != 0) { 2402 printf("carp: error %d attaching to PF_INET\n", 2403 proto_reg[CARP_INET]); 2404 carp_mod_cleanup(); 2405 return (proto_reg[CARP_INET]); 2406 } 2407 err = ipproto_register(IPPROTO_CARP); 2408 if (err) { 2409 printf("carp: error %d registering with INET\n", err); 2410 carp_mod_cleanup(); 2411 return (err); 2412 } 2413 #endif 2414 return 0; 2415 } 2416 2417 static int 2418 carp_modevent(module_t mod, int type, void *data) 2419 { 2420 switch (type) { 2421 case MOD_LOAD: 2422 return carp_mod_load(); 2423 /* NOTREACHED */ 2424 case MOD_UNLOAD: 2425 /* 2426 * XXX: For now, disallow module unloading by default due to 2427 * a race condition where a thread may dereference one of the 2428 * function pointer hooks after the module has been 2429 * unloaded, during processing of a packet, causing a panic. 2430 */ 2431 #ifdef CARPMOD_CAN_UNLOAD 2432 carp_mod_cleanup(); 2433 #else 2434 return (EBUSY); 2435 #endif 2436 break; 2437 2438 default: 2439 return (EINVAL); 2440 } 2441 2442 return (0); 2443 } 2444 2445 static moduledata_t carp_mod = { 2446 "carp", 2447 carp_modevent, 2448 0 2449 }; 2450 2451 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2452