1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include "opt_bpf.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/conf.h> 38 #include <sys/kernel.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/time.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/protosw.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/signalvar.h> 50 #include <sys/filio.h> 51 #include <sys/sockio.h> 52 53 #include <sys/socket.h> 54 #include <sys/vnode.h> 55 56 #include <machine/stdarg.h> 57 58 #include <net/bpf.h> 59 #include <net/ethernet.h> 60 #include <net/fddi.h> 61 #include <net/iso88025.h> 62 #include <net/if.h> 63 #include <net/if_clone.h> 64 #include <net/if_dl.h> 65 #include <net/if_types.h> 66 #include <net/route.h> 67 #include <net/vnet.h> 68 69 #ifdef INET 70 #include <netinet/in.h> 71 #include <netinet/in_var.h> 72 #include <netinet/in_systm.h> 73 #include <netinet/ip.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #include <machine/in_cksum.h> 77 #endif 78 79 #ifdef INET6 80 #include <netinet/icmp6.h> 81 #include <netinet/ip6.h> 82 #include <netinet6/ip6protosw.h> 83 #include <netinet6/ip6_var.h> 84 #include <netinet6/scope6_var.h> 85 #include <netinet6/nd6.h> 86 #endif 87 88 #include <crypto/sha1.h> 89 #include <netinet/ip_carp.h> 90 91 #define CARP_IFNAME "carp" 92 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 93 SYSCTL_DECL(_net_inet_carp); 94 95 struct carp_softc { 96 struct ifnet *sc_ifp; /* Interface clue */ 97 struct ifnet *sc_carpdev; /* Pointer to parent interface */ 98 struct in_ifaddr *sc_ia; /* primary iface address */ 99 struct ip_moptions sc_imo; 100 #ifdef INET6 101 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 102 struct ip6_moptions sc_im6o; 103 #endif /* INET6 */ 104 TAILQ_ENTRY(carp_softc) sc_list; 105 106 enum { INIT = 0, BACKUP, MASTER } sc_state; 107 108 int sc_flags_backup; 109 int sc_suppress; 110 111 int sc_sendad_errors; 112 #define CARP_SENDAD_MAX_ERRORS 3 113 int sc_sendad_success; 114 #define CARP_SENDAD_MIN_SUCCESS 3 115 116 int sc_vhid; 117 int sc_advskew; 118 int sc_naddrs; 119 int sc_naddrs6; 120 int sc_advbase; /* seconds */ 121 int sc_init_counter; 122 u_int64_t sc_counter; 123 124 /* authentication */ 125 #define CARP_HMAC_PAD 64 126 unsigned char sc_key[CARP_KEY_LEN]; 127 unsigned char sc_pad[CARP_HMAC_PAD]; 128 SHA1_CTX sc_sha1; 129 130 struct callout sc_ad_tmo; /* advertisement timeout */ 131 struct callout sc_md_tmo; /* master down timeout */ 132 struct callout sc_md6_tmo; /* master down timeout */ 133 134 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 135 }; 136 #define SC2IFP(sc) ((sc)->sc_ifp) 137 138 /* These are external networking stack hooks for CARP */ 139 /* net/if.c */ 140 extern void (*carp_linkstate_p)(struct ifnet *); 141 /* net/if_bridge.c net/if_ethersubr.c */ 142 extern struct ifnet *(*carp_forus_p)(struct ifnet *, u_char *); 143 /* net/if_ethersubr.c */ 144 extern int (*carp_output_p)(struct ifnet *, struct mbuf *, 145 struct sockaddr *, struct rtentry *); 146 #ifdef INET 147 /* netinet/if_ether.c */ 148 extern int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, 149 struct in_addr *, u_int8_t **); 150 #endif 151 #ifdef INET6 152 /* netinet6/nd6_nbr.c */ 153 extern struct ifaddr *(*carp_iamatch6_p)(struct ifnet *, struct in6_addr *); 154 extern caddr_t (*carp_macmatch6_p)(struct ifnet *, struct mbuf *, 155 const struct in6_addr *); 156 #endif 157 158 int carp_suppress_preempt = 0; 159 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 160 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); 161 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 162 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 163 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 164 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 165 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 166 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 167 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 168 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 169 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, 170 &carp_suppress_preempt, 0, "Preemption is suppressed"); 171 172 struct carpstats carpstats; 173 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 174 &carpstats, carpstats, 175 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 176 177 struct carp_if { 178 TAILQ_HEAD(, carp_softc) vhif_vrs; 179 int vhif_nvrs; 180 181 struct ifnet *vhif_ifp; 182 struct mtx vhif_mtx; 183 }; 184 185 #define CARP_INET 0 186 #define CARP_INET6 1 187 static int proto_reg[] = {-1, -1}; 188 189 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */ 190 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) 191 192 /* lock per carp_if queue */ 193 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ 194 NULL, MTX_DEF) 195 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) 196 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) 197 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) 198 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) 199 200 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) 201 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) 202 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) 203 204 #define CARP_LOG(...) do { \ 205 if (carp_opts[CARPCTL_LOG] > 0) \ 206 log(LOG_INFO, __VA_ARGS__); \ 207 } while (0) 208 209 #define CARP_DEBUG(...) do { \ 210 if (carp_opts[CARPCTL_LOG] > 1) \ 211 log(LOG_DEBUG, __VA_ARGS__); \ 212 } while (0) 213 214 static void carp_hmac_prepare(struct carp_softc *); 215 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 216 unsigned char *); 217 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 218 unsigned char *); 219 static void carp_setroute(struct carp_softc *, int); 220 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 221 static int carp_clone_create(struct if_clone *, int, caddr_t); 222 static void carp_clone_destroy(struct ifnet *); 223 static void carpdetach(struct carp_softc *, int); 224 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 225 struct carp_header *); 226 static void carp_send_ad_all(void); 227 static void carp_send_ad(void *); 228 static void carp_send_ad_locked(struct carp_softc *); 229 static void carp_send_arp(struct carp_softc *); 230 static void carp_master_down(void *); 231 static void carp_master_down_locked(struct carp_softc *); 232 static int carp_ioctl(struct ifnet *, u_long, caddr_t); 233 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 234 struct route *); 235 static void carp_start(struct ifnet *); 236 static void carp_setrun(struct carp_softc *, sa_family_t); 237 static void carp_set_state(struct carp_softc *, int); 238 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 239 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 240 241 static void carp_multicast_cleanup(struct carp_softc *); 242 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 243 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); 244 static void carp_carpdev_state_locked(struct carp_if *); 245 static void carp_sc_state_locked(struct carp_softc *); 246 #ifdef INET6 247 static void carp_send_na(struct carp_softc *); 248 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 249 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 250 static void carp_multicast6_cleanup(struct carp_softc *); 251 #endif 252 253 static LIST_HEAD(, carp_softc) carpif_list; 254 static struct mtx carp_mtx; 255 IFC_SIMPLE_DECLARE(carp, 0); 256 257 static eventhandler_tag if_detach_event_tag; 258 259 static __inline u_int16_t 260 carp_cksum(struct mbuf *m, int len) 261 { 262 return (in_cksum(m, len)); 263 } 264 265 static void 266 carp_hmac_prepare(struct carp_softc *sc) 267 { 268 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 269 u_int8_t vhid = sc->sc_vhid & 0xff; 270 struct ifaddr *ifa; 271 int i, found; 272 #ifdef INET 273 struct in_addr last, cur, in; 274 #endif 275 #ifdef INET6 276 struct in6_addr last6, cur6, in6; 277 #endif 278 279 if (sc->sc_carpdev) 280 CARP_SCLOCK(sc); 281 282 /* XXX: possible race here */ 283 284 /* compute ipad from key */ 285 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 286 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 287 for (i = 0; i < sizeof(sc->sc_pad); i++) 288 sc->sc_pad[i] ^= 0x36; 289 290 /* precompute first part of inner hash */ 291 SHA1Init(&sc->sc_sha1); 292 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 293 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 294 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 295 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 296 #ifdef INET 297 cur.s_addr = 0; 298 do { 299 found = 0; 300 last = cur; 301 cur.s_addr = 0xffffffff; 302 IF_ADDR_LOCK(SC2IFP(sc)); 303 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 304 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 305 if (ifa->ifa_addr->sa_family == AF_INET && 306 ntohl(in.s_addr) > ntohl(last.s_addr) && 307 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 308 cur.s_addr = in.s_addr; 309 found++; 310 } 311 } 312 IF_ADDR_UNLOCK(SC2IFP(sc)); 313 if (found) 314 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 315 } while (found); 316 #endif /* INET */ 317 #ifdef INET6 318 memset(&cur6, 0, sizeof(cur6)); 319 do { 320 found = 0; 321 last6 = cur6; 322 memset(&cur6, 0xff, sizeof(cur6)); 323 IF_ADDR_LOCK(SC2IFP(sc)); 324 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 325 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 326 if (IN6_IS_SCOPE_EMBED(&in6)) 327 in6.s6_addr16[1] = 0; 328 if (ifa->ifa_addr->sa_family == AF_INET6 && 329 memcmp(&in6, &last6, sizeof(in6)) > 0 && 330 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 331 cur6 = in6; 332 found++; 333 } 334 } 335 IF_ADDR_UNLOCK(SC2IFP(sc)); 336 if (found) 337 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 338 } while (found); 339 #endif /* INET6 */ 340 341 /* convert ipad to opad */ 342 for (i = 0; i < sizeof(sc->sc_pad); i++) 343 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 344 345 if (sc->sc_carpdev) 346 CARP_SCUNLOCK(sc); 347 } 348 349 static void 350 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 351 unsigned char md[20]) 352 { 353 SHA1_CTX sha1ctx; 354 355 /* fetch first half of inner hash */ 356 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 357 358 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 359 SHA1Final(md, &sha1ctx); 360 361 /* outer hash */ 362 SHA1Init(&sha1ctx); 363 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 364 SHA1Update(&sha1ctx, md, 20); 365 SHA1Final(md, &sha1ctx); 366 } 367 368 static int 369 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 370 unsigned char md[20]) 371 { 372 unsigned char md2[20]; 373 374 CARP_SCLOCK_ASSERT(sc); 375 376 carp_hmac_generate(sc, counter, md2); 377 378 return (bcmp(md, md2, sizeof(md2))); 379 } 380 381 static void 382 carp_setroute(struct carp_softc *sc, int cmd) 383 { 384 struct ifaddr *ifa; 385 int s; 386 387 if (sc->sc_carpdev) 388 CARP_SCLOCK_ASSERT(sc); 389 390 s = splnet(); 391 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 392 if (ifa->ifa_addr->sa_family == AF_INET && 393 sc->sc_carpdev != NULL) { 394 int count = carp_addrcount( 395 (struct carp_if *)sc->sc_carpdev->if_carp, 396 ifatoia(ifa), CARP_COUNT_MASTER); 397 398 if ((cmd == RTM_ADD && count == 1) || 399 (cmd == RTM_DELETE && count == 0)) 400 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 401 } 402 } 403 splx(s); 404 } 405 406 static int 407 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params) 408 { 409 410 struct carp_softc *sc; 411 struct ifnet *ifp; 412 413 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 414 ifp = SC2IFP(sc) = if_alloc(IFT_ETHER); 415 if (ifp == NULL) { 416 free(sc, M_CARP); 417 return (ENOSPC); 418 } 419 420 sc->sc_flags_backup = 0; 421 sc->sc_suppress = 0; 422 sc->sc_advbase = CARP_DFLTINTV; 423 sc->sc_vhid = -1; /* required setting */ 424 sc->sc_advskew = 0; 425 sc->sc_init_counter = 1; 426 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ 427 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 428 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 429 M_WAITOK); 430 sc->sc_imo.imo_mfilters = NULL; 431 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 432 sc->sc_imo.imo_multicast_vif = -1; 433 #ifdef INET6 434 sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc( 435 (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, 436 M_WAITOK); 437 sc->sc_im6o.im6o_mfilters = NULL; 438 sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; 439 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 440 #endif 441 442 callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE); 443 callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE); 444 callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE); 445 446 ifp->if_softc = sc; 447 if_initname(ifp, CARP_IFNAME, unit); 448 ifp->if_mtu = ETHERMTU; 449 ifp->if_flags = IFF_LOOPBACK; 450 ifp->if_ioctl = carp_ioctl; 451 ifp->if_output = carp_looutput; 452 ifp->if_start = carp_start; 453 ifp->if_type = IFT_CARP; 454 ifp->if_snd.ifq_maxlen = ifqmaxlen; 455 ifp->if_hdrlen = 0; 456 if_attach(ifp); 457 bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 458 mtx_lock(&carp_mtx); 459 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 460 mtx_unlock(&carp_mtx); 461 return (0); 462 } 463 464 static void 465 carp_clone_destroy(struct ifnet *ifp) 466 { 467 struct carp_softc *sc = ifp->if_softc; 468 469 if (sc->sc_carpdev) 470 CARP_SCLOCK(sc); 471 carpdetach(sc, 1); /* Returns unlocked. */ 472 473 mtx_lock(&carp_mtx); 474 LIST_REMOVE(sc, sc_next); 475 mtx_unlock(&carp_mtx); 476 bpfdetach(ifp); 477 if_detach(ifp); 478 if_free_type(ifp, IFT_ETHER); 479 free(sc->sc_imo.imo_membership, M_CARP); 480 #ifdef INET6 481 free(sc->sc_im6o.im6o_membership, M_CARP); 482 #endif 483 free(sc, M_CARP); 484 } 485 486 /* 487 * This function can be called on CARP interface destroy path, 488 * and in case of the removal of the underlying interface as 489 * well. We differentiate these two cases. In the latter case 490 * we do not cleanup our multicast memberships, since they 491 * are already freed. Also, in the latter case we do not 492 * release the lock on return, because the function will be 493 * called once more, for another CARP instance on the same 494 * interface. 495 */ 496 static void 497 carpdetach(struct carp_softc *sc, int unlock) 498 { 499 struct carp_if *cif; 500 501 callout_stop(&sc->sc_ad_tmo); 502 callout_stop(&sc->sc_md_tmo); 503 callout_stop(&sc->sc_md6_tmo); 504 505 if (sc->sc_suppress) 506 carp_suppress_preempt--; 507 sc->sc_suppress = 0; 508 509 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 510 carp_suppress_preempt--; 511 sc->sc_sendad_errors = 0; 512 513 carp_set_state(sc, INIT); 514 SC2IFP(sc)->if_flags &= ~IFF_UP; 515 carp_setrun(sc, 0); 516 if (unlock) 517 carp_multicast_cleanup(sc); 518 #ifdef INET6 519 carp_multicast6_cleanup(sc); 520 #endif 521 522 if (sc->sc_carpdev != NULL) { 523 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 524 CARP_LOCK_ASSERT(cif); 525 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 526 if (!--cif->vhif_nvrs) { 527 ifpromisc(sc->sc_carpdev, 0); 528 sc->sc_carpdev->if_carp = NULL; 529 CARP_LOCK_DESTROY(cif); 530 free(cif, M_CARP); 531 } else if (unlock) 532 CARP_UNLOCK(cif); 533 sc->sc_carpdev = NULL; 534 } 535 } 536 537 /* Detach an interface from the carp. */ 538 static void 539 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 540 { 541 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 542 struct carp_softc *sc, *nextsc; 543 544 if (cif == NULL) 545 return; 546 547 /* 548 * XXX: At the end of for() cycle the lock will be destroyed. 549 */ 550 CARP_LOCK(cif); 551 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 552 nextsc = TAILQ_NEXT(sc, sc_list); 553 carpdetach(sc, 0); 554 } 555 } 556 557 /* 558 * process input packet. 559 * we have rearranged checks order compared to the rfc, 560 * but it seems more efficient this way or not possible otherwise. 561 */ 562 void 563 carp_input(struct mbuf *m, int hlen) 564 { 565 struct ip *ip = mtod(m, struct ip *); 566 struct carp_header *ch; 567 int iplen, len; 568 569 CARPSTATS_INC(carps_ipackets); 570 571 if (!carp_opts[CARPCTL_ALLOW]) { 572 m_freem(m); 573 return; 574 } 575 576 /* check if received on a valid carp interface */ 577 if (m->m_pkthdr.rcvif->if_carp == NULL) { 578 CARPSTATS_INC(carps_badif); 579 CARP_DEBUG("carp_input: packet received on non-carp " 580 "interface: %s\n", 581 m->m_pkthdr.rcvif->if_xname); 582 m_freem(m); 583 return; 584 } 585 586 /* verify that the IP TTL is 255. */ 587 if (ip->ip_ttl != CARP_DFLTTL) { 588 CARPSTATS_INC(carps_badttl); 589 CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n", 590 ip->ip_ttl, 591 m->m_pkthdr.rcvif->if_xname); 592 m_freem(m); 593 return; 594 } 595 596 iplen = ip->ip_hl << 2; 597 598 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 599 CARPSTATS_INC(carps_badlen); 600 CARP_DEBUG("carp_input: received len %zd < " 601 "sizeof(struct carp_header) on %s\n", 602 m->m_len - sizeof(struct ip), 603 m->m_pkthdr.rcvif->if_xname); 604 m_freem(m); 605 return; 606 } 607 608 if (iplen + sizeof(*ch) < m->m_len) { 609 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 610 CARPSTATS_INC(carps_hdrops); 611 CARP_DEBUG("carp_input: pullup failed\n"); 612 return; 613 } 614 ip = mtod(m, struct ip *); 615 } 616 ch = (struct carp_header *)((char *)ip + iplen); 617 618 /* 619 * verify that the received packet length is 620 * equal to the CARP header 621 */ 622 len = iplen + sizeof(*ch); 623 if (len > m->m_pkthdr.len) { 624 CARPSTATS_INC(carps_badlen); 625 CARP_DEBUG("carp_input: packet too short %d on %s\n", 626 m->m_pkthdr.len, 627 m->m_pkthdr.rcvif->if_xname); 628 m_freem(m); 629 return; 630 } 631 632 if ((m = m_pullup(m, len)) == NULL) { 633 CARPSTATS_INC(carps_hdrops); 634 return; 635 } 636 ip = mtod(m, struct ip *); 637 ch = (struct carp_header *)((char *)ip + iplen); 638 639 /* verify the CARP checksum */ 640 m->m_data += iplen; 641 if (carp_cksum(m, len - iplen)) { 642 CARPSTATS_INC(carps_badsum); 643 CARP_DEBUG("carp_input: checksum failed on %s\n", 644 m->m_pkthdr.rcvif->if_xname); 645 m_freem(m); 646 return; 647 } 648 m->m_data -= iplen; 649 650 carp_input_c(m, ch, AF_INET); 651 } 652 653 #ifdef INET6 654 int 655 carp6_input(struct mbuf **mp, int *offp, int proto) 656 { 657 struct mbuf *m = *mp; 658 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 659 struct carp_header *ch; 660 u_int len; 661 662 CARPSTATS_INC(carps_ipackets6); 663 664 if (!carp_opts[CARPCTL_ALLOW]) { 665 m_freem(m); 666 return (IPPROTO_DONE); 667 } 668 669 /* check if received on a valid carp interface */ 670 if (m->m_pkthdr.rcvif->if_carp == NULL) { 671 CARPSTATS_INC(carps_badif); 672 CARP_DEBUG("carp6_input: packet received on non-carp " 673 "interface: %s\n", 674 m->m_pkthdr.rcvif->if_xname); 675 m_freem(m); 676 return (IPPROTO_DONE); 677 } 678 679 /* verify that the IP TTL is 255 */ 680 if (ip6->ip6_hlim != CARP_DFLTTL) { 681 CARPSTATS_INC(carps_badttl); 682 CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n", 683 ip6->ip6_hlim, 684 m->m_pkthdr.rcvif->if_xname); 685 m_freem(m); 686 return (IPPROTO_DONE); 687 } 688 689 /* verify that we have a complete carp packet */ 690 len = m->m_len; 691 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 692 if (ch == NULL) { 693 CARPSTATS_INC(carps_badlen); 694 CARP_DEBUG("carp6_input: packet size %u too small\n", len); 695 return (IPPROTO_DONE); 696 } 697 698 699 /* verify the CARP checksum */ 700 m->m_data += *offp; 701 if (carp_cksum(m, sizeof(*ch))) { 702 CARPSTATS_INC(carps_badsum); 703 CARP_DEBUG("carp6_input: checksum failed, on %s\n", 704 m->m_pkthdr.rcvif->if_xname); 705 m_freem(m); 706 return (IPPROTO_DONE); 707 } 708 m->m_data -= *offp; 709 710 carp_input_c(m, ch, AF_INET6); 711 return (IPPROTO_DONE); 712 } 713 #endif /* INET6 */ 714 715 static void 716 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 717 { 718 struct ifnet *ifp = m->m_pkthdr.rcvif; 719 struct carp_softc *sc; 720 u_int64_t tmp_counter; 721 struct timeval sc_tv, ch_tv; 722 723 /* verify that the VHID is valid on the receiving interface */ 724 CARP_LOCK(ifp->if_carp); 725 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 726 if (sc->sc_vhid == ch->carp_vhid) 727 break; 728 729 if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && 730 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 731 CARPSTATS_INC(carps_badvhid); 732 CARP_UNLOCK(ifp->if_carp); 733 m_freem(m); 734 return; 735 } 736 737 getmicrotime(&SC2IFP(sc)->if_lastchange); 738 SC2IFP(sc)->if_ipackets++; 739 SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; 740 741 if (bpf_peers_present(SC2IFP(sc)->if_bpf)) { 742 struct ip *ip = mtod(m, struct ip *); 743 uint32_t af1 = af; 744 745 /* BPF wants net byte order */ 746 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 747 ip->ip_off = htons(ip->ip_off); 748 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m); 749 } 750 751 /* verify the CARP version. */ 752 if (ch->carp_version != CARP_VERSION) { 753 CARPSTATS_INC(carps_badver); 754 SC2IFP(sc)->if_ierrors++; 755 CARP_UNLOCK(ifp->if_carp); 756 CARP_DEBUG("%s; invalid version %d\n", 757 SC2IFP(sc)->if_xname, 758 ch->carp_version); 759 m_freem(m); 760 return; 761 } 762 763 /* verify the hash */ 764 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 765 CARPSTATS_INC(carps_badauth); 766 SC2IFP(sc)->if_ierrors++; 767 CARP_UNLOCK(ifp->if_carp); 768 CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); 769 m_freem(m); 770 return; 771 } 772 773 tmp_counter = ntohl(ch->carp_counter[0]); 774 tmp_counter = tmp_counter<<32; 775 tmp_counter += ntohl(ch->carp_counter[1]); 776 777 /* XXX Replay protection goes here */ 778 779 sc->sc_init_counter = 0; 780 sc->sc_counter = tmp_counter; 781 782 sc_tv.tv_sec = sc->sc_advbase; 783 if (carp_suppress_preempt && sc->sc_advskew < 240) 784 sc_tv.tv_usec = 240 * 1000000 / 256; 785 else 786 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 787 ch_tv.tv_sec = ch->carp_advbase; 788 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 789 790 switch (sc->sc_state) { 791 case INIT: 792 break; 793 case MASTER: 794 /* 795 * If we receive an advertisement from a master who's going to 796 * be more frequent than us, go into BACKUP state. 797 */ 798 if (timevalcmp(&sc_tv, &ch_tv, >) || 799 timevalcmp(&sc_tv, &ch_tv, ==)) { 800 callout_stop(&sc->sc_ad_tmo); 801 CARP_LOG("%s: MASTER -> BACKUP " 802 "(more frequent advertisement received)\n", 803 SC2IFP(sc)->if_xname); 804 carp_set_state(sc, BACKUP); 805 carp_setrun(sc, 0); 806 carp_setroute(sc, RTM_DELETE); 807 } 808 break; 809 case BACKUP: 810 /* 811 * If we're pre-empting masters who advertise slower than us, 812 * and this one claims to be slower, treat him as down. 813 */ 814 if (carp_opts[CARPCTL_PREEMPT] && 815 timevalcmp(&sc_tv, &ch_tv, <)) { 816 CARP_LOG("%s: BACKUP -> MASTER " 817 "(preempting a slower master)\n", 818 SC2IFP(sc)->if_xname); 819 carp_master_down_locked(sc); 820 break; 821 } 822 823 /* 824 * If the master is going to advertise at such a low frequency 825 * that he's guaranteed to time out, we'd might as well just 826 * treat him as timed out now. 827 */ 828 sc_tv.tv_sec = sc->sc_advbase * 3; 829 if (timevalcmp(&sc_tv, &ch_tv, <)) { 830 CARP_LOG("%s: BACKUP -> MASTER " 831 "(master timed out)\n", 832 SC2IFP(sc)->if_xname); 833 carp_master_down_locked(sc); 834 break; 835 } 836 837 /* 838 * Otherwise, we reset the counter and wait for the next 839 * advertisement. 840 */ 841 carp_setrun(sc, af); 842 break; 843 } 844 845 CARP_UNLOCK(ifp->if_carp); 846 847 m_freem(m); 848 return; 849 } 850 851 static int 852 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 853 { 854 struct m_tag *mtag; 855 struct ifnet *ifp = SC2IFP(sc); 856 857 if (sc->sc_init_counter) { 858 /* this could also be seconds since unix epoch */ 859 sc->sc_counter = arc4random(); 860 sc->sc_counter = sc->sc_counter << 32; 861 sc->sc_counter += arc4random(); 862 } else 863 sc->sc_counter++; 864 865 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 866 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 867 868 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 869 870 /* Tag packet for carp_output */ 871 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 872 if (mtag == NULL) { 873 m_freem(m); 874 SC2IFP(sc)->if_oerrors++; 875 return (ENOMEM); 876 } 877 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 878 m_tag_prepend(m, mtag); 879 880 return (0); 881 } 882 883 static void 884 carp_send_ad_all(void) 885 { 886 struct carp_softc *sc; 887 888 mtx_lock(&carp_mtx); 889 LIST_FOREACH(sc, &carpif_list, sc_next) { 890 if (sc->sc_carpdev == NULL) 891 continue; 892 CARP_SCLOCK(sc); 893 if ((SC2IFP(sc)->if_flags & IFF_UP) && 894 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) && 895 sc->sc_state == MASTER) 896 carp_send_ad_locked(sc); 897 CARP_SCUNLOCK(sc); 898 } 899 mtx_unlock(&carp_mtx); 900 } 901 902 static void 903 carp_send_ad(void *v) 904 { 905 struct carp_softc *sc = v; 906 907 CARP_SCLOCK(sc); 908 carp_send_ad_locked(sc); 909 CARP_SCUNLOCK(sc); 910 } 911 912 static void 913 carp_send_ad_locked(struct carp_softc *sc) 914 { 915 struct carp_header ch; 916 struct timeval tv; 917 struct carp_header *ch_ptr; 918 struct mbuf *m; 919 int len, advbase, advskew; 920 921 CARP_SCLOCK_ASSERT(sc); 922 923 /* bow out if we've lost our UPness or RUNNINGuiness */ 924 if (!((SC2IFP(sc)->if_flags & IFF_UP) && 925 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 926 advbase = 255; 927 advskew = 255; 928 } else { 929 advbase = sc->sc_advbase; 930 if (!carp_suppress_preempt || sc->sc_advskew > 240) 931 advskew = sc->sc_advskew; 932 else 933 advskew = 240; 934 tv.tv_sec = advbase; 935 tv.tv_usec = advskew * 1000000 / 256; 936 } 937 938 ch.carp_version = CARP_VERSION; 939 ch.carp_type = CARP_ADVERTISEMENT; 940 ch.carp_vhid = sc->sc_vhid; 941 ch.carp_advbase = advbase; 942 ch.carp_advskew = advskew; 943 ch.carp_authlen = 7; /* XXX DEFINE */ 944 ch.carp_pad1 = 0; /* must be zero */ 945 ch.carp_cksum = 0; 946 947 #ifdef INET 948 if (sc->sc_ia) { 949 struct ip *ip; 950 951 MGETHDR(m, M_DONTWAIT, MT_HEADER); 952 if (m == NULL) { 953 SC2IFP(sc)->if_oerrors++; 954 CARPSTATS_INC(carps_onomem); 955 /* XXX maybe less ? */ 956 if (advbase != 255 || advskew != 255) 957 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 958 carp_send_ad, sc); 959 return; 960 } 961 len = sizeof(*ip) + sizeof(ch); 962 m->m_pkthdr.len = len; 963 m->m_pkthdr.rcvif = NULL; 964 m->m_len = len; 965 MH_ALIGN(m, m->m_len); 966 m->m_flags |= M_MCAST; 967 ip = mtod(m, struct ip *); 968 ip->ip_v = IPVERSION; 969 ip->ip_hl = sizeof(*ip) >> 2; 970 ip->ip_tos = IPTOS_LOWDELAY; 971 ip->ip_len = len; 972 ip->ip_id = ip_newid(); 973 ip->ip_off = IP_DF; 974 ip->ip_ttl = CARP_DFLTTL; 975 ip->ip_p = IPPROTO_CARP; 976 ip->ip_sum = 0; 977 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; 978 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 979 980 ch_ptr = (struct carp_header *)(&ip[1]); 981 bcopy(&ch, ch_ptr, sizeof(ch)); 982 if (carp_prepare_ad(m, sc, ch_ptr)) 983 return; 984 985 m->m_data += sizeof(*ip); 986 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 987 m->m_data -= sizeof(*ip); 988 989 getmicrotime(&SC2IFP(sc)->if_lastchange); 990 SC2IFP(sc)->if_opackets++; 991 SC2IFP(sc)->if_obytes += len; 992 CARPSTATS_INC(carps_opackets); 993 994 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 995 SC2IFP(sc)->if_oerrors++; 996 if (sc->sc_sendad_errors < INT_MAX) 997 sc->sc_sendad_errors++; 998 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 999 carp_suppress_preempt++; 1000 if (carp_suppress_preempt == 1) { 1001 CARP_SCUNLOCK(sc); 1002 carp_send_ad_all(); 1003 CARP_SCLOCK(sc); 1004 } 1005 } 1006 sc->sc_sendad_success = 0; 1007 } else { 1008 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1009 if (++sc->sc_sendad_success >= 1010 CARP_SENDAD_MIN_SUCCESS) { 1011 carp_suppress_preempt--; 1012 sc->sc_sendad_errors = 0; 1013 } 1014 } else 1015 sc->sc_sendad_errors = 0; 1016 } 1017 } 1018 #endif /* INET */ 1019 #ifdef INET6 1020 if (sc->sc_ia6) { 1021 struct ip6_hdr *ip6; 1022 1023 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1024 if (m == NULL) { 1025 SC2IFP(sc)->if_oerrors++; 1026 CARPSTATS_INC(carps_onomem); 1027 /* XXX maybe less ? */ 1028 if (advbase != 255 || advskew != 255) 1029 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1030 carp_send_ad, sc); 1031 return; 1032 } 1033 len = sizeof(*ip6) + sizeof(ch); 1034 m->m_pkthdr.len = len; 1035 m->m_pkthdr.rcvif = NULL; 1036 m->m_len = len; 1037 MH_ALIGN(m, m->m_len); 1038 m->m_flags |= M_MCAST; 1039 ip6 = mtod(m, struct ip6_hdr *); 1040 bzero(ip6, sizeof(*ip6)); 1041 ip6->ip6_vfc |= IPV6_VERSION; 1042 ip6->ip6_hlim = CARP_DFLTTL; 1043 ip6->ip6_nxt = IPPROTO_CARP; 1044 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 1045 sizeof(struct in6_addr)); 1046 /* set the multicast destination */ 1047 1048 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1049 ip6->ip6_dst.s6_addr8[15] = 0x12; 1050 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1051 SC2IFP(sc)->if_oerrors++; 1052 m_freem(m); 1053 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1054 return; 1055 } 1056 1057 ch_ptr = (struct carp_header *)(&ip6[1]); 1058 bcopy(&ch, ch_ptr, sizeof(ch)); 1059 if (carp_prepare_ad(m, sc, ch_ptr)) 1060 return; 1061 1062 m->m_data += sizeof(*ip6); 1063 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1064 m->m_data -= sizeof(*ip6); 1065 1066 getmicrotime(&SC2IFP(sc)->if_lastchange); 1067 SC2IFP(sc)->if_opackets++; 1068 SC2IFP(sc)->if_obytes += len; 1069 CARPSTATS_INC(carps_opackets6); 1070 1071 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 1072 SC2IFP(sc)->if_oerrors++; 1073 if (sc->sc_sendad_errors < INT_MAX) 1074 sc->sc_sendad_errors++; 1075 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1076 carp_suppress_preempt++; 1077 if (carp_suppress_preempt == 1) { 1078 CARP_SCUNLOCK(sc); 1079 carp_send_ad_all(); 1080 CARP_SCLOCK(sc); 1081 } 1082 } 1083 sc->sc_sendad_success = 0; 1084 } else { 1085 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1086 if (++sc->sc_sendad_success >= 1087 CARP_SENDAD_MIN_SUCCESS) { 1088 carp_suppress_preempt--; 1089 sc->sc_sendad_errors = 0; 1090 } 1091 } else 1092 sc->sc_sendad_errors = 0; 1093 } 1094 } 1095 #endif /* INET6 */ 1096 1097 if (advbase != 255 || advskew != 255) 1098 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1099 carp_send_ad, sc); 1100 1101 } 1102 1103 /* 1104 * Broadcast a gratuitous ARP request containing 1105 * the virtual router MAC address for each IP address 1106 * associated with the virtual router. 1107 */ 1108 static void 1109 carp_send_arp(struct carp_softc *sc) 1110 { 1111 struct ifaddr *ifa; 1112 1113 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1114 1115 if (ifa->ifa_addr->sa_family != AF_INET) 1116 continue; 1117 1118 /* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */ 1119 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp)); 1120 1121 DELAY(1000); /* XXX */ 1122 } 1123 } 1124 1125 #ifdef INET6 1126 static void 1127 carp_send_na(struct carp_softc *sc) 1128 { 1129 struct ifaddr *ifa; 1130 struct in6_addr *in6; 1131 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1132 1133 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1134 1135 if (ifa->ifa_addr->sa_family != AF_INET6) 1136 continue; 1137 1138 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1139 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1140 ND_NA_FLAG_OVERRIDE, 1, NULL); 1141 DELAY(1000); /* XXX */ 1142 } 1143 } 1144 #endif /* INET6 */ 1145 1146 static int 1147 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1148 { 1149 struct carp_softc *vh; 1150 struct ifaddr *ifa; 1151 int count = 0; 1152 1153 CARP_LOCK_ASSERT(cif); 1154 1155 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1156 if ((type == CARP_COUNT_RUNNING && 1157 (SC2IFP(vh)->if_flags & IFF_UP) && 1158 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) || 1159 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1160 IF_ADDR_LOCK(SC2IFP(vh)); 1161 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1162 ifa_list) { 1163 if (ifa->ifa_addr->sa_family == AF_INET && 1164 ia->ia_addr.sin_addr.s_addr == 1165 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1166 count++; 1167 } 1168 IF_ADDR_UNLOCK(SC2IFP(vh)); 1169 } 1170 } 1171 return (count); 1172 } 1173 1174 int 1175 carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia, 1176 struct in_addr *isaddr, u_int8_t **enaddr) 1177 { 1178 struct carp_if *cif; 1179 struct carp_softc *vh; 1180 int index, count = 0; 1181 struct ifaddr *ifa; 1182 1183 cif = ifp->if_carp; 1184 CARP_LOCK(cif); 1185 1186 if (carp_opts[CARPCTL_ARPBALANCE]) { 1187 /* 1188 * XXX proof of concept implementation. 1189 * We use the source ip to decide which virtual host should 1190 * handle the request. If we're master of that virtual host, 1191 * then we respond, otherwise, just drop the arp packet on 1192 * the floor. 1193 */ 1194 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1195 if (count == 0) { 1196 /* should never reach this */ 1197 CARP_UNLOCK(cif); 1198 return (0); 1199 } 1200 1201 /* this should be a hash, like pf_hash() */ 1202 index = ntohl(isaddr->s_addr) % count; 1203 count = 0; 1204 1205 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1206 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1207 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) { 1208 IF_ADDR_LOCK(SC2IFP(vh)); 1209 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1210 ifa_list) { 1211 if (ifa->ifa_addr->sa_family == 1212 AF_INET && 1213 ia->ia_addr.sin_addr.s_addr == 1214 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1215 if (count == index) { 1216 if (vh->sc_state == 1217 MASTER) { 1218 *enaddr = IF_LLADDR(vh->sc_ifp); 1219 IF_ADDR_UNLOCK(SC2IFP(vh)); 1220 CARP_UNLOCK(cif); 1221 return (1); 1222 } else { 1223 IF_ADDR_UNLOCK(SC2IFP(vh)); 1224 CARP_UNLOCK(cif); 1225 return (0); 1226 } 1227 } 1228 count++; 1229 } 1230 } 1231 IF_ADDR_UNLOCK(SC2IFP(vh)); 1232 } 1233 } 1234 } else { 1235 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1236 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1237 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1238 ia->ia_ifp == SC2IFP(vh) && 1239 vh->sc_state == MASTER) { 1240 *enaddr = IF_LLADDR(vh->sc_ifp); 1241 CARP_UNLOCK(cif); 1242 return (1); 1243 } 1244 } 1245 } 1246 CARP_UNLOCK(cif); 1247 return (0); 1248 } 1249 1250 #ifdef INET6 1251 struct ifaddr * 1252 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1253 { 1254 struct carp_if *cif; 1255 struct carp_softc *vh; 1256 struct ifaddr *ifa; 1257 1258 cif = ifp->if_carp; 1259 CARP_LOCK(cif); 1260 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1261 IF_ADDR_LOCK(SC2IFP(vh)); 1262 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { 1263 if (IN6_ARE_ADDR_EQUAL(taddr, 1264 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1265 (SC2IFP(vh)->if_flags & IFF_UP) && 1266 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1267 vh->sc_state == MASTER) { 1268 ifa_ref(ifa); 1269 IF_ADDR_UNLOCK(SC2IFP(vh)); 1270 CARP_UNLOCK(cif); 1271 return (ifa); 1272 } 1273 } 1274 IF_ADDR_UNLOCK(SC2IFP(vh)); 1275 } 1276 CARP_UNLOCK(cif); 1277 1278 return (NULL); 1279 } 1280 1281 caddr_t 1282 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1283 { 1284 struct m_tag *mtag; 1285 struct carp_if *cif; 1286 struct carp_softc *sc; 1287 struct ifaddr *ifa; 1288 1289 cif = ifp->if_carp; 1290 CARP_LOCK(cif); 1291 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1292 IF_ADDR_LOCK(SC2IFP(sc)); 1293 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1294 if (IN6_ARE_ADDR_EQUAL(taddr, 1295 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1296 (SC2IFP(sc)->if_flags & IFF_UP) && 1297 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) { 1298 struct ifnet *ifp = SC2IFP(sc); 1299 mtag = m_tag_get(PACKET_TAG_CARP, 1300 sizeof(struct ifnet *), M_NOWAIT); 1301 if (mtag == NULL) { 1302 /* better a bit than nothing */ 1303 IF_ADDR_UNLOCK(SC2IFP(sc)); 1304 CARP_UNLOCK(cif); 1305 return (IF_LLADDR(sc->sc_ifp)); 1306 } 1307 bcopy(&ifp, (caddr_t)(mtag + 1), 1308 sizeof(struct ifnet *)); 1309 m_tag_prepend(m, mtag); 1310 1311 IF_ADDR_UNLOCK(SC2IFP(sc)); 1312 CARP_UNLOCK(cif); 1313 return (IF_LLADDR(sc->sc_ifp)); 1314 } 1315 } 1316 IF_ADDR_UNLOCK(SC2IFP(sc)); 1317 } 1318 CARP_UNLOCK(cif); 1319 1320 return (NULL); 1321 } 1322 #endif 1323 1324 struct ifnet * 1325 carp_forus(struct ifnet *ifp, u_char *dhost) 1326 { 1327 struct carp_if *cif; 1328 struct carp_softc *vh; 1329 u_int8_t *ena = dhost; 1330 1331 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1332 return (NULL); 1333 1334 cif = ifp->if_carp; 1335 CARP_LOCK(cif); 1336 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1337 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1338 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1339 vh->sc_state == MASTER && 1340 !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { 1341 CARP_UNLOCK(cif); 1342 return (SC2IFP(vh)); 1343 } 1344 1345 CARP_UNLOCK(cif); 1346 return (NULL); 1347 } 1348 1349 static void 1350 carp_master_down(void *v) 1351 { 1352 struct carp_softc *sc = v; 1353 1354 CARP_SCLOCK(sc); 1355 carp_master_down_locked(sc); 1356 CARP_SCUNLOCK(sc); 1357 } 1358 1359 static void 1360 carp_master_down_locked(struct carp_softc *sc) 1361 { 1362 if (sc->sc_carpdev) 1363 CARP_SCLOCK_ASSERT(sc); 1364 1365 switch (sc->sc_state) { 1366 case INIT: 1367 printf("%s: master_down event in INIT state\n", 1368 SC2IFP(sc)->if_xname); 1369 break; 1370 case MASTER: 1371 break; 1372 case BACKUP: 1373 carp_set_state(sc, MASTER); 1374 carp_send_ad_locked(sc); 1375 carp_send_arp(sc); 1376 #ifdef INET6 1377 carp_send_na(sc); 1378 #endif /* INET6 */ 1379 carp_setrun(sc, 0); 1380 carp_setroute(sc, RTM_ADD); 1381 break; 1382 } 1383 } 1384 1385 /* 1386 * When in backup state, af indicates whether to reset the master down timer 1387 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1388 */ 1389 static void 1390 carp_setrun(struct carp_softc *sc, sa_family_t af) 1391 { 1392 struct timeval tv; 1393 1394 if (sc->sc_carpdev == NULL) { 1395 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1396 carp_set_state(sc, INIT); 1397 return; 1398 } else 1399 CARP_SCLOCK_ASSERT(sc); 1400 1401 if (SC2IFP(sc)->if_flags & IFF_UP && 1402 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) && 1403 sc->sc_carpdev->if_link_state == LINK_STATE_UP) 1404 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 1405 else { 1406 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1407 carp_setroute(sc, RTM_DELETE); 1408 return; 1409 } 1410 1411 switch (sc->sc_state) { 1412 case INIT: 1413 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1414 carp_send_ad_locked(sc); 1415 carp_send_arp(sc); 1416 #ifdef INET6 1417 carp_send_na(sc); 1418 #endif /* INET6 */ 1419 CARP_LOG("%s: INIT -> MASTER (preempting)\n", 1420 SC2IFP(sc)->if_xname); 1421 carp_set_state(sc, MASTER); 1422 carp_setroute(sc, RTM_ADD); 1423 } else { 1424 CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); 1425 carp_set_state(sc, BACKUP); 1426 carp_setroute(sc, RTM_DELETE); 1427 carp_setrun(sc, 0); 1428 } 1429 break; 1430 case BACKUP: 1431 callout_stop(&sc->sc_ad_tmo); 1432 tv.tv_sec = 3 * sc->sc_advbase; 1433 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1434 switch (af) { 1435 #ifdef INET 1436 case AF_INET: 1437 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1438 carp_master_down, sc); 1439 break; 1440 #endif /* INET */ 1441 #ifdef INET6 1442 case AF_INET6: 1443 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1444 carp_master_down, sc); 1445 break; 1446 #endif /* INET6 */ 1447 default: 1448 if (sc->sc_naddrs) 1449 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1450 carp_master_down, sc); 1451 if (sc->sc_naddrs6) 1452 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1453 carp_master_down, sc); 1454 break; 1455 } 1456 break; 1457 case MASTER: 1458 tv.tv_sec = sc->sc_advbase; 1459 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1460 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1461 carp_send_ad, sc); 1462 break; 1463 } 1464 } 1465 1466 static void 1467 carp_multicast_cleanup(struct carp_softc *sc) 1468 { 1469 struct ip_moptions *imo = &sc->sc_imo; 1470 u_int16_t n = imo->imo_num_memberships; 1471 1472 /* Clean up our own multicast memberships */ 1473 while (n-- > 0) { 1474 if (imo->imo_membership[n] != NULL) { 1475 in_delmulti(imo->imo_membership[n]); 1476 imo->imo_membership[n] = NULL; 1477 } 1478 } 1479 KASSERT(imo->imo_mfilters == NULL, 1480 ("%s: imo_mfilters != NULL", __func__)); 1481 imo->imo_num_memberships = 0; 1482 imo->imo_multicast_ifp = NULL; 1483 } 1484 1485 #ifdef INET6 1486 static void 1487 carp_multicast6_cleanup(struct carp_softc *sc) 1488 { 1489 struct ip6_moptions *im6o = &sc->sc_im6o; 1490 u_int16_t n = im6o->im6o_num_memberships; 1491 1492 while (n-- > 0) { 1493 if (im6o->im6o_membership[n] != NULL) { 1494 in6_mc_leave(im6o->im6o_membership[n], NULL); 1495 im6o->im6o_membership[n] = NULL; 1496 } 1497 } 1498 KASSERT(im6o->im6o_mfilters == NULL, 1499 ("%s: im6o_mfilters != NULL", __func__)); 1500 im6o->im6o_num_memberships = 0; 1501 im6o->im6o_multicast_ifp = NULL; 1502 } 1503 #endif 1504 1505 static int 1506 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1507 { 1508 struct ifnet *ifp; 1509 struct carp_if *cif; 1510 struct in_ifaddr *ia, *ia_if; 1511 struct ip_moptions *imo = &sc->sc_imo; 1512 struct in_addr addr; 1513 u_long iaddr = htonl(sin->sin_addr.s_addr); 1514 int own, error; 1515 1516 if (sin->sin_addr.s_addr == 0) { 1517 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1518 carp_set_state(sc, INIT); 1519 if (sc->sc_naddrs) 1520 SC2IFP(sc)->if_flags |= IFF_UP; 1521 if (sc->sc_carpdev) 1522 CARP_SCLOCK(sc); 1523 carp_setrun(sc, 0); 1524 if (sc->sc_carpdev) 1525 CARP_SCUNLOCK(sc); 1526 return (0); 1527 } 1528 1529 /* we have to do it by hands to check we won't match on us */ 1530 ia_if = NULL; own = 0; 1531 IN_IFADDR_RLOCK(); 1532 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1533 /* and, yeah, we need a multicast-capable iface too */ 1534 if (ia->ia_ifp != SC2IFP(sc) && 1535 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1536 (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1537 if (!ia_if) 1538 ia_if = ia; 1539 if (sin->sin_addr.s_addr == 1540 ia->ia_addr.sin_addr.s_addr) 1541 own++; 1542 } 1543 } 1544 1545 if (!ia_if) { 1546 IN_IFADDR_RUNLOCK(); 1547 return (EADDRNOTAVAIL); 1548 } 1549 1550 ia = ia_if; 1551 ifa_ref(&ia->ia_ifa); 1552 IN_IFADDR_RUNLOCK(); 1553 1554 ifp = ia->ia_ifp; 1555 1556 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1557 (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) { 1558 ifa_free(&ia->ia_ifa); 1559 return (EADDRNOTAVAIL); 1560 } 1561 1562 if (imo->imo_num_memberships == 0) { 1563 addr.s_addr = htonl(INADDR_CARP_GROUP); 1564 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == 1565 NULL) { 1566 ifa_free(&ia->ia_ifa); 1567 return (ENOBUFS); 1568 } 1569 imo->imo_num_memberships++; 1570 imo->imo_multicast_ifp = ifp; 1571 imo->imo_multicast_ttl = CARP_DFLTTL; 1572 imo->imo_multicast_loop = 0; 1573 } 1574 1575 if (!ifp->if_carp) { 1576 1577 cif = malloc(sizeof(*cif), M_CARP, 1578 M_WAITOK|M_ZERO); 1579 if (!cif) { 1580 error = ENOBUFS; 1581 goto cleanup; 1582 } 1583 if ((error = ifpromisc(ifp, 1))) { 1584 free(cif, M_CARP); 1585 goto cleanup; 1586 } 1587 1588 CARP_LOCK_INIT(cif); 1589 CARP_LOCK(cif); 1590 cif->vhif_ifp = ifp; 1591 TAILQ_INIT(&cif->vhif_vrs); 1592 ifp->if_carp = cif; 1593 1594 } else { 1595 struct carp_softc *vr; 1596 1597 cif = (struct carp_if *)ifp->if_carp; 1598 CARP_LOCK(cif); 1599 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1600 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1601 CARP_UNLOCK(cif); 1602 error = EEXIST; 1603 goto cleanup; 1604 } 1605 } 1606 sc->sc_ia = ia; 1607 sc->sc_carpdev = ifp; 1608 1609 { /* XXX prevent endless loop if already in queue */ 1610 struct carp_softc *vr, *after = NULL; 1611 int myself = 0; 1612 cif = (struct carp_if *)ifp->if_carp; 1613 1614 /* XXX: cif should not change, right? So we still hold the lock */ 1615 CARP_LOCK_ASSERT(cif); 1616 1617 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1618 if (vr == sc) 1619 myself = 1; 1620 if (vr->sc_vhid < sc->sc_vhid) 1621 after = vr; 1622 } 1623 1624 if (!myself) { 1625 /* We're trying to keep things in order */ 1626 if (after == NULL) { 1627 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1628 } else { 1629 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1630 } 1631 cif->vhif_nvrs++; 1632 } 1633 } 1634 1635 sc->sc_naddrs++; 1636 SC2IFP(sc)->if_flags |= IFF_UP; 1637 if (own) 1638 sc->sc_advskew = 0; 1639 carp_sc_state_locked(sc); 1640 carp_setrun(sc, 0); 1641 1642 CARP_UNLOCK(cif); 1643 ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ 1644 1645 return (0); 1646 1647 cleanup: 1648 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1649 ifa_free(&ia->ia_ifa); 1650 return (error); 1651 } 1652 1653 static int 1654 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1655 { 1656 int error = 0; 1657 1658 if (!--sc->sc_naddrs) { 1659 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1660 struct ip_moptions *imo = &sc->sc_imo; 1661 1662 CARP_LOCK(cif); 1663 callout_stop(&sc->sc_ad_tmo); 1664 SC2IFP(sc)->if_flags &= ~IFF_UP; 1665 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1666 sc->sc_vhid = -1; 1667 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1668 imo->imo_multicast_ifp = NULL; 1669 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1670 if (!--cif->vhif_nvrs) { 1671 sc->sc_carpdev->if_carp = NULL; 1672 CARP_LOCK_DESTROY(cif); 1673 free(cif, M_CARP); 1674 } else { 1675 CARP_UNLOCK(cif); 1676 } 1677 } 1678 1679 return (error); 1680 } 1681 1682 #ifdef INET6 1683 static int 1684 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1685 { 1686 struct ifnet *ifp; 1687 struct carp_if *cif; 1688 struct in6_ifaddr *ia, *ia_if; 1689 struct ip6_moptions *im6o = &sc->sc_im6o; 1690 struct in6_addr in6; 1691 int own, error; 1692 1693 error = 0; 1694 1695 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1696 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1697 carp_set_state(sc, INIT); 1698 if (sc->sc_naddrs6) 1699 SC2IFP(sc)->if_flags |= IFF_UP; 1700 if (sc->sc_carpdev) 1701 CARP_SCLOCK(sc); 1702 carp_setrun(sc, 0); 1703 if (sc->sc_carpdev) 1704 CARP_SCUNLOCK(sc); 1705 return (0); 1706 } 1707 1708 /* we have to do it by hands to check we won't match on us */ 1709 ia_if = NULL; own = 0; 1710 IN6_IFADDR_RLOCK(); 1711 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 1712 int i; 1713 1714 for (i = 0; i < 4; i++) { 1715 if ((sin6->sin6_addr.s6_addr32[i] & 1716 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1717 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1718 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1719 break; 1720 } 1721 /* and, yeah, we need a multicast-capable iface too */ 1722 if (ia->ia_ifp != SC2IFP(sc) && 1723 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1724 (i == 4)) { 1725 if (!ia_if) 1726 ia_if = ia; 1727 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1728 &ia->ia_addr.sin6_addr)) 1729 own++; 1730 } 1731 } 1732 1733 if (!ia_if) { 1734 IN6_IFADDR_RUNLOCK(); 1735 return (EADDRNOTAVAIL); 1736 } 1737 ia = ia_if; 1738 ifa_ref(&ia->ia_ifa); 1739 IN6_IFADDR_RUNLOCK(); 1740 ifp = ia->ia_ifp; 1741 1742 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1743 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) { 1744 ifa_free(&ia->ia_ifa); 1745 return (EADDRNOTAVAIL); 1746 } 1747 1748 if (!sc->sc_naddrs6) { 1749 struct in6_multi *in6m; 1750 1751 im6o->im6o_multicast_ifp = ifp; 1752 1753 /* join CARP multicast address */ 1754 bzero(&in6, sizeof(in6)); 1755 in6.s6_addr16[0] = htons(0xff02); 1756 in6.s6_addr8[15] = 0x12; 1757 if (in6_setscope(&in6, ifp, NULL) != 0) 1758 goto cleanup; 1759 in6m = NULL; 1760 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); 1761 if (error) 1762 goto cleanup; 1763 im6o->im6o_membership[0] = in6m; 1764 im6o->im6o_num_memberships++; 1765 1766 /* join solicited multicast address */ 1767 bzero(&in6, sizeof(in6)); 1768 in6.s6_addr16[0] = htons(0xff02); 1769 in6.s6_addr32[1] = 0; 1770 in6.s6_addr32[2] = htonl(1); 1771 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1772 in6.s6_addr8[12] = 0xff; 1773 if (in6_setscope(&in6, ifp, NULL) != 0) 1774 goto cleanup; 1775 in6m = NULL; 1776 error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); 1777 if (error) 1778 goto cleanup; 1779 im6o->im6o_membership[1] = in6m; 1780 im6o->im6o_num_memberships++; 1781 } 1782 1783 if (!ifp->if_carp) { 1784 cif = malloc(sizeof(*cif), M_CARP, 1785 M_WAITOK|M_ZERO); 1786 if (!cif) { 1787 error = ENOBUFS; 1788 goto cleanup; 1789 } 1790 if ((error = ifpromisc(ifp, 1))) { 1791 free(cif, M_CARP); 1792 goto cleanup; 1793 } 1794 1795 CARP_LOCK_INIT(cif); 1796 CARP_LOCK(cif); 1797 cif->vhif_ifp = ifp; 1798 TAILQ_INIT(&cif->vhif_vrs); 1799 ifp->if_carp = cif; 1800 1801 } else { 1802 struct carp_softc *vr; 1803 1804 cif = (struct carp_if *)ifp->if_carp; 1805 CARP_LOCK(cif); 1806 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1807 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1808 CARP_UNLOCK(cif); 1809 error = EINVAL; 1810 goto cleanup; 1811 } 1812 } 1813 sc->sc_ia6 = ia; 1814 sc->sc_carpdev = ifp; 1815 1816 { /* XXX prevent endless loop if already in queue */ 1817 struct carp_softc *vr, *after = NULL; 1818 int myself = 0; 1819 cif = (struct carp_if *)ifp->if_carp; 1820 CARP_LOCK_ASSERT(cif); 1821 1822 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1823 if (vr == sc) 1824 myself = 1; 1825 if (vr->sc_vhid < sc->sc_vhid) 1826 after = vr; 1827 } 1828 1829 if (!myself) { 1830 /* We're trying to keep things in order */ 1831 if (after == NULL) { 1832 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1833 } else { 1834 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1835 } 1836 cif->vhif_nvrs++; 1837 } 1838 } 1839 1840 sc->sc_naddrs6++; 1841 SC2IFP(sc)->if_flags |= IFF_UP; 1842 if (own) 1843 sc->sc_advskew = 0; 1844 carp_sc_state_locked(sc); 1845 carp_setrun(sc, 0); 1846 1847 CARP_UNLOCK(cif); 1848 ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ 1849 1850 return (0); 1851 1852 cleanup: 1853 if (!sc->sc_naddrs6) 1854 carp_multicast6_cleanup(sc); 1855 ifa_free(&ia->ia_ifa); 1856 return (error); 1857 } 1858 1859 static int 1860 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1861 { 1862 int error = 0; 1863 1864 if (!--sc->sc_naddrs6) { 1865 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1866 1867 CARP_LOCK(cif); 1868 callout_stop(&sc->sc_ad_tmo); 1869 SC2IFP(sc)->if_flags &= ~IFF_UP; 1870 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1871 sc->sc_vhid = -1; 1872 carp_multicast6_cleanup(sc); 1873 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1874 if (!--cif->vhif_nvrs) { 1875 CARP_LOCK_DESTROY(cif); 1876 sc->sc_carpdev->if_carp = NULL; 1877 free(cif, M_CARP); 1878 } else 1879 CARP_UNLOCK(cif); 1880 } 1881 1882 return (error); 1883 } 1884 #endif /* INET6 */ 1885 1886 static int 1887 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1888 { 1889 struct carp_softc *sc = ifp->if_softc, *vr; 1890 struct carpreq carpr; 1891 struct ifaddr *ifa; 1892 struct ifreq *ifr; 1893 struct ifaliasreq *ifra; 1894 int locked = 0, error = 0; 1895 1896 ifa = (struct ifaddr *)addr; 1897 ifra = (struct ifaliasreq *)addr; 1898 ifr = (struct ifreq *)addr; 1899 1900 switch (cmd) { 1901 case SIOCSIFADDR: 1902 switch (ifa->ifa_addr->sa_family) { 1903 #ifdef INET 1904 case AF_INET: 1905 SC2IFP(sc)->if_flags |= IFF_UP; 1906 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1907 sizeof(struct sockaddr)); 1908 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1909 break; 1910 #endif /* INET */ 1911 #ifdef INET6 1912 case AF_INET6: 1913 SC2IFP(sc)->if_flags |= IFF_UP; 1914 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1915 break; 1916 #endif /* INET6 */ 1917 default: 1918 error = EAFNOSUPPORT; 1919 break; 1920 } 1921 break; 1922 1923 case SIOCAIFADDR: 1924 switch (ifa->ifa_addr->sa_family) { 1925 #ifdef INET 1926 case AF_INET: 1927 SC2IFP(sc)->if_flags |= IFF_UP; 1928 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1929 sizeof(struct sockaddr)); 1930 error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); 1931 break; 1932 #endif /* INET */ 1933 #ifdef INET6 1934 case AF_INET6: 1935 SC2IFP(sc)->if_flags |= IFF_UP; 1936 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1937 break; 1938 #endif /* INET6 */ 1939 default: 1940 error = EAFNOSUPPORT; 1941 break; 1942 } 1943 break; 1944 1945 case SIOCDIFADDR: 1946 switch (ifa->ifa_addr->sa_family) { 1947 #ifdef INET 1948 case AF_INET: 1949 error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); 1950 break; 1951 #endif /* INET */ 1952 #ifdef INET6 1953 case AF_INET6: 1954 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1955 break; 1956 #endif /* INET6 */ 1957 default: 1958 error = EAFNOSUPPORT; 1959 break; 1960 } 1961 break; 1962 1963 case SIOCSIFFLAGS: 1964 if (sc->sc_carpdev) { 1965 locked = 1; 1966 CARP_SCLOCK(sc); 1967 } 1968 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1969 callout_stop(&sc->sc_ad_tmo); 1970 callout_stop(&sc->sc_md_tmo); 1971 callout_stop(&sc->sc_md6_tmo); 1972 if (sc->sc_state == MASTER) 1973 carp_send_ad_locked(sc); 1974 carp_set_state(sc, INIT); 1975 carp_setrun(sc, 0); 1976 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 1977 SC2IFP(sc)->if_flags |= IFF_UP; 1978 carp_setrun(sc, 0); 1979 } 1980 break; 1981 1982 case SIOCSVH: 1983 error = priv_check(curthread, PRIV_NETINET_CARP); 1984 if (error) 1985 break; 1986 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1987 break; 1988 error = 1; 1989 if (sc->sc_carpdev) { 1990 locked = 1; 1991 CARP_SCLOCK(sc); 1992 } 1993 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1994 switch (carpr.carpr_state) { 1995 case BACKUP: 1996 callout_stop(&sc->sc_ad_tmo); 1997 carp_set_state(sc, BACKUP); 1998 carp_setrun(sc, 0); 1999 carp_setroute(sc, RTM_DELETE); 2000 break; 2001 case MASTER: 2002 carp_master_down_locked(sc); 2003 break; 2004 default: 2005 break; 2006 } 2007 } 2008 if (carpr.carpr_vhid > 0) { 2009 if (carpr.carpr_vhid > 255) { 2010 error = EINVAL; 2011 break; 2012 } 2013 if (sc->sc_carpdev) { 2014 struct carp_if *cif; 2015 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2016 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 2017 if (vr != sc && 2018 vr->sc_vhid == carpr.carpr_vhid) { 2019 error = EEXIST; 2020 break; 2021 } 2022 if (error == EEXIST) 2023 break; 2024 } 2025 sc->sc_vhid = carpr.carpr_vhid; 2026 IF_LLADDR(sc->sc_ifp)[0] = 0; 2027 IF_LLADDR(sc->sc_ifp)[1] = 0; 2028 IF_LLADDR(sc->sc_ifp)[2] = 0x5e; 2029 IF_LLADDR(sc->sc_ifp)[3] = 0; 2030 IF_LLADDR(sc->sc_ifp)[4] = 1; 2031 IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; 2032 error--; 2033 } 2034 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 2035 if (carpr.carpr_advskew >= 255) { 2036 error = EINVAL; 2037 break; 2038 } 2039 if (carpr.carpr_advbase > 255) { 2040 error = EINVAL; 2041 break; 2042 } 2043 sc->sc_advbase = carpr.carpr_advbase; 2044 sc->sc_advskew = carpr.carpr_advskew; 2045 error--; 2046 } 2047 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2048 if (error > 0) 2049 error = EINVAL; 2050 else { 2051 error = 0; 2052 carp_setrun(sc, 0); 2053 } 2054 break; 2055 2056 case SIOCGVH: 2057 /* XXX: lockless read */ 2058 bzero(&carpr, sizeof(carpr)); 2059 carpr.carpr_state = sc->sc_state; 2060 carpr.carpr_vhid = sc->sc_vhid; 2061 carpr.carpr_advbase = sc->sc_advbase; 2062 carpr.carpr_advskew = sc->sc_advskew; 2063 error = priv_check(curthread, PRIV_NETINET_CARP); 2064 if (error == 0) 2065 bcopy(sc->sc_key, carpr.carpr_key, 2066 sizeof(carpr.carpr_key)); 2067 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2068 break; 2069 2070 default: 2071 error = EINVAL; 2072 } 2073 2074 if (locked) 2075 CARP_SCUNLOCK(sc); 2076 2077 carp_hmac_prepare(sc); 2078 2079 return (error); 2080 } 2081 2082 /* 2083 * XXX: this is looutput. We should eventually use it from there. 2084 */ 2085 static int 2086 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 2087 struct route *ro) 2088 { 2089 u_int32_t af; 2090 struct rtentry *rt = NULL; 2091 2092 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 2093 2094 if (ro != NULL) 2095 rt = ro->ro_rt; 2096 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2097 m_freem(m); 2098 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 2099 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 2100 } 2101 2102 ifp->if_opackets++; 2103 ifp->if_obytes += m->m_pkthdr.len; 2104 2105 /* BPF writes need to be handled specially. */ 2106 if (dst->sa_family == AF_UNSPEC) { 2107 bcopy(dst->sa_data, &af, sizeof(af)); 2108 dst->sa_family = af; 2109 } 2110 2111 #if 1 /* XXX */ 2112 switch (dst->sa_family) { 2113 case AF_INET: 2114 case AF_INET6: 2115 case AF_IPX: 2116 case AF_APPLETALK: 2117 break; 2118 default: 2119 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 2120 m_freem(m); 2121 return (EAFNOSUPPORT); 2122 } 2123 #endif 2124 return(if_simloop(ifp, m, dst->sa_family, 0)); 2125 } 2126 2127 /* 2128 * Start output on carp interface. This function should never be called. 2129 */ 2130 static void 2131 carp_start(struct ifnet *ifp) 2132 { 2133 #ifdef DEBUG 2134 printf("%s: start called\n", ifp->if_xname); 2135 #endif 2136 } 2137 2138 int 2139 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2140 struct rtentry *rt) 2141 { 2142 struct m_tag *mtag; 2143 struct carp_softc *sc; 2144 struct ifnet *carp_ifp; 2145 2146 if (!sa) 2147 return (0); 2148 2149 switch (sa->sa_family) { 2150 #ifdef INET 2151 case AF_INET: 2152 break; 2153 #endif /* INET */ 2154 #ifdef INET6 2155 case AF_INET6: 2156 break; 2157 #endif /* INET6 */ 2158 default: 2159 return (0); 2160 } 2161 2162 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2163 if (mtag == NULL) 2164 return (0); 2165 2166 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2167 sc = carp_ifp->if_softc; 2168 2169 /* Set the source MAC address to Virtual Router MAC Address */ 2170 switch (ifp->if_type) { 2171 case IFT_ETHER: 2172 case IFT_L2VLAN: { 2173 struct ether_header *eh; 2174 2175 eh = mtod(m, struct ether_header *); 2176 eh->ether_shost[0] = 0; 2177 eh->ether_shost[1] = 0; 2178 eh->ether_shost[2] = 0x5e; 2179 eh->ether_shost[3] = 0; 2180 eh->ether_shost[4] = 1; 2181 eh->ether_shost[5] = sc->sc_vhid; 2182 } 2183 break; 2184 case IFT_FDDI: { 2185 struct fddi_header *fh; 2186 2187 fh = mtod(m, struct fddi_header *); 2188 fh->fddi_shost[0] = 0; 2189 fh->fddi_shost[1] = 0; 2190 fh->fddi_shost[2] = 0x5e; 2191 fh->fddi_shost[3] = 0; 2192 fh->fddi_shost[4] = 1; 2193 fh->fddi_shost[5] = sc->sc_vhid; 2194 } 2195 break; 2196 case IFT_ISO88025: { 2197 struct iso88025_header *th; 2198 th = mtod(m, struct iso88025_header *); 2199 th->iso88025_shost[0] = 3; 2200 th->iso88025_shost[1] = 0; 2201 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 2202 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 2203 th->iso88025_shost[4] = 0; 2204 th->iso88025_shost[5] = 0; 2205 } 2206 break; 2207 default: 2208 printf("%s: carp is not supported for this interface type\n", 2209 ifp->if_xname); 2210 return (EOPNOTSUPP); 2211 } 2212 2213 return (0); 2214 } 2215 2216 static void 2217 carp_set_state(struct carp_softc *sc, int state) 2218 { 2219 int link_state; 2220 2221 if (sc->sc_carpdev) 2222 CARP_SCLOCK_ASSERT(sc); 2223 2224 if (sc->sc_state == state) 2225 return; 2226 2227 sc->sc_state = state; 2228 switch (state) { 2229 case BACKUP: 2230 link_state = LINK_STATE_DOWN; 2231 break; 2232 case MASTER: 2233 link_state = LINK_STATE_UP; 2234 break; 2235 default: 2236 link_state = LINK_STATE_UNKNOWN; 2237 break; 2238 } 2239 if_link_state_change(SC2IFP(sc), link_state); 2240 } 2241 2242 void 2243 carp_carpdev_state(struct ifnet *ifp) 2244 { 2245 struct carp_if *cif; 2246 2247 cif = ifp->if_carp; 2248 CARP_LOCK(cif); 2249 carp_carpdev_state_locked(cif); 2250 CARP_UNLOCK(cif); 2251 } 2252 2253 static void 2254 carp_carpdev_state_locked(struct carp_if *cif) 2255 { 2256 struct carp_softc *sc; 2257 2258 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2259 carp_sc_state_locked(sc); 2260 } 2261 2262 static void 2263 carp_sc_state_locked(struct carp_softc *sc) 2264 { 2265 CARP_SCLOCK_ASSERT(sc); 2266 2267 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2268 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2269 sc->sc_flags_backup = SC2IFP(sc)->if_flags; 2270 SC2IFP(sc)->if_flags &= ~IFF_UP; 2271 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 2272 callout_stop(&sc->sc_ad_tmo); 2273 callout_stop(&sc->sc_md_tmo); 2274 callout_stop(&sc->sc_md6_tmo); 2275 carp_set_state(sc, INIT); 2276 carp_setrun(sc, 0); 2277 if (!sc->sc_suppress) { 2278 carp_suppress_preempt++; 2279 if (carp_suppress_preempt == 1) { 2280 CARP_SCUNLOCK(sc); 2281 carp_send_ad_all(); 2282 CARP_SCLOCK(sc); 2283 } 2284 } 2285 sc->sc_suppress = 1; 2286 } else { 2287 SC2IFP(sc)->if_flags |= sc->sc_flags_backup; 2288 carp_set_state(sc, INIT); 2289 carp_setrun(sc, 0); 2290 if (sc->sc_suppress) 2291 carp_suppress_preempt--; 2292 sc->sc_suppress = 0; 2293 } 2294 2295 return; 2296 } 2297 2298 #ifdef INET 2299 extern struct domain inetdomain; 2300 static struct protosw in_carp_protosw = { 2301 .pr_type = SOCK_RAW, 2302 .pr_domain = &inetdomain, 2303 .pr_protocol = IPPROTO_CARP, 2304 .pr_flags = PR_ATOMIC|PR_ADDR, 2305 .pr_input = carp_input, 2306 .pr_output = (pr_output_t *)rip_output, 2307 .pr_ctloutput = rip_ctloutput, 2308 .pr_usrreqs = &rip_usrreqs 2309 }; 2310 #endif 2311 2312 #ifdef INET6 2313 extern struct domain inet6domain; 2314 static struct ip6protosw in6_carp_protosw = { 2315 .pr_type = SOCK_RAW, 2316 .pr_domain = &inet6domain, 2317 .pr_protocol = IPPROTO_CARP, 2318 .pr_flags = PR_ATOMIC|PR_ADDR, 2319 .pr_input = carp6_input, 2320 .pr_output = rip6_output, 2321 .pr_ctloutput = rip6_ctloutput, 2322 .pr_usrreqs = &rip6_usrreqs 2323 }; 2324 #endif 2325 2326 static void 2327 carp_mod_cleanup(void) 2328 { 2329 2330 if (if_detach_event_tag == NULL) 2331 return; 2332 EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); 2333 if_clone_detach(&carp_cloner); 2334 #ifdef INET 2335 if (proto_reg[CARP_INET] == 0) { 2336 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2337 proto_reg[CARP_INET] = -1; 2338 } 2339 carp_iamatch_p = NULL; 2340 #endif 2341 #ifdef INET6 2342 if (proto_reg[CARP_INET6] == 0) { 2343 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2344 proto_reg[CARP_INET6] = -1; 2345 } 2346 carp_iamatch6_p = NULL; 2347 carp_macmatch6_p = NULL; 2348 #endif 2349 carp_linkstate_p = NULL; 2350 carp_forus_p = NULL; 2351 carp_output_p = NULL; 2352 mtx_destroy(&carp_mtx); 2353 } 2354 2355 static int 2356 carp_mod_load(void) 2357 { 2358 2359 if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 2360 carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); 2361 if (if_detach_event_tag == NULL) 2362 return (ENOMEM); 2363 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2364 LIST_INIT(&carpif_list); 2365 if_clone_attach(&carp_cloner); 2366 carp_linkstate_p = carp_carpdev_state; 2367 carp_forus_p = carp_forus; 2368 carp_output_p = carp_output; 2369 #ifdef INET6 2370 carp_iamatch6_p = carp_iamatch6; 2371 carp_macmatch6_p = carp_macmatch6; 2372 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2373 (struct protosw *)&in6_carp_protosw); 2374 if (proto_reg[CARP_INET6] != 0) { 2375 printf("carp: error %d attaching to PF_INET6\n", 2376 proto_reg[CARP_INET6]); 2377 carp_mod_cleanup(); 2378 return (EINVAL); 2379 } 2380 #endif 2381 #ifdef INET 2382 carp_iamatch_p = carp_iamatch; 2383 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2384 if (proto_reg[CARP_INET] != 0) { 2385 printf("carp: error %d attaching to PF_INET\n", 2386 proto_reg[CARP_INET]); 2387 carp_mod_cleanup(); 2388 return (EINVAL); 2389 } 2390 #endif 2391 return 0; 2392 } 2393 2394 static int 2395 carp_modevent(module_t mod, int type, void *data) 2396 { 2397 switch (type) { 2398 case MOD_LOAD: 2399 return carp_mod_load(); 2400 /* NOTREACHED */ 2401 case MOD_UNLOAD: 2402 /* 2403 * XXX: For now, disallow module unloading by default due to 2404 * a race condition where a thread may dereference one of the 2405 * function pointer hooks after the module has been 2406 * unloaded, during processing of a packet, causing a panic. 2407 */ 2408 #ifdef CARPMOD_CAN_UNLOAD 2409 carp_mod_cleanup(); 2410 #else 2411 return (EBUSY); 2412 #endif 2413 break; 2414 2415 default: 2416 return (EINVAL); 2417 } 2418 2419 return (0); 2420 } 2421 2422 static moduledata_t carp_mod = { 2423 "carp", 2424 carp_modevent, 2425 0 2426 }; 2427 2428 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2429