1 /* $FreeBSD$ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29 #include "opt_carp.h" 30 #include "opt_bpf.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/conf.h> 38 #include <sys/kernel.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/time.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/sysctl.h> 47 #include <sys/syslog.h> 48 #include <sys/signalvar.h> 49 #include <sys/filio.h> 50 #include <sys/sockio.h> 51 52 #include <sys/socket.h> 53 #include <sys/vnode.h> 54 55 #include <machine/stdarg.h> 56 57 #include <net/bpf.h> 58 #include <net/ethernet.h> 59 #include <net/fddi.h> 60 #include <net/iso88025.h> 61 #include <net/if.h> 62 #include <net/if_clone.h> 63 #include <net/if_dl.h> 64 #include <net/if_types.h> 65 #include <net/route.h> 66 67 #ifdef INET 68 #include <netinet/in.h> 69 #include <netinet/in_var.h> 70 #include <netinet/in_systm.h> 71 #include <netinet/ip.h> 72 #include <netinet/ip_var.h> 73 #include <netinet/if_ether.h> 74 #include <machine/in_cksum.h> 75 #endif 76 77 #ifdef INET6 78 #include <netinet/icmp6.h> 79 #include <netinet/ip6.h> 80 #include <netinet6/ip6_var.h> 81 #include <netinet6/scope6_var.h> 82 #include <netinet6/nd6.h> 83 #endif 84 85 #include <crypto/sha1.h> 86 #include <netinet/ip_carp.h> 87 88 #define CARP_IFNAME "carp" 89 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 90 SYSCTL_DECL(_net_inet_carp); 91 92 struct carp_softc { 93 struct ifnet *sc_ifp; /* Interface clue */ 94 struct ifnet *sc_carpdev; /* Pointer to parent interface */ 95 struct in_ifaddr *sc_ia; /* primary iface address */ 96 struct ip_moptions sc_imo; 97 #ifdef INET6 98 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 99 struct ip6_moptions sc_im6o; 100 #endif /* INET6 */ 101 TAILQ_ENTRY(carp_softc) sc_list; 102 103 enum { INIT = 0, BACKUP, MASTER } sc_state; 104 105 int sc_flags_backup; 106 int 
sc_suppress; 107 108 int sc_sendad_errors; 109 #define CARP_SENDAD_MAX_ERRORS 3 110 int sc_sendad_success; 111 #define CARP_SENDAD_MIN_SUCCESS 3 112 113 int sc_vhid; 114 int sc_advskew; 115 int sc_naddrs; 116 int sc_naddrs6; 117 int sc_advbase; /* seconds */ 118 int sc_init_counter; 119 u_int64_t sc_counter; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char sc_key[CARP_KEY_LEN]; 124 unsigned char sc_pad[CARP_HMAC_PAD]; 125 SHA1_CTX sc_sha1; 126 127 struct callout sc_ad_tmo; /* advertisement timeout */ 128 struct callout sc_md_tmo; /* master down timeout */ 129 struct callout sc_md6_tmo; /* master down timeout */ 130 131 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 132 }; 133 #define SC2IFP(sc) ((sc)->sc_ifp) 134 135 int carp_suppress_preempt = 0; 136 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 137 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 138 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 139 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 140 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 141 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 142 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 143 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 144 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 145 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, 146 &carp_suppress_preempt, 0, "Preemption is suppressed"); 147 148 struct carpstats carpstats; 149 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 150 &carpstats, carpstats, 151 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 152 153 struct carp_if { 154 TAILQ_HEAD(, carp_softc) vhif_vrs; 155 int vhif_nvrs; 156 157 struct ifnet *vhif_ifp; 158 struct mtx vhif_mtx; 159 }; 160 161 /* Get carp_if from softc. Valid after carp_set_addr{,6}. 
*/ 162 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) 163 164 /* lock per carp_if queue */ 165 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ 166 NULL, MTX_DEF) 167 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) 168 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) 169 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) 170 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) 171 172 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) 173 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) 174 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) 175 176 #define CARP_LOG(...) do { \ 177 if (carp_opts[CARPCTL_LOG] > 0) \ 178 log(LOG_INFO, __VA_ARGS__); \ 179 } while (0) 180 181 #define CARP_DEBUG(...) do { \ 182 if (carp_opts[CARPCTL_LOG] > 1) \ 183 log(LOG_DEBUG, __VA_ARGS__); \ 184 } while (0) 185 186 static void carp_hmac_prepare(struct carp_softc *); 187 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 188 unsigned char *); 189 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 190 unsigned char *); 191 static void carp_setroute(struct carp_softc *, int); 192 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 193 static int carp_clone_create(struct if_clone *, int, caddr_t); 194 static void carp_clone_destroy(struct ifnet *); 195 static void carpdetach(struct carp_softc *); 196 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 197 struct carp_header *); 198 static void carp_send_ad_all(void); 199 static void carp_send_ad(void *); 200 static void carp_send_ad_locked(struct carp_softc *); 201 static void carp_send_arp(struct carp_softc *); 202 static void carp_master_down(void *); 203 static void carp_master_down_locked(struct carp_softc *); 204 static int carp_ioctl(struct ifnet *, u_long, caddr_t); 205 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 206 struct rtentry 
*); 207 static void carp_start(struct ifnet *); 208 static void carp_setrun(struct carp_softc *, sa_family_t); 209 static void carp_set_state(struct carp_softc *, int); 210 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 211 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 212 213 static void carp_multicast_cleanup(struct carp_softc *); 214 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 215 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); 216 static void carp_carpdev_state_locked(struct carp_if *); 217 static void carp_sc_state_locked(struct carp_softc *); 218 #ifdef INET6 219 static void carp_send_na(struct carp_softc *); 220 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 221 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 222 #endif 223 224 static LIST_HEAD(, carp_softc) carpif_list; 225 static struct mtx carp_mtx; 226 IFC_SIMPLE_DECLARE(carp, 0); 227 228 static eventhandler_tag if_detach_event_tag; 229 230 static __inline u_int16_t 231 carp_cksum(struct mbuf *m, int len) 232 { 233 return (in_cksum(m, len)); 234 } 235 236 static void 237 carp_hmac_prepare(struct carp_softc *sc) 238 { 239 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 240 u_int8_t vhid = sc->sc_vhid & 0xff; 241 struct ifaddr *ifa; 242 int i; 243 #ifdef INET6 244 struct in6_addr in6; 245 #endif 246 247 if (sc->sc_carpdev) 248 CARP_SCLOCK(sc); 249 250 /* XXX: possible race here */ 251 252 /* compute ipad from key */ 253 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 254 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 255 for (i = 0; i < sizeof(sc->sc_pad); i++) 256 sc->sc_pad[i] ^= 0x36; 257 258 /* precompute first part of inner hash */ 259 SHA1Init(&sc->sc_sha1); 260 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 261 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 262 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 263 SHA1Update(&sc->sc_sha1, (void 
*)&vhid, sizeof(vhid)); 264 #ifdef INET 265 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 266 if (ifa->ifa_addr->sa_family == AF_INET) 267 SHA1Update(&sc->sc_sha1, 268 (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr, 269 sizeof(struct in_addr)); 270 } 271 #endif /* INET */ 272 #ifdef INET6 273 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 274 if (ifa->ifa_addr->sa_family == AF_INET6) { 275 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 276 in6_clearscope(&in6); 277 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 278 } 279 } 280 #endif /* INET6 */ 281 282 /* convert ipad to opad */ 283 for (i = 0; i < sizeof(sc->sc_pad); i++) 284 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 285 286 if (sc->sc_carpdev) 287 CARP_SCUNLOCK(sc); 288 } 289 290 static void 291 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 292 unsigned char md[20]) 293 { 294 SHA1_CTX sha1ctx; 295 296 /* fetch first half of inner hash */ 297 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 298 299 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 300 SHA1Final(md, &sha1ctx); 301 302 /* outer hash */ 303 SHA1Init(&sha1ctx); 304 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 305 SHA1Update(&sha1ctx, md, 20); 306 SHA1Final(md, &sha1ctx); 307 } 308 309 static int 310 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 311 unsigned char md[20]) 312 { 313 unsigned char md2[20]; 314 315 CARP_SCLOCK_ASSERT(sc); 316 317 carp_hmac_generate(sc, counter, md2); 318 319 return (bcmp(md, md2, sizeof(md2))); 320 } 321 322 static void 323 carp_setroute(struct carp_softc *sc, int cmd) 324 { 325 struct ifaddr *ifa; 326 int s; 327 328 if (sc->sc_carpdev) 329 CARP_SCLOCK_ASSERT(sc); 330 331 s = splnet(); 332 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 333 if (ifa->ifa_addr->sa_family == AF_INET && 334 sc->sc_carpdev != NULL) { 335 int count = carp_addrcount( 336 (struct carp_if *)sc->sc_carpdev->if_carp, 337 ifatoia(ifa), CARP_COUNT_MASTER); 338 339 if ((cmd == 
RTM_ADD && count == 1) || 340 (cmd == RTM_DELETE && count == 0)) 341 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 342 } 343 #ifdef INET6 344 if (ifa->ifa_addr->sa_family == AF_INET6) { 345 if (cmd == RTM_ADD) 346 in6_ifaddloop(ifa); 347 else 348 in6_ifremloop(ifa); 349 } 350 #endif /* INET6 */ 351 } 352 splx(s); 353 } 354 355 static int 356 carp_clone_create(struct if_clone *ifc, int unit, caddr_t params) 357 { 358 359 struct carp_softc *sc; 360 struct ifnet *ifp; 361 362 MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 363 ifp = SC2IFP(sc) = if_alloc(IFT_ETHER); 364 if (ifp == NULL) { 365 FREE(sc, M_CARP); 366 return (ENOSPC); 367 } 368 369 sc->sc_flags_backup = 0; 370 sc->sc_suppress = 0; 371 sc->sc_advbase = CARP_DFLTINTV; 372 sc->sc_vhid = -1; /* required setting */ 373 sc->sc_advskew = 0; 374 sc->sc_init_counter = 1; 375 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ 376 #ifdef INET6 377 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 378 #endif 379 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 380 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 381 M_WAITOK); 382 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 383 sc->sc_imo.imo_multicast_vif = -1; 384 385 callout_init(&sc->sc_ad_tmo, NET_CALLOUT_MPSAFE); 386 callout_init(&sc->sc_md_tmo, NET_CALLOUT_MPSAFE); 387 callout_init(&sc->sc_md6_tmo, NET_CALLOUT_MPSAFE); 388 389 ifp->if_softc = sc; 390 if_initname(ifp, CARP_IFNAME, unit); 391 ifp->if_mtu = ETHERMTU; 392 ifp->if_flags = IFF_LOOPBACK; 393 ifp->if_ioctl = carp_ioctl; 394 ifp->if_output = carp_looutput; 395 ifp->if_start = carp_start; 396 ifp->if_type = IFT_CARP; 397 ifp->if_snd.ifq_maxlen = ifqmaxlen; 398 ifp->if_hdrlen = 0; 399 if_attach(ifp); 400 bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 401 mtx_lock(&carp_mtx); 402 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 403 mtx_unlock(&carp_mtx); 404 return (0); 405 } 406 407 static void 408 carp_clone_destroy(struct ifnet *ifp) 409 { 410 struct carp_softc *sc = 
ifp->if_softc; 411 412 if (sc->sc_carpdev) 413 CARP_SCLOCK(sc); 414 carpdetach(sc); 415 if (sc->sc_carpdev) 416 CARP_SCUNLOCK(sc); 417 418 mtx_lock(&carp_mtx); 419 LIST_REMOVE(sc, sc_next); 420 mtx_unlock(&carp_mtx); 421 bpfdetach(ifp); 422 if_detach(ifp); 423 if_free_type(ifp, IFT_ETHER); 424 free(sc->sc_imo.imo_membership, M_CARP); 425 free(sc, M_CARP); 426 } 427 428 static void 429 carpdetach(struct carp_softc *sc) 430 { 431 struct carp_if *cif; 432 433 callout_stop(&sc->sc_ad_tmo); 434 callout_stop(&sc->sc_md_tmo); 435 callout_stop(&sc->sc_md6_tmo); 436 437 if (sc->sc_suppress) 438 carp_suppress_preempt--; 439 sc->sc_suppress = 0; 440 441 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 442 carp_suppress_preempt--; 443 sc->sc_sendad_errors = 0; 444 445 carp_set_state(sc, INIT); 446 SC2IFP(sc)->if_flags &= ~IFF_UP; 447 carp_setrun(sc, 0); 448 carp_multicast_cleanup(sc); 449 450 if (sc->sc_carpdev != NULL) { 451 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 452 CARP_LOCK_ASSERT(cif); 453 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 454 if (!--cif->vhif_nvrs) { 455 ifpromisc(sc->sc_carpdev, 0); 456 sc->sc_carpdev->if_carp = NULL; 457 CARP_LOCK_DESTROY(cif); 458 FREE(cif, M_IFADDR); 459 } 460 } 461 sc->sc_carpdev = NULL; 462 } 463 464 /* Detach an interface from the carp. */ 465 static void 466 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 467 { 468 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 469 struct carp_softc *sc, *nextsc; 470 471 if (cif == NULL) 472 return; 473 474 /* 475 * XXX: At the end of for() cycle the lock will be destroyed. 476 */ 477 CARP_LOCK(cif); 478 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 479 nextsc = TAILQ_NEXT(sc, sc_list); 480 carpdetach(sc); 481 } 482 } 483 484 /* 485 * process input packet. 486 * we have rearranged checks order compared to the rfc, 487 * but it seems more efficient this way or not possible otherwise. 
488 */ 489 void 490 carp_input(struct mbuf *m, int hlen) 491 { 492 struct ip *ip = mtod(m, struct ip *); 493 struct carp_header *ch; 494 int iplen, len; 495 496 carpstats.carps_ipackets++; 497 498 if (!carp_opts[CARPCTL_ALLOW]) { 499 m_freem(m); 500 return; 501 } 502 503 /* check if received on a valid carp interface */ 504 if (m->m_pkthdr.rcvif->if_carp == NULL) { 505 carpstats.carps_badif++; 506 CARP_LOG("carp_input: packet received on non-carp " 507 "interface: %s\n", 508 m->m_pkthdr.rcvif->if_xname); 509 m_freem(m); 510 return; 511 } 512 513 /* verify that the IP TTL is 255. */ 514 if (ip->ip_ttl != CARP_DFLTTL) { 515 carpstats.carps_badttl++; 516 CARP_LOG("carp_input: received ttl %d != 255i on %s\n", 517 ip->ip_ttl, 518 m->m_pkthdr.rcvif->if_xname); 519 m_freem(m); 520 return; 521 } 522 523 iplen = ip->ip_hl << 2; 524 525 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 526 carpstats.carps_badlen++; 527 CARP_LOG("carp_input: received len %zd < " 528 "sizeof(struct carp_header)\n", 529 m->m_len - sizeof(struct ip)); 530 m_freem(m); 531 return; 532 } 533 534 if (iplen + sizeof(*ch) < m->m_len) { 535 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 536 carpstats.carps_hdrops++; 537 CARP_LOG("carp_input: pullup failed\n"); 538 return; 539 } 540 ip = mtod(m, struct ip *); 541 } 542 ch = (struct carp_header *)((char *)ip + iplen); 543 544 /* 545 * verify that the received packet length is 546 * equal to the CARP header 547 */ 548 len = iplen + sizeof(*ch); 549 if (len > m->m_pkthdr.len) { 550 carpstats.carps_badlen++; 551 CARP_LOG("carp_input: packet too short %d on %s\n", 552 m->m_pkthdr.len, 553 m->m_pkthdr.rcvif->if_xname); 554 m_freem(m); 555 return; 556 } 557 558 if ((m = m_pullup(m, len)) == NULL) { 559 carpstats.carps_hdrops++; 560 return; 561 } 562 ip = mtod(m, struct ip *); 563 ch = (struct carp_header *)((char *)ip + iplen); 564 565 /* verify the CARP checksum */ 566 m->m_data += iplen; 567 if (carp_cksum(m, len - iplen)) { 568 carpstats.carps_badsum++; 
569 CARP_LOG("carp_input: checksum failed on %s\n", 570 m->m_pkthdr.rcvif->if_xname); 571 m_freem(m); 572 return; 573 } 574 m->m_data -= iplen; 575 576 carp_input_c(m, ch, AF_INET); 577 } 578 579 #ifdef INET6 580 int 581 carp6_input(struct mbuf **mp, int *offp, int proto) 582 { 583 struct mbuf *m = *mp; 584 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 585 struct carp_header *ch; 586 u_int len; 587 588 carpstats.carps_ipackets6++; 589 590 if (!carp_opts[CARPCTL_ALLOW]) { 591 m_freem(m); 592 return (IPPROTO_DONE); 593 } 594 595 /* check if received on a valid carp interface */ 596 if (m->m_pkthdr.rcvif->if_carp == NULL) { 597 carpstats.carps_badif++; 598 CARP_LOG("carp6_input: packet received on non-carp " 599 "interface: %s\n", 600 m->m_pkthdr.rcvif->if_xname); 601 m_freem(m); 602 return (IPPROTO_DONE); 603 } 604 605 /* verify that the IP TTL is 255 */ 606 if (ip6->ip6_hlim != CARP_DFLTTL) { 607 carpstats.carps_badttl++; 608 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 609 ip6->ip6_hlim, 610 m->m_pkthdr.rcvif->if_xname); 611 m_freem(m); 612 return (IPPROTO_DONE); 613 } 614 615 /* verify that we have a complete carp packet */ 616 len = m->m_len; 617 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 618 if (ch == NULL) { 619 carpstats.carps_badlen++; 620 CARP_LOG("carp6_input: packet size %u too small\n", len); 621 return (IPPROTO_DONE); 622 } 623 624 625 /* verify the CARP checksum */ 626 m->m_data += *offp; 627 if (carp_cksum(m, sizeof(*ch))) { 628 carpstats.carps_badsum++; 629 CARP_LOG("carp6_input: checksum failed, on %s\n", 630 m->m_pkthdr.rcvif->if_xname); 631 m_freem(m); 632 return (IPPROTO_DONE); 633 } 634 m->m_data -= *offp; 635 636 carp_input_c(m, ch, AF_INET6); 637 return (IPPROTO_DONE); 638 } 639 #endif /* INET6 */ 640 641 static void 642 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 643 { 644 struct ifnet *ifp = m->m_pkthdr.rcvif; 645 struct carp_softc *sc; 646 u_int64_t tmp_counter; 647 struct timeval 
sc_tv, ch_tv; 648 649 /* verify that the VHID is valid on the receiving interface */ 650 CARP_LOCK(ifp->if_carp); 651 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 652 if (sc->sc_vhid == ch->carp_vhid) 653 break; 654 655 if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && 656 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 657 carpstats.carps_badvhid++; 658 CARP_UNLOCK(ifp->if_carp); 659 m_freem(m); 660 return; 661 } 662 663 getmicrotime(&SC2IFP(sc)->if_lastchange); 664 SC2IFP(sc)->if_ipackets++; 665 SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; 666 667 if (bpf_peers_present(SC2IFP(sc)->if_bpf)) { 668 struct ip *ip = mtod(m, struct ip *); 669 uint32_t af1 = af; 670 671 /* BPF wants net byte order */ 672 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 673 ip->ip_off = htons(ip->ip_off); 674 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m); 675 } 676 677 /* verify the CARP version. */ 678 if (ch->carp_version != CARP_VERSION) { 679 carpstats.carps_badver++; 680 SC2IFP(sc)->if_ierrors++; 681 CARP_UNLOCK(ifp->if_carp); 682 CARP_LOG("%s; invalid version %d\n", 683 SC2IFP(sc)->if_xname, 684 ch->carp_version); 685 m_freem(m); 686 return; 687 } 688 689 /* verify the hash */ 690 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 691 carpstats.carps_badauth++; 692 SC2IFP(sc)->if_ierrors++; 693 CARP_UNLOCK(ifp->if_carp); 694 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); 695 m_freem(m); 696 return; 697 } 698 699 tmp_counter = ntohl(ch->carp_counter[0]); 700 tmp_counter = tmp_counter<<32; 701 tmp_counter += ntohl(ch->carp_counter[1]); 702 703 /* XXX Replay protection goes here */ 704 705 sc->sc_init_counter = 0; 706 sc->sc_counter = tmp_counter; 707 708 sc_tv.tv_sec = sc->sc_advbase; 709 if (carp_suppress_preempt && sc->sc_advskew < 240) 710 sc_tv.tv_usec = 240 * 1000000 / 256; 711 else 712 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 713 ch_tv.tv_sec = ch->carp_advbase; 714 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 715 716 
switch (sc->sc_state) { 717 case INIT: 718 break; 719 case MASTER: 720 /* 721 * If we receive an advertisement from a master who's going to 722 * be more frequent than us, go into BACKUP state. 723 */ 724 if (timevalcmp(&sc_tv, &ch_tv, >) || 725 timevalcmp(&sc_tv, &ch_tv, ==)) { 726 callout_stop(&sc->sc_ad_tmo); 727 CARP_DEBUG("%s: MASTER -> BACKUP " 728 "(more frequent advertisement received)\n", 729 SC2IFP(sc)->if_xname); 730 carp_set_state(sc, BACKUP); 731 carp_setrun(sc, 0); 732 carp_setroute(sc, RTM_DELETE); 733 } 734 break; 735 case BACKUP: 736 /* 737 * If we're pre-empting masters who advertise slower than us, 738 * and this one claims to be slower, treat him as down. 739 */ 740 if (carp_opts[CARPCTL_PREEMPT] && 741 timevalcmp(&sc_tv, &ch_tv, <)) { 742 CARP_DEBUG("%s: BACKUP -> MASTER " 743 "(preempting a slower master)\n", 744 SC2IFP(sc)->if_xname); 745 carp_master_down_locked(sc); 746 break; 747 } 748 749 /* 750 * If the master is going to advertise at such a low frequency 751 * that he's guaranteed to time out, we'd might as well just 752 * treat him as timed out now. 753 */ 754 sc_tv.tv_sec = sc->sc_advbase * 3; 755 if (timevalcmp(&sc_tv, &ch_tv, <)) { 756 CARP_DEBUG("%s: BACKUP -> MASTER " 757 "(master timed out)\n", 758 SC2IFP(sc)->if_xname); 759 carp_master_down_locked(sc); 760 break; 761 } 762 763 /* 764 * Otherwise, we reset the counter and wait for the next 765 * advertisement. 
766 */ 767 carp_setrun(sc, af); 768 break; 769 } 770 771 CARP_UNLOCK(ifp->if_carp); 772 773 m_freem(m); 774 return; 775 } 776 777 static int 778 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 779 { 780 struct m_tag *mtag; 781 struct ifnet *ifp = SC2IFP(sc); 782 783 if (sc->sc_init_counter) { 784 /* this could also be seconds since unix epoch */ 785 sc->sc_counter = arc4random(); 786 sc->sc_counter = sc->sc_counter << 32; 787 sc->sc_counter += arc4random(); 788 } else 789 sc->sc_counter++; 790 791 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 792 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 793 794 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 795 796 /* Tag packet for carp_output */ 797 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 798 if (mtag == NULL) { 799 m_freem(m); 800 SC2IFP(sc)->if_oerrors++; 801 return (ENOMEM); 802 } 803 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 804 m_tag_prepend(m, mtag); 805 806 return (0); 807 } 808 809 static void 810 carp_send_ad_all(void) 811 { 812 struct carp_softc *sc; 813 814 mtx_lock(&carp_mtx); 815 LIST_FOREACH(sc, &carpif_list, sc_next) { 816 if (sc->sc_carpdev == NULL) 817 continue; 818 CARP_SCLOCK(sc); 819 if ((SC2IFP(sc)->if_flags & IFF_UP) && 820 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) && 821 sc->sc_state == MASTER) 822 carp_send_ad_locked(sc); 823 CARP_SCUNLOCK(sc); 824 } 825 mtx_unlock(&carp_mtx); 826 } 827 828 static void 829 carp_send_ad(void *v) 830 { 831 struct carp_softc *sc = v; 832 833 CARP_SCLOCK(sc); 834 carp_send_ad_locked(sc); 835 CARP_SCUNLOCK(sc); 836 } 837 838 static void 839 carp_send_ad_locked(struct carp_softc *sc) 840 { 841 struct carp_header ch; 842 struct timeval tv; 843 struct carp_header *ch_ptr; 844 struct mbuf *m; 845 int len, advbase, advskew; 846 847 CARP_SCLOCK_ASSERT(sc); 848 849 /* bow out if we've lost our UPness or RUNNINGuiness */ 850 if (!((SC2IFP(sc)->if_flags & IFF_UP) && 851 
(SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { 852 advbase = 255; 853 advskew = 255; 854 } else { 855 advbase = sc->sc_advbase; 856 if (!carp_suppress_preempt || sc->sc_advskew > 240) 857 advskew = sc->sc_advskew; 858 else 859 advskew = 240; 860 tv.tv_sec = advbase; 861 tv.tv_usec = advskew * 1000000 / 256; 862 } 863 864 ch.carp_version = CARP_VERSION; 865 ch.carp_type = CARP_ADVERTISEMENT; 866 ch.carp_vhid = sc->sc_vhid; 867 ch.carp_advbase = advbase; 868 ch.carp_advskew = advskew; 869 ch.carp_authlen = 7; /* XXX DEFINE */ 870 ch.carp_pad1 = 0; /* must be zero */ 871 ch.carp_cksum = 0; 872 873 #ifdef INET 874 if (sc->sc_ia) { 875 struct ip *ip; 876 877 MGETHDR(m, M_DONTWAIT, MT_HEADER); 878 if (m == NULL) { 879 SC2IFP(sc)->if_oerrors++; 880 carpstats.carps_onomem++; 881 /* XXX maybe less ? */ 882 if (advbase != 255 || advskew != 255) 883 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 884 carp_send_ad, sc); 885 return; 886 } 887 len = sizeof(*ip) + sizeof(ch); 888 m->m_pkthdr.len = len; 889 m->m_pkthdr.rcvif = NULL; 890 m->m_len = len; 891 MH_ALIGN(m, m->m_len); 892 m->m_flags |= M_MCAST; 893 ip = mtod(m, struct ip *); 894 ip->ip_v = IPVERSION; 895 ip->ip_hl = sizeof(*ip) >> 2; 896 ip->ip_tos = IPTOS_LOWDELAY; 897 ip->ip_len = len; 898 ip->ip_id = ip_newid(); 899 ip->ip_off = IP_DF; 900 ip->ip_ttl = CARP_DFLTTL; 901 ip->ip_p = IPPROTO_CARP; 902 ip->ip_sum = 0; 903 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; 904 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 905 906 ch_ptr = (struct carp_header *)(&ip[1]); 907 bcopy(&ch, ch_ptr, sizeof(ch)); 908 if (carp_prepare_ad(m, sc, ch_ptr)) 909 return; 910 911 m->m_data += sizeof(*ip); 912 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 913 m->m_data -= sizeof(*ip); 914 915 getmicrotime(&SC2IFP(sc)->if_lastchange); 916 SC2IFP(sc)->if_opackets++; 917 SC2IFP(sc)->if_obytes += len; 918 carpstats.carps_opackets++; 919 920 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 921 
SC2IFP(sc)->if_oerrors++; 922 if (sc->sc_sendad_errors < INT_MAX) 923 sc->sc_sendad_errors++; 924 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 925 carp_suppress_preempt++; 926 if (carp_suppress_preempt == 1) { 927 CARP_SCUNLOCK(sc); 928 carp_send_ad_all(); 929 CARP_SCLOCK(sc); 930 } 931 } 932 sc->sc_sendad_success = 0; 933 } else { 934 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 935 if (++sc->sc_sendad_success >= 936 CARP_SENDAD_MIN_SUCCESS) { 937 carp_suppress_preempt--; 938 sc->sc_sendad_errors = 0; 939 } 940 } else 941 sc->sc_sendad_errors = 0; 942 } 943 } 944 #endif /* INET */ 945 #ifdef INET6 946 if (sc->sc_ia6) { 947 struct ip6_hdr *ip6; 948 949 MGETHDR(m, M_DONTWAIT, MT_HEADER); 950 if (m == NULL) { 951 SC2IFP(sc)->if_oerrors++; 952 carpstats.carps_onomem++; 953 /* XXX maybe less ? */ 954 if (advbase != 255 || advskew != 255) 955 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 956 carp_send_ad, sc); 957 return; 958 } 959 len = sizeof(*ip6) + sizeof(ch); 960 m->m_pkthdr.len = len; 961 m->m_pkthdr.rcvif = NULL; 962 m->m_len = len; 963 MH_ALIGN(m, m->m_len); 964 m->m_flags |= M_MCAST; 965 ip6 = mtod(m, struct ip6_hdr *); 966 bzero(ip6, sizeof(*ip6)); 967 ip6->ip6_vfc |= IPV6_VERSION; 968 ip6->ip6_hlim = CARP_DFLTTL; 969 ip6->ip6_nxt = IPPROTO_CARP; 970 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 971 sizeof(struct in6_addr)); 972 /* set the multicast destination */ 973 974 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 975 ip6->ip6_dst.s6_addr8[15] = 0x12; 976 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 977 SC2IFP(sc)->if_oerrors++; 978 m_freem(m); 979 CARP_LOG("%s: in6_setscope failed\n", __func__); 980 return; 981 } 982 983 ch_ptr = (struct carp_header *)(&ip6[1]); 984 bcopy(&ch, ch_ptr, sizeof(ch)); 985 if (carp_prepare_ad(m, sc, ch_ptr)) 986 return; 987 988 m->m_data += sizeof(*ip6); 989 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 990 m->m_data -= sizeof(*ip6); 991 992 getmicrotime(&SC2IFP(sc)->if_lastchange); 
993 SC2IFP(sc)->if_opackets++; 994 SC2IFP(sc)->if_obytes += len; 995 carpstats.carps_opackets6++; 996 997 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 998 SC2IFP(sc)->if_oerrors++; 999 if (sc->sc_sendad_errors < INT_MAX) 1000 sc->sc_sendad_errors++; 1001 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1002 carp_suppress_preempt++; 1003 if (carp_suppress_preempt == 1) { 1004 CARP_SCUNLOCK(sc); 1005 carp_send_ad_all(); 1006 CARP_SCLOCK(sc); 1007 } 1008 } 1009 sc->sc_sendad_success = 0; 1010 } else { 1011 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1012 if (++sc->sc_sendad_success >= 1013 CARP_SENDAD_MIN_SUCCESS) { 1014 carp_suppress_preempt--; 1015 sc->sc_sendad_errors = 0; 1016 } 1017 } else 1018 sc->sc_sendad_errors = 0; 1019 } 1020 } 1021 #endif /* INET6 */ 1022 1023 if (advbase != 255 || advskew != 255) 1024 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1025 carp_send_ad, sc); 1026 1027 } 1028 1029 /* 1030 * Broadcast a gratuitous ARP request containing 1031 * the virtual router MAC address for each IP address 1032 * associated with the virtual router. 
1033 */ 1034 static void 1035 carp_send_arp(struct carp_softc *sc) 1036 { 1037 struct ifaddr *ifa; 1038 1039 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1040 1041 if (ifa->ifa_addr->sa_family != AF_INET) 1042 continue; 1043 1044 /* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */ 1045 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp)); 1046 1047 DELAY(1000); /* XXX */ 1048 } 1049 } 1050 1051 #ifdef INET6 1052 static void 1053 carp_send_na(struct carp_softc *sc) 1054 { 1055 struct ifaddr *ifa; 1056 struct in6_addr *in6; 1057 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1058 1059 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1060 1061 if (ifa->ifa_addr->sa_family != AF_INET6) 1062 continue; 1063 1064 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1065 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1066 ND_NA_FLAG_OVERRIDE, 1, NULL); 1067 DELAY(1000); /* XXX */ 1068 } 1069 } 1070 #endif /* INET6 */ 1071 1072 static int 1073 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1074 { 1075 struct carp_softc *vh; 1076 struct ifaddr *ifa; 1077 int count = 0; 1078 1079 CARP_LOCK_ASSERT(cif); 1080 1081 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1082 if ((type == CARP_COUNT_RUNNING && 1083 (SC2IFP(vh)->if_flags & IFF_UP) && 1084 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) || 1085 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1086 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1087 ifa_list) { 1088 if (ifa->ifa_addr->sa_family == AF_INET && 1089 ia->ia_addr.sin_addr.s_addr == 1090 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1091 count++; 1092 } 1093 } 1094 } 1095 return (count); 1096 } 1097 1098 int 1099 carp_iamatch(void *v, struct in_ifaddr *ia, 1100 struct in_addr *isaddr, u_int8_t **enaddr) 1101 { 1102 struct carp_if *cif = v; 1103 struct carp_softc *vh; 1104 int index, count = 0; 1105 struct ifaddr *ifa; 1106 1107 CARP_LOCK(cif); 1108 1109 if (carp_opts[CARPCTL_ARPBALANCE]) { 1110 /* 1111 * XXX 
proof of concept implementation. 1112 * We use the source ip to decide which virtual host should 1113 * handle the request. If we're master of that virtual host, 1114 * then we respond, otherwise, just drop the arp packet on 1115 * the floor. 1116 */ 1117 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1118 if (count == 0) { 1119 /* should never reach this */ 1120 CARP_UNLOCK(cif); 1121 return (0); 1122 } 1123 1124 /* this should be a hash, like pf_hash() */ 1125 index = ntohl(isaddr->s_addr) % count; 1126 count = 0; 1127 1128 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1129 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1130 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) { 1131 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, 1132 ifa_list) { 1133 if (ifa->ifa_addr->sa_family == 1134 AF_INET && 1135 ia->ia_addr.sin_addr.s_addr == 1136 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1137 if (count == index) { 1138 if (vh->sc_state == 1139 MASTER) { 1140 *enaddr = IF_LLADDR(vh->sc_ifp); 1141 CARP_UNLOCK(cif); 1142 return (1); 1143 } else { 1144 CARP_UNLOCK(cif); 1145 return (0); 1146 } 1147 } 1148 count++; 1149 } 1150 } 1151 } 1152 } 1153 } else { 1154 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1155 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1156 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1157 ia->ia_ifp == SC2IFP(vh) && 1158 vh->sc_state == MASTER) { 1159 *enaddr = IF_LLADDR(vh->sc_ifp); 1160 CARP_UNLOCK(cif); 1161 return (1); 1162 } 1163 } 1164 } 1165 CARP_UNLOCK(cif); 1166 return (0); 1167 } 1168 1169 #ifdef INET6 1170 struct ifaddr * 1171 carp_iamatch6(void *v, struct in6_addr *taddr) 1172 { 1173 struct carp_if *cif = v; 1174 struct carp_softc *vh; 1175 struct ifaddr *ifa; 1176 1177 CARP_LOCK(cif); 1178 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1179 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { 1180 if (IN6_ARE_ADDR_EQUAL(taddr, 1181 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1182 (SC2IFP(vh)->if_flags & IFF_UP) && 1183 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) 
&& 1184 vh->sc_state == MASTER) { 1185 CARP_UNLOCK(cif); 1186 return (ifa); 1187 } 1188 } 1189 } 1190 CARP_UNLOCK(cif); 1191 1192 return (NULL); 1193 } 1194 1195 void * 1196 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1197 { 1198 struct m_tag *mtag; 1199 struct carp_if *cif = v; 1200 struct carp_softc *sc; 1201 struct ifaddr *ifa; 1202 1203 CARP_LOCK(cif); 1204 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1205 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { 1206 if (IN6_ARE_ADDR_EQUAL(taddr, 1207 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1208 (SC2IFP(sc)->if_flags & IFF_UP) && 1209 (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) { 1210 struct ifnet *ifp = SC2IFP(sc); 1211 mtag = m_tag_get(PACKET_TAG_CARP, 1212 sizeof(struct ifnet *), M_NOWAIT); 1213 if (mtag == NULL) { 1214 /* better a bit than nothing */ 1215 CARP_UNLOCK(cif); 1216 return (IF_LLADDR(sc->sc_ifp)); 1217 } 1218 bcopy(&ifp, (caddr_t)(mtag + 1), 1219 sizeof(struct ifnet *)); 1220 m_tag_prepend(m, mtag); 1221 1222 CARP_UNLOCK(cif); 1223 return (IF_LLADDR(sc->sc_ifp)); 1224 } 1225 } 1226 } 1227 CARP_UNLOCK(cif); 1228 1229 return (NULL); 1230 } 1231 #endif 1232 1233 struct ifnet * 1234 carp_forus(void *v, void *dhost) 1235 { 1236 struct carp_if *cif = v; 1237 struct carp_softc *vh; 1238 u_int8_t *ena = dhost; 1239 1240 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1241 return (NULL); 1242 1243 CARP_LOCK(cif); 1244 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1245 if ((SC2IFP(vh)->if_flags & IFF_UP) && 1246 (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && 1247 vh->sc_state == MASTER && 1248 !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { 1249 CARP_UNLOCK(cif); 1250 return (SC2IFP(vh)); 1251 } 1252 1253 CARP_UNLOCK(cif); 1254 return (NULL); 1255 } 1256 1257 static void 1258 carp_master_down(void *v) 1259 { 1260 struct carp_softc *sc = v; 1261 1262 CARP_SCLOCK(sc); 1263 carp_master_down_locked(sc); 1264 CARP_SCUNLOCK(sc); 1265 } 1266 1267 static void 1268 
carp_master_down_locked(struct carp_softc *sc) 1269 { 1270 if (sc->sc_carpdev) 1271 CARP_SCLOCK_ASSERT(sc); 1272 1273 switch (sc->sc_state) { 1274 case INIT: 1275 printf("%s: master_down event in INIT state\n", 1276 SC2IFP(sc)->if_xname); 1277 break; 1278 case MASTER: 1279 break; 1280 case BACKUP: 1281 carp_set_state(sc, MASTER); 1282 carp_send_ad_locked(sc); 1283 carp_send_arp(sc); 1284 #ifdef INET6 1285 carp_send_na(sc); 1286 #endif /* INET6 */ 1287 carp_setrun(sc, 0); 1288 carp_setroute(sc, RTM_ADD); 1289 break; 1290 } 1291 } 1292 1293 /* 1294 * When in backup state, af indicates whether to reset the master down timer 1295 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1296 */ 1297 static void 1298 carp_setrun(struct carp_softc *sc, sa_family_t af) 1299 { 1300 struct timeval tv; 1301 1302 if (sc->sc_carpdev == NULL) { 1303 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1304 carp_set_state(sc, INIT); 1305 return; 1306 } else 1307 CARP_SCLOCK_ASSERT(sc); 1308 1309 if (SC2IFP(sc)->if_flags & IFF_UP && 1310 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) 1311 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 1312 else { 1313 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1314 carp_setroute(sc, RTM_DELETE); 1315 return; 1316 } 1317 1318 switch (sc->sc_state) { 1319 case INIT: 1320 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1321 carp_send_ad_locked(sc); 1322 carp_send_arp(sc); 1323 #ifdef INET6 1324 carp_send_na(sc); 1325 #endif /* INET6 */ 1326 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1327 SC2IFP(sc)->if_xname); 1328 carp_set_state(sc, MASTER); 1329 carp_setroute(sc, RTM_ADD); 1330 } else { 1331 CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); 1332 carp_set_state(sc, BACKUP); 1333 carp_setroute(sc, RTM_DELETE); 1334 carp_setrun(sc, 0); 1335 } 1336 break; 1337 case BACKUP: 1338 callout_stop(&sc->sc_ad_tmo); 1339 tv.tv_sec = 3 * sc->sc_advbase; 1340 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1341 switch 
(af) { 1342 #ifdef INET 1343 case AF_INET: 1344 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1345 carp_master_down, sc); 1346 break; 1347 #endif /* INET */ 1348 #ifdef INET6 1349 case AF_INET6: 1350 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1351 carp_master_down, sc); 1352 break; 1353 #endif /* INET6 */ 1354 default: 1355 if (sc->sc_naddrs) 1356 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1357 carp_master_down, sc); 1358 if (sc->sc_naddrs6) 1359 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1360 carp_master_down, sc); 1361 break; 1362 } 1363 break; 1364 case MASTER: 1365 tv.tv_sec = sc->sc_advbase; 1366 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1367 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1368 carp_send_ad, sc); 1369 break; 1370 } 1371 } 1372 1373 void 1374 carp_multicast_cleanup(struct carp_softc *sc) 1375 { 1376 struct ip_moptions *imo = &sc->sc_imo; 1377 #ifdef INET6 1378 struct ip6_moptions *im6o = &sc->sc_im6o; 1379 #endif 1380 u_int16_t n = imo->imo_num_memberships; 1381 1382 /* Clean up our own multicast memberships */ 1383 while (n-- > 0) { 1384 if (imo->imo_membership[n] != NULL) { 1385 in_delmulti(imo->imo_membership[n]); 1386 imo->imo_membership[n] = NULL; 1387 } 1388 } 1389 imo->imo_num_memberships = 0; 1390 imo->imo_multicast_ifp = NULL; 1391 1392 #ifdef INET6 1393 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1394 struct in6_multi_mship *imm = 1395 LIST_FIRST(&im6o->im6o_memberships); 1396 1397 LIST_REMOVE(imm, i6mm_chain); 1398 in6_leavegroup(imm); 1399 } 1400 im6o->im6o_multicast_ifp = NULL; 1401 #endif 1402 } 1403 1404 static int 1405 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1406 { 1407 struct ifnet *ifp; 1408 struct carp_if *cif; 1409 struct in_ifaddr *ia, *ia_if; 1410 struct ip_moptions *imo = &sc->sc_imo; 1411 struct in_addr addr; 1412 u_long iaddr = htonl(sin->sin_addr.s_addr); 1413 int own, error; 1414 1415 if (sin->sin_addr.s_addr == 0) { 1416 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1417 carp_set_state(sc, INIT); 1418 
if (sc->sc_naddrs)
			SC2IFP(sc)->if_flags |= IFF_UP;
		carp_setrun(sc, 0);
		return (0);
	}

	/* we have to do it by hands to check we won't match on us */
	/*
	 * ia_if becomes the first address that sits on another,
	 * multicast-capable interface whose subnet contains the new address;
	 * own counts candidates that carry exactly the same address.
	 */
	ia_if = NULL; own = 0;
	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
		/* and, yeah, we need a multicast-capable iface too */
		if (ia->ia_ifp != SC2IFP(sc) &&
		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
		    (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
			if (!ia_if)
				ia_if = ia;
			if (sin->sin_addr.s_addr ==
			    ia->ia_addr.sin_addr.s_addr)
				own++;
		}
	}

	if (!ia_if)
		return (EADDRNOTAVAIL);

	ia = ia_if;
	ifp = ia->ia_ifp;

	/* Refuse a parent different from the one already used for multicast. */
	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
	    (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
		return (EADDRNOTAVAIL);

	/* No membership yet: join the CARP group on the parent interface. */
	if (imo->imo_num_memberships == 0) {
		addr.s_addr = htonl(INADDR_CARP_GROUP);
		if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
			return (ENOBUFS);
		imo->imo_num_memberships++;
		imo->imo_multicast_ifp = ifp;
		imo->imo_multicast_ttl = CARP_DFLTTL;
		imo->imo_multicast_loop = 0;
	}

	/*
	 * NOTE(review): the cleanup label further down releases the newest
	 * membership slot whether or not the join above happened during this
	 * call -- confirm that is intended when an address was already set.
	 */
	if (!ifp->if_carp) {

		/* First carp interface on this parent: create its carp_if. */
		MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
		    M_WAITOK|M_ZERO);
		/*
		 * NOTE(review): with M_WAITOK the allocation is presumably
		 * never NULL (see malloc(9)), which would make this branch
		 * dead code -- confirm.
		 */
		if (!cif) {
			error = ENOBUFS;
			goto cleanup;
		}
		if ((error = ifpromisc(ifp, 1))) {
			FREE(cif, M_CARP);
			goto cleanup;
		}

		CARP_LOCK_INIT(cif);
		CARP_LOCK(cif);
		cif->vhif_ifp = ifp;
		TAILQ_INIT(&cif->vhif_vrs);
		ifp->if_carp = cif;

	} else {
		struct carp_softc *vr;

		/* Existing carp_if: the vhid must be unique on this parent. */
		cif = (struct carp_if *)ifp->if_carp;
		CARP_LOCK(cif);
		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
				CARP_UNLOCK(cif);
				error = EINVAL;
				goto cleanup;
			}
	}
	/* From here on the carp_if lock is held in both branches. */
	sc->sc_ia = ia;
	sc->sc_carpdev = ifp;

	{ /* XXX prevent endless loop if already in queue */
		struct
carp_softc *vr, *after = NULL; 1495 int myself = 0; 1496 cif = (struct carp_if *)ifp->if_carp; 1497 1498 /* XXX: cif should not change, right? So we still hold the lock */ 1499 CARP_LOCK_ASSERT(cif); 1500 1501 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1502 if (vr == sc) 1503 myself = 1; 1504 if (vr->sc_vhid < sc->sc_vhid) 1505 after = vr; 1506 } 1507 1508 if (!myself) { 1509 /* We're trying to keep things in order */ 1510 if (after == NULL) { 1511 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1512 } else { 1513 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1514 } 1515 cif->vhif_nvrs++; 1516 } 1517 } 1518 1519 sc->sc_naddrs++; 1520 SC2IFP(sc)->if_flags |= IFF_UP; 1521 if (own) 1522 sc->sc_advskew = 0; 1523 carp_sc_state_locked(sc); 1524 carp_setrun(sc, 0); 1525 1526 CARP_UNLOCK(cif); 1527 1528 return (0); 1529 1530 cleanup: 1531 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1532 return (error); 1533 } 1534 1535 static int 1536 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1537 { 1538 int error = 0; 1539 1540 if (!--sc->sc_naddrs) { 1541 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1542 struct ip_moptions *imo = &sc->sc_imo; 1543 1544 CARP_LOCK(cif); 1545 callout_stop(&sc->sc_ad_tmo); 1546 SC2IFP(sc)->if_flags &= ~IFF_UP; 1547 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1548 sc->sc_vhid = -1; 1549 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1550 imo->imo_multicast_ifp = NULL; 1551 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1552 if (!--cif->vhif_nvrs) { 1553 sc->sc_carpdev->if_carp = NULL; 1554 CARP_LOCK_DESTROY(cif); 1555 FREE(cif, M_IFADDR); 1556 } else { 1557 CARP_UNLOCK(cif); 1558 } 1559 } 1560 1561 return (error); 1562 } 1563 1564 #ifdef INET6 1565 static int 1566 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1567 { 1568 struct ifnet *ifp; 1569 struct carp_if *cif; 1570 struct in6_ifaddr *ia, *ia_if; 1571 struct ip6_moptions *im6o = &sc->sc_im6o; 1572 struct 
in6_multi_mship *imm; 1573 struct in6_addr in6; 1574 int own, error; 1575 1576 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1577 if (!(SC2IFP(sc)->if_flags & IFF_UP)) 1578 carp_set_state(sc, INIT); 1579 if (sc->sc_naddrs6) 1580 SC2IFP(sc)->if_flags |= IFF_UP; 1581 carp_setrun(sc, 0); 1582 return (0); 1583 } 1584 1585 /* we have to do it by hands to check we won't match on us */ 1586 ia_if = NULL; own = 0; 1587 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1588 int i; 1589 1590 for (i = 0; i < 4; i++) { 1591 if ((sin6->sin6_addr.s6_addr32[i] & 1592 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1593 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1594 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1595 break; 1596 } 1597 /* and, yeah, we need a multicast-capable iface too */ 1598 if (ia->ia_ifp != SC2IFP(sc) && 1599 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1600 (i == 4)) { 1601 if (!ia_if) 1602 ia_if = ia; 1603 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1604 &ia->ia_addr.sin6_addr)) 1605 own++; 1606 } 1607 } 1608 1609 if (!ia_if) 1610 return (EADDRNOTAVAIL); 1611 ia = ia_if; 1612 ifp = ia->ia_ifp; 1613 1614 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1615 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1616 return (EADDRNOTAVAIL); 1617 1618 if (!sc->sc_naddrs6) { 1619 im6o->im6o_multicast_ifp = ifp; 1620 1621 /* join CARP multicast address */ 1622 bzero(&in6, sizeof(in6)); 1623 in6.s6_addr16[0] = htons(0xff02); 1624 in6.s6_addr8[15] = 0x12; 1625 if (in6_setscope(&in6, ifp, NULL) != 0) 1626 goto cleanup; 1627 if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL) 1628 goto cleanup; 1629 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1630 1631 /* join solicited multicast address */ 1632 bzero(&in6, sizeof(in6)); 1633 in6.s6_addr16[0] = htons(0xff02); 1634 in6.s6_addr32[1] = 0; 1635 in6.s6_addr32[2] = htonl(1); 1636 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1637 in6.s6_addr8[12] = 0xff; 1638 if (in6_setscope(&in6, ifp, NULL) != 0) 
goto cleanup;
		if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL)
			goto cleanup;
		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
	}

	if (!ifp->if_carp) {
		/* First carp interface on this parent: create its carp_if. */
		MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
		    M_WAITOK|M_ZERO);
		/*
		 * NOTE(review): with M_WAITOK the allocation is presumably
		 * never NULL (see malloc(9)), which would make this branch
		 * dead code -- confirm.
		 */
		if (!cif) {
			error = ENOBUFS;
			goto cleanup;
		}
		if ((error = ifpromisc(ifp, 1))) {
			FREE(cif, M_CARP);
			goto cleanup;
		}

		CARP_LOCK_INIT(cif);
		CARP_LOCK(cif);
		cif->vhif_ifp = ifp;
		TAILQ_INIT(&cif->vhif_vrs);
		ifp->if_carp = cif;

	} else {
		struct carp_softc *vr;

		/* Existing carp_if: the vhid must be unique on this parent. */
		cif = (struct carp_if *)ifp->if_carp;
		CARP_LOCK(cif);
		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
				CARP_UNLOCK(cif);
				error = EINVAL;
				goto cleanup;
			}
	}
	/* From here on the carp_if lock is held in both branches. */
	sc->sc_ia6 = ia;
	sc->sc_carpdev = ifp;

	{ /* XXX prevent endless loop if already in queue */
		struct carp_softc *vr, *after = NULL;
		int myself = 0;
		cif = (struct carp_if *)ifp->if_carp;
		CARP_LOCK_ASSERT(cif);

		/* Note whether sc is already queued and where it would go. */
		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
			if (vr == sc)
				myself = 1;
			if (vr->sc_vhid < sc->sc_vhid)
				after = vr;
		}

		if (!myself) {
			/* We're trying to keep things in order */
			if (after == NULL) {
				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
			} else {
				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
			}
			cif->vhif_nvrs++;
		}
	}

	sc->sc_naddrs6++;
	SC2IFP(sc)->if_flags |= IFF_UP;
	/* we are configured with an address the parent owns: no skew */
	if (own)
		sc->sc_advskew = 0;
	carp_sc_state_locked(sc);
	carp_setrun(sc, 0);

	CARP_UNLOCK(cif);

	return (0);

cleanup:
	/* clean up multicast memberships */
	/* only when no IPv6 address is configured, i.e. nothing else needs them */
	if (!sc->sc_naddrs6) {
		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
			imm = LIST_FIRST(&im6o->im6o_memberships);
			LIST_REMOVE(imm, i6mm_chain);
			in6_leavegroup(imm);
} 1721 } 1722 return (error); 1723 } 1724 1725 static int 1726 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1727 { 1728 int error = 0; 1729 1730 if (!--sc->sc_naddrs6) { 1731 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1732 struct ip6_moptions *im6o = &sc->sc_im6o; 1733 1734 CARP_LOCK(cif); 1735 callout_stop(&sc->sc_ad_tmo); 1736 SC2IFP(sc)->if_flags &= ~IFF_UP; 1737 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 1738 sc->sc_vhid = -1; 1739 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1740 struct in6_multi_mship *imm = 1741 LIST_FIRST(&im6o->im6o_memberships); 1742 1743 LIST_REMOVE(imm, i6mm_chain); 1744 in6_leavegroup(imm); 1745 } 1746 im6o->im6o_multicast_ifp = NULL; 1747 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1748 if (!--cif->vhif_nvrs) { 1749 CARP_LOCK_DESTROY(cif); 1750 sc->sc_carpdev->if_carp = NULL; 1751 FREE(cif, M_IFADDR); 1752 } else 1753 CARP_UNLOCK(cif); 1754 } 1755 1756 return (error); 1757 } 1758 #endif /* INET6 */ 1759 1760 static int 1761 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1762 { 1763 struct carp_softc *sc = ifp->if_softc, *vr; 1764 struct carpreq carpr; 1765 struct ifaddr *ifa; 1766 struct ifreq *ifr; 1767 struct ifaliasreq *ifra; 1768 int locked = 0, error = 0; 1769 1770 ifa = (struct ifaddr *)addr; 1771 ifra = (struct ifaliasreq *)addr; 1772 ifr = (struct ifreq *)addr; 1773 1774 switch (cmd) { 1775 case SIOCSIFADDR: 1776 switch (ifa->ifa_addr->sa_family) { 1777 #ifdef INET 1778 case AF_INET: 1779 SC2IFP(sc)->if_flags |= IFF_UP; 1780 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1781 sizeof(struct sockaddr)); 1782 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1783 break; 1784 #endif /* INET */ 1785 #ifdef INET6 1786 case AF_INET6: 1787 SC2IFP(sc)->if_flags |= IFF_UP; 1788 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1789 break; 1790 #endif /* INET6 */ 1791 default: 1792 error = EAFNOSUPPORT; 1793 break; 1794 } 1795 break; 1796 1797 case SIOCAIFADDR: 1798 switch 
(ifa->ifa_addr->sa_family) {
		/*
		 * NOTE(review): the family is read through the ifaddr view
		 * while the address handed on comes from the ifaliasreq view
		 * of the same argument -- confirm which layout callers pass
		 * for SIOCAIFADDR and SIOCDIFADDR.
		 */
#ifdef INET
		case AF_INET:
			SC2IFP(sc)->if_flags |= IFF_UP;
			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
			    sizeof(struct sockaddr));
			error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			SC2IFP(sc)->if_flags |= IFF_UP;
			error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCDIFADDR:
		switch (ifa->ifa_addr->sa_family) {
#ifdef INET
		case AF_INET:
			error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFFLAGS:
		/* the softc lock is only taken once a parent is attached */
		if (sc->sc_carpdev) {
			locked = 1;
			CARP_SCLOCK(sc);
		}
		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
			/* going down: stop all timers and fall back to INIT */
			callout_stop(&sc->sc_ad_tmo);
			callout_stop(&sc->sc_md_tmo);
			callout_stop(&sc->sc_md6_tmo);
			if (sc->sc_state == MASTER)
				carp_send_ad_locked(sc);
			carp_set_state(sc, INIT);
			carp_setrun(sc, 0);
		} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
			/* coming up from INIT: let carp_setrun() pick the state */
			SC2IFP(sc)->if_flags |= IFF_UP;
			carp_setrun(sc, 0);
		}
		break;

	case SIOCSVH:
		/* changing carp parameters is a privileged operation */
		error = priv_check(curthread, PRIV_NETINET_CARP);
		if (error)
			break;
		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
			break;
		/*
		 * error starts at 1 and is decremented for each group of
		 * parameters accepted further down; a value still above 0 at
		 * the end of this case is turned into EINVAL.
		 */
		error = 1;
		if (sc->sc_carpdev) {
			locked = 1;
			CARP_SCLOCK(sc);
		}
		/* a state change can only be requested once out of INIT */
		if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
			switch (carpr.carpr_state) {
			case BACKUP:
				callout_stop(&sc->sc_ad_tmo);
				carp_set_state(sc, BACKUP);
				carp_setrun(sc, 0);
				carp_setroute(sc, RTM_DELETE);
				break;
1875 case MASTER: 1876 carp_master_down_locked(sc); 1877 break; 1878 default: 1879 break; 1880 } 1881 } 1882 if (carpr.carpr_vhid > 0) { 1883 if (carpr.carpr_vhid > 255) { 1884 error = EINVAL; 1885 break; 1886 } 1887 if (sc->sc_carpdev) { 1888 struct carp_if *cif; 1889 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1890 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1891 if (vr != sc && 1892 vr->sc_vhid == carpr.carpr_vhid) 1893 return EEXIST; 1894 } 1895 sc->sc_vhid = carpr.carpr_vhid; 1896 IF_LLADDR(sc->sc_ifp)[0] = 0; 1897 IF_LLADDR(sc->sc_ifp)[1] = 0; 1898 IF_LLADDR(sc->sc_ifp)[2] = 0x5e; 1899 IF_LLADDR(sc->sc_ifp)[3] = 0; 1900 IF_LLADDR(sc->sc_ifp)[4] = 1; 1901 IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; 1902 error--; 1903 } 1904 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1905 if (carpr.carpr_advskew >= 255) { 1906 error = EINVAL; 1907 break; 1908 } 1909 if (carpr.carpr_advbase > 255) { 1910 error = EINVAL; 1911 break; 1912 } 1913 sc->sc_advbase = carpr.carpr_advbase; 1914 sc->sc_advskew = carpr.carpr_advskew; 1915 error--; 1916 } 1917 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1918 if (error > 0) 1919 error = EINVAL; 1920 else { 1921 error = 0; 1922 carp_setrun(sc, 0); 1923 } 1924 break; 1925 1926 case SIOCGVH: 1927 /* XXX: lockless read */ 1928 bzero(&carpr, sizeof(carpr)); 1929 carpr.carpr_state = sc->sc_state; 1930 carpr.carpr_vhid = sc->sc_vhid; 1931 carpr.carpr_advbase = sc->sc_advbase; 1932 carpr.carpr_advskew = sc->sc_advskew; 1933 error = priv_check(curthread, PRIV_NETINET_CARP); 1934 if (error == 0) 1935 bcopy(sc->sc_key, carpr.carpr_key, 1936 sizeof(carpr.carpr_key)); 1937 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1938 break; 1939 1940 default: 1941 error = EINVAL; 1942 } 1943 1944 if (locked) 1945 CARP_SCUNLOCK(sc); 1946 1947 carp_hmac_prepare(sc); 1948 1949 return (error); 1950 } 1951 1952 /* 1953 * XXX: this is looutput. We should eventually use it from there. 
1954 */ 1955 static int 1956 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1957 struct rtentry *rt) 1958 { 1959 u_int32_t af; 1960 1961 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1962 1963 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1964 m_freem(m); 1965 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1966 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1967 } 1968 1969 ifp->if_opackets++; 1970 ifp->if_obytes += m->m_pkthdr.len; 1971 1972 /* BPF writes need to be handled specially. */ 1973 if (dst->sa_family == AF_UNSPEC) { 1974 bcopy(dst->sa_data, &af, sizeof(af)); 1975 dst->sa_family = af; 1976 } 1977 1978 #if 1 /* XXX */ 1979 switch (dst->sa_family) { 1980 case AF_INET: 1981 case AF_INET6: 1982 case AF_IPX: 1983 case AF_APPLETALK: 1984 break; 1985 default: 1986 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 1987 m_freem(m); 1988 return (EAFNOSUPPORT); 1989 } 1990 #endif 1991 return(if_simloop(ifp, m, dst->sa_family, 0)); 1992 } 1993 1994 /* 1995 * Start output on carp interface. This function should never be called. 
*/
static void
carp_start(struct ifnet *ifp)
{
	/* nothing to transmit here; only leaves a trace in DEBUG kernels */
#ifdef DEBUG
	printf("%s: start called\n", ifp->if_xname);
#endif
}

/*
 * Output hook: when mbuf m carries a PACKET_TAG_CARP tag, overwrite the
 * source address in its link-layer header with the virtual router address
 * derived from the vhid of the carp interface stored in the tag.
 *
 * Returns 0 when there is nothing to do (no destination, a family other
 * than INET/INET6, no tag) or after rewriting the header, and EOPNOTSUPP
 * when ifp is of a type other than Ethernet, L2VLAN, FDDI or ISO88025.
 * rt is not used.
 */
int
carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
    struct rtentry *rt)
{
	struct m_tag *mtag;
	struct carp_softc *sc;
	struct ifnet *carp_ifp;

	if (!sa)
		return (0);

	/* only IPv4/IPv6 traffic (as compiled in) is of interest */
	switch (sa->sa_family) {
#ifdef INET
	case AF_INET:
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		break;
#endif /* INET6 */
	default:
		return (0);
	}

	mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
	if (mtag == NULL)
		return (0);

	/* the tag payload is a pointer to the carp ifnet */
	bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
	sc = carp_ifp->if_softc;

	/* Set the source MAC address to Virtual Router MAC Address */
	/*
	 * NOTE(review): mtod() is used directly, so this presumably expects
	 * the link-layer header at the start of the mbuf data -- confirm
	 * against the callers.
	 */
	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_L2VLAN: {
			struct ether_header *eh;

			/* 00:00:5e:00:01:<vhid> */
			eh = mtod(m, struct ether_header *);
			eh->ether_shost[0] = 0;
			eh->ether_shost[1] = 0;
			eh->ether_shost[2] = 0x5e;
			eh->ether_shost[3] = 0;
			eh->ether_shost[4] = 1;
			eh->ether_shost[5] = sc->sc_vhid;
		}
		break;
	case IFT_FDDI: {
			struct fddi_header *fh;

			/* 00:00:5e:00:01:<vhid> */
			fh = mtod(m, struct fddi_header *);
			fh->fddi_shost[0] = 0;
			fh->fddi_shost[1] = 0;
			fh->fddi_shost[2] = 0x5e;
			fh->fddi_shost[3] = 0;
			fh->fddi_shost[4] = 1;
			fh->fddi_shost[5] = sc->sc_vhid;
		}
		break;
	case IFT_ISO88025: {
			struct iso88025_header *th;
			th = mtod(m, struct iso88025_header *);
			/*
			 * NOTE(review): the shift count is sc_vhid - 1; for
			 * large vhids it reaches or exceeds the width of int,
			 * which C leaves undefined -- confirm the vhid range
			 * expected on token ring.
			 */
			th->iso88025_shost[0] = 3;
			th->iso88025_shost[1] = 0;
			th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
			th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
			th->iso88025_shost[4] = 0;
			th->iso88025_shost[5] = 0;
		}
		break;
	default:
		printf("%s: carp is not supported for this interface type\n",
		    ifp->if_xname);
return (EOPNOTSUPP); 2078 } 2079 2080 return (0); 2081 } 2082 2083 static void 2084 carp_set_state(struct carp_softc *sc, int state) 2085 { 2086 2087 if (sc->sc_carpdev) 2088 CARP_SCLOCK_ASSERT(sc); 2089 2090 if (sc->sc_state == state) 2091 return; 2092 2093 sc->sc_state = state; 2094 switch (state) { 2095 case BACKUP: 2096 SC2IFP(sc)->if_link_state = LINK_STATE_DOWN; 2097 break; 2098 case MASTER: 2099 SC2IFP(sc)->if_link_state = LINK_STATE_UP; 2100 break; 2101 default: 2102 SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN; 2103 break; 2104 } 2105 rt_ifmsg(SC2IFP(sc)); 2106 } 2107 2108 void 2109 carp_carpdev_state(void *v) 2110 { 2111 struct carp_if *cif = v; 2112 2113 CARP_LOCK(cif); 2114 carp_carpdev_state_locked(cif); 2115 CARP_UNLOCK(cif); 2116 } 2117 2118 static void 2119 carp_carpdev_state_locked(struct carp_if *cif) 2120 { 2121 struct carp_softc *sc; 2122 2123 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2124 carp_sc_state_locked(sc); 2125 } 2126 2127 static void 2128 carp_sc_state_locked(struct carp_softc *sc) 2129 { 2130 CARP_SCLOCK_ASSERT(sc); 2131 2132 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2133 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2134 sc->sc_flags_backup = SC2IFP(sc)->if_flags; 2135 SC2IFP(sc)->if_flags &= ~IFF_UP; 2136 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 2137 callout_stop(&sc->sc_ad_tmo); 2138 callout_stop(&sc->sc_md_tmo); 2139 callout_stop(&sc->sc_md6_tmo); 2140 carp_set_state(sc, INIT); 2141 carp_setrun(sc, 0); 2142 if (!sc->sc_suppress) { 2143 carp_suppress_preempt++; 2144 if (carp_suppress_preempt == 1) { 2145 CARP_SCUNLOCK(sc); 2146 carp_send_ad_all(); 2147 CARP_SCLOCK(sc); 2148 } 2149 } 2150 sc->sc_suppress = 1; 2151 } else { 2152 SC2IFP(sc)->if_flags |= sc->sc_flags_backup; 2153 carp_set_state(sc, INIT); 2154 carp_setrun(sc, 0); 2155 if (sc->sc_suppress) 2156 carp_suppress_preempt--; 2157 sc->sc_suppress = 0; 2158 } 2159 2160 return; 2161 } 2162 2163 static int 2164 carp_modevent(module_t mod, int type, void *data) 2165 
{ 2166 switch (type) { 2167 case MOD_LOAD: 2168 if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 2169 carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); 2170 if (if_detach_event_tag == NULL) 2171 return (ENOMEM); 2172 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2173 LIST_INIT(&carpif_list); 2174 if_clone_attach(&carp_cloner); 2175 break; 2176 2177 case MOD_UNLOAD: 2178 EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); 2179 if_clone_detach(&carp_cloner); 2180 mtx_destroy(&carp_mtx); 2181 break; 2182 2183 default: 2184 return (EINVAL); 2185 } 2186 2187 return (0); 2188 } 2189 2190 static moduledata_t carp_mod = { 2191 "carp", 2192 carp_modevent, 2193 0 2194 }; 2195 2196 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2197