1 /* $FreeBSD$ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "opt_carp.h" 30 #include "opt_bpf.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/conf.h> 38 #include <sys/kernel.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/time.h> 44 #include <sys/proc.h> 45 #include <sys/sysctl.h> 46 #include <sys/syslog.h> 47 #include <sys/signalvar.h> 48 #include <sys/filio.h> 49 #include <sys/sockio.h> 50 51 #include <sys/socket.h> 52 #include <sys/vnode.h> 53 54 #include <machine/stdarg.h> 55 56 #include <net/bpf.h> 57 #include <net/ethernet.h> 58 #include <net/fddi.h> 59 #include <net/iso88025.h> 60 #include <net/if.h> 61 #include <net/if_clone.h> 62 #include <net/if_types.h> 63 #include <net/route.h> 64 65 #ifdef INET 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/in_systm.h> 69 #include <netinet/ip.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/if_ether.h> 72 #include <machine/in_cksum.h> 73 #endif 74 75 #ifdef INET6 76 #include <netinet/icmp6.h> 77 #include <netinet/ip6.h> 78 #include <netinet6/ip6_var.h> 79 #include <netinet6/nd6.h> 80 #include <net/if_dl.h> 81 #endif 82 83 #include <crypto/sha1.h> 84 #include <netinet/ip_carp.h> 85 86 #define CARP_IFNAME "carp" 87 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 88 SYSCTL_DECL(_net_inet_carp); 89 90 struct carp_softc { 91 struct arpcom sc_ac; /* Interface clue */ 92 #define sc_if sc_ac.ac_if 93 struct ifnet *sc_carpdev; /* Pointer to parent interface */ 94 struct in_ifaddr *sc_ia; /* primary iface address */ 95 struct ip_moptions sc_imo; 96 #ifdef INET6 97 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 98 struct ip6_moptions sc_im6o; 99 #endif /* INET6 */ 100 TAILQ_ENTRY(carp_softc) sc_list; 101 102 enum { INIT = 0, BACKUP, MASTER } sc_state; 103 104 int sc_flags_backup; 105 int sc_suppress; 106 107 int sc_sendad_errors; 108 #define CARP_SENDAD_MAX_ERRORS 3 109 int sc_sendad_success; 110 #define CARP_SENDAD_MIN_SUCCESS 3 111 112 int sc_vhid; 113 int sc_advskew; 114 int sc_naddrs; 115 int sc_naddrs6; 116 int sc_advbase; /* seconds */ 117 int sc_init_counter; 118 u_int64_t sc_counter; 119 120 /* authentication */ 121 #define CARP_HMAC_PAD 64 122 unsigned char sc_key[CARP_KEY_LEN]; 123 unsigned char sc_pad[CARP_HMAC_PAD]; 124 SHA1_CTX sc_sha1; 125 126 struct callout sc_ad_tmo; /* advertisement timeout */ 127 struct callout sc_md_tmo; /* master down timeout */ 128 struct callout sc_md6_tmo; /* master down timeout */ 129 130 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 131 }; 132 133 int carp_suppress_preempt = 0; 134 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 135 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 136 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 137 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 138 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 139 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 140 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 141 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 142 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 143 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, 144 &carp_suppress_preempt, 0, "Preemption is suppressed"); 145 146 struct carpstats carpstats; 147 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 148 &carpstats, carpstats, 149 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 150 151 struct carp_if { 152 TAILQ_HEAD(, carp_softc) vhif_vrs; 153 int vhif_nvrs; 154 155 struct ifnet *vhif_ifp; 156 struct mtx vhif_mtx; 157 }; 158 159 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */ 160 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) 161 162 /* lock per carp_if queue */ 163 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ 164 NULL, MTX_DEF) 165 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) 166 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) 167 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) 168 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) 169 170 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) 171 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) 172 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) 173 174 #define CARP_LOG(...) do { \ 175 if (carp_opts[CARPCTL_LOG] > 0) \ 176 log(LOG_INFO, __VA_ARGS__); \ 177 } while (0) 178 179 #define CARP_DEBUG(...) do { \ 180 if (carp_opts[CARPCTL_LOG] > 1) \ 181 log(LOG_DEBUG, __VA_ARGS__); \ 182 } while (0) 183 184 static void carp_hmac_prepare(struct carp_softc *); 185 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 186 unsigned char *); 187 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 188 unsigned char *); 189 static void carp_setroute(struct carp_softc *, int); 190 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 191 static int carp_clone_create(struct if_clone *, int); 192 static void carp_clone_destroy(struct ifnet *); 193 static void carpdetach(struct carp_softc *); 194 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 195 struct carp_header *); 196 static void carp_send_ad_all(void); 197 static void carp_send_ad(void *); 198 static void carp_send_ad_locked(struct carp_softc *); 199 static void carp_send_arp(struct carp_softc *); 200 static void carp_master_down(void *); 201 static void carp_master_down_locked(struct carp_softc *); 202 static int carp_ioctl(struct ifnet *, u_long, caddr_t); 203 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 204 struct rtentry *); 205 static void carp_start(struct ifnet *); 206 static void carp_setrun(struct carp_softc *, sa_family_t); 207 static void carp_set_state(struct carp_softc *, int); 208 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 209 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 210 211 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 212 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); 213 static void carp_carpdev_state_locked(struct carp_if *); 214 static void carp_sc_state_locked(struct carp_softc *); 215 #ifdef INET6 216 static void carp_send_na(struct carp_softc *); 217 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 218 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 219 #endif 220 221 static LIST_HEAD(, carp_softc) carpif_list; 222 static struct mtx carp_mtx; 223 IFC_SIMPLE_DECLARE(carp, 0); 224 225 static __inline u_int16_t 226 carp_cksum(struct mbuf *m, int len) 227 { 228 return (in_cksum(m, len)); 229 } 230 231 static void 232 carp_hmac_prepare(struct carp_softc *sc) 233 { 234 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 235 u_int8_t vhid = sc->sc_vhid & 0xff; 236 struct ifaddr *ifa; 237 int i; 238 #ifdef INET6 239 struct in6_addr in6; 240 #endif 241 242 if (sc->sc_carpdev) 243 CARP_SCLOCK(sc); 244 245 /* XXX: possible race here */ 246 247 /* compute ipad from key */ 248 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 249 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 250 for (i = 0; i < sizeof(sc->sc_pad); i++) 251 sc->sc_pad[i] ^= 0x36; 252 253 /* precompute first part of inner hash */ 254 SHA1Init(&sc->sc_sha1); 255 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 256 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 257 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 258 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 259 #ifdef INET 260 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 261 if (ifa->ifa_addr->sa_family == AF_INET) 262 SHA1Update(&sc->sc_sha1, 263 (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr, 264 sizeof(struct in_addr)); 265 } 266 #endif /* INET */ 267 #ifdef INET6 268 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 269 if (ifa->ifa_addr->sa_family == AF_INET6) { 270 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 271 if (IN6_IS_ADDR_LINKLOCAL(&in6)) 272 in6.s6_addr16[1] = 0; 273 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 274 } 275 } 276 #endif /* INET6 */ 277 278 /* convert ipad to opad */ 279 for (i = 0; i < sizeof(sc->sc_pad); i++) 280 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 281 282 if (sc->sc_carpdev) 283 CARP_SCUNLOCK(sc); 284 } 285 286 static void 287 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 288 unsigned char md[20]) 289 { 290 SHA1_CTX sha1ctx; 291 292 /* fetch first half of inner hash */ 293 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 294 295 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 296 SHA1Final(md, &sha1ctx); 297 298 /* outer hash */ 299 SHA1Init(&sha1ctx); 300 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 301 SHA1Update(&sha1ctx, md, 20); 302 SHA1Final(md, &sha1ctx); 303 } 304 305 static int 306 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 307 unsigned char md[20]) 308 { 309 unsigned char md2[20]; 310 311 CARP_SCLOCK_ASSERT(sc); 312 313 carp_hmac_generate(sc, counter, md2); 314 315 return (bcmp(md, md2, sizeof(md2))); 316 } 317 318 static void 319 carp_setroute(struct carp_softc *sc, int cmd) 320 { 321 struct ifaddr *ifa; 322 int s; 323 324 if (sc->sc_carpdev) 325 CARP_SCLOCK_ASSERT(sc); 326 327 s = splnet(); 328 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 329 if (ifa->ifa_addr->sa_family == AF_INET && 330 sc->sc_carpdev != NULL) { 331 int count = carp_addrcount( 332 (struct carp_if *)sc->sc_carpdev->if_carp, 333 ifatoia(ifa), CARP_COUNT_MASTER); 334 335 if ((cmd == RTM_ADD && count == 1) || 336 (cmd == RTM_DELETE && count == 0)) 337 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 338 } 339 #ifdef INET6 340 if (ifa->ifa_addr->sa_family == AF_INET6) { 341 if (cmd == RTM_ADD) 342 in6_ifaddloop(ifa); 343 else 344 in6_ifremloop(ifa); 345 } 346 #endif /* INET6 */ 347 } 348 splx(s); 349 } 350 351 static int 352 carp_clone_create(struct if_clone *ifc, int unit) 353 { 354 355 struct carp_softc *sc; 356 struct ifnet *ifp; 357 358 MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 359 360 sc->sc_flags_backup = 0; 361 sc->sc_suppress = 0; 362 sc->sc_advbase = CARP_DFLTINTV; 363 sc->sc_vhid = -1; /* required setting */ 364 sc->sc_advskew = 0; 365 sc->sc_init_counter = 1; 366 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ 367 #ifdef INET6 368 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 369 #endif 370 371 callout_init(&sc->sc_ad_tmo, NET_CALLOUT_MPSAFE); 372 callout_init(&sc->sc_md_tmo, NET_CALLOUT_MPSAFE); 373 callout_init(&sc->sc_md6_tmo, NET_CALLOUT_MPSAFE); 374 375 ifp = &sc->sc_if; 376 ifp->if_softc = sc; 377 if_initname(ifp, CARP_IFNAME, unit); 378 ifp->if_mtu = ETHERMTU; 379 ifp->if_flags = 0; 380 ifp->if_ioctl = carp_ioctl; 381 ifp->if_output = carp_looutput; 382 ifp->if_start = carp_start; 383 ifp->if_type = IFT_CARP; 384 ifp->if_snd.ifq_maxlen = ifqmaxlen; 385 ifp->if_hdrlen = 0; 386 if_attach(ifp); 387 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t)); 388 mtx_lock(&carp_mtx); 389 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 390 mtx_unlock(&carp_mtx); 391 return (0); 392 } 393 394 static void 395 carp_clone_destroy(struct ifnet *ifp) 396 { 397 struct carp_softc *sc = ifp->if_softc; 398 struct carp_if *cif; 399 struct ip_moptions *imo = &sc->sc_imo; 400 #ifdef INET6 401 struct ip6_moptions *im6o = &sc->sc_im6o; 402 #endif 403 404 /* carpdetach(sc); */ 405 406 /* 407 * If an interface is destroyed which is suppressing the preemption, 408 * decrease the global counter, otherwise the host will never get 409 * out of the carp supressing state. 410 */ 411 if (sc->sc_suppress) 412 carp_suppress_preempt--; 413 sc->sc_suppress = 0; 414 415 callout_stop(&sc->sc_ad_tmo); 416 callout_stop(&sc->sc_md_tmo); 417 callout_stop(&sc->sc_md6_tmo); 418 419 if (imo->imo_num_memberships) { 420 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 421 imo->imo_multicast_ifp = NULL; 422 } 423 #ifdef INET6 424 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 425 struct in6_multi_mship *imm = 426 LIST_FIRST(&im6o->im6o_memberships); 427 LIST_REMOVE(imm, i6mm_chain); 428 in6_leavegroup(imm); 429 } 430 im6o->im6o_multicast_ifp = NULL; 431 #endif 432 433 /* Remove ourself from parents if_carp queue */ 434 if (sc->sc_carpdev && (cif = sc->sc_carpdev->if_carp)) { 435 CARP_LOCK(cif); 436 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 437 if (!--cif->vhif_nvrs) { 438 sc->sc_carpdev->if_carp = NULL; 439 CARP_LOCK_DESTROY(cif); 440 FREE(cif, M_CARP); 441 ifpromisc(sc->sc_carpdev, 0); 442 } else { 443 CARP_UNLOCK(cif); 444 } 445 } 446 447 mtx_lock(&carp_mtx); 448 LIST_REMOVE(sc, sc_next); 449 mtx_unlock(&carp_mtx); 450 bpfdetach(ifp); 451 if_detach(ifp); 452 free(sc, M_CARP); 453 } 454 455 /* 456 * process input packet. 457 * we have rearranged checks order compared to the rfc, 458 * but it seems more efficient this way or not possible otherwise. 459 */ 460 void 461 carp_input(struct mbuf *m, int hlen) 462 { 463 struct ip *ip = mtod(m, struct ip *); 464 struct carp_header *ch; 465 int iplen, len; 466 467 carpstats.carps_ipackets++; 468 469 if (!carp_opts[CARPCTL_ALLOW]) { 470 m_freem(m); 471 return; 472 } 473 474 /* check if received on a valid carp interface */ 475 if (m->m_pkthdr.rcvif->if_carp == NULL) { 476 carpstats.carps_badif++; 477 CARP_LOG("carp_input: packet received on non-carp " 478 "interface: %s\n", 479 m->m_pkthdr.rcvif->if_xname); 480 m_freem(m); 481 return; 482 } 483 484 /* verify that the IP TTL is 255. */ 485 if (ip->ip_ttl != CARP_DFLTTL) { 486 carpstats.carps_badttl++; 487 CARP_LOG("carp_input: received ttl %d != 255i on %s\n", 488 ip->ip_ttl, 489 m->m_pkthdr.rcvif->if_xname); 490 m_freem(m); 491 return; 492 } 493 494 iplen = ip->ip_hl << 2; 495 496 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 497 carpstats.carps_badlen++; 498 CARP_LOG("carp_input: received len %zd < " 499 "sizeof(struct carp_header)\n", 500 m->m_len - sizeof(struct ip)); 501 m_freem(m); 502 return; 503 } 504 505 if (iplen + sizeof(*ch) < m->m_len) { 506 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 507 carpstats.carps_hdrops++; 508 CARP_LOG("carp_input: pullup failed\n"); 509 return; 510 } 511 ip = mtod(m, struct ip *); 512 } 513 ch = (struct carp_header *)((char *)ip + iplen); 514 515 /* 516 * verify that the received packet length is 517 * equal to the CARP header 518 */ 519 len = iplen + sizeof(*ch); 520 if (len > m->m_pkthdr.len) { 521 carpstats.carps_badlen++; 522 CARP_LOG("carp_input: packet too short %d on %s\n", 523 m->m_pkthdr.len, 524 m->m_pkthdr.rcvif->if_xname); 525 m_freem(m); 526 return; 527 } 528 529 if ((m = m_pullup(m, len)) == NULL) { 530 carpstats.carps_hdrops++; 531 return; 532 } 533 ip = mtod(m, struct ip *); 534 ch = (struct carp_header *)((char *)ip + iplen); 535 536 /* verify the CARP checksum */ 537 m->m_data += iplen; 538 if (carp_cksum(m, len - iplen)) { 539 carpstats.carps_badsum++; 540 CARP_LOG("carp_input: checksum failed on %s\n", 541 m->m_pkthdr.rcvif->if_xname); 542 m_freem(m); 543 return; 544 } 545 m->m_data -= iplen; 546 547 carp_input_c(m, ch, AF_INET); 548 } 549 550 #ifdef INET6 551 int 552 carp6_input(struct mbuf **mp, int *offp, int proto) 553 { 554 struct mbuf *m = *mp; 555 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 556 struct carp_header *ch; 557 u_int len; 558 559 carpstats.carps_ipackets6++; 560 561 if (!carp_opts[CARPCTL_ALLOW]) { 562 m_freem(m); 563 return (IPPROTO_DONE); 564 } 565 566 /* check if received on a valid carp interface */ 567 if (m->m_pkthdr.rcvif->if_carp == NULL) { 568 carpstats.carps_badif++; 569 CARP_LOG("carp6_input: packet received on non-carp " 570 "interface: %s\n", 571 m->m_pkthdr.rcvif->if_xname); 572 m_freem(m); 573 return (IPPROTO_DONE); 574 } 575 576 /* verify that the IP TTL is 255 */ 577 if (ip6->ip6_hlim != CARP_DFLTTL) { 578 carpstats.carps_badttl++; 579 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 580 ip6->ip6_hlim, 581 m->m_pkthdr.rcvif->if_xname); 582 m_freem(m); 583 return (IPPROTO_DONE); 584 } 585 586 /* verify that we have a complete carp packet */ 587 len = m->m_len; 588 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 589 if (ch == NULL) { 590 carpstats.carps_badlen++; 591 CARP_LOG("carp6_input: packet size %u too small\n", len); 592 return (IPPROTO_DONE); 593 } 594 595 596 /* verify the CARP checksum */ 597 m->m_data += *offp; 598 if (carp_cksum(m, sizeof(*ch))) { 599 carpstats.carps_badsum++; 600 CARP_LOG("carp6_input: checksum failed, on %s\n", 601 m->m_pkthdr.rcvif->if_xname); 602 m_freem(m); 603 return (IPPROTO_DONE); 604 } 605 m->m_data -= *offp; 606 607 carp_input_c(m, ch, AF_INET6); 608 return (IPPROTO_DONE); 609 } 610 #endif /* INET6 */ 611 612 static void 613 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 614 { 615 struct ifnet *ifp = m->m_pkthdr.rcvif; 616 struct carp_softc *sc; 617 u_int64_t tmp_counter; 618 struct timeval sc_tv, ch_tv; 619 620 /* verify that the VHID is valid on the receiving interface */ 621 CARP_LOCK(ifp->if_carp); 622 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 623 if (sc->sc_vhid == ch->carp_vhid) 624 break; 625 626 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 627 (IFF_UP|IFF_RUNNING)) { 628 carpstats.carps_badvhid++; 629 CARP_UNLOCK(ifp->if_carp); 630 m_freem(m); 631 return; 632 } 633 634 getmicrotime(&sc->sc_if.if_lastchange); 635 sc->sc_if.if_ipackets++; 636 sc->sc_if.if_ibytes += m->m_pkthdr.len; 637 638 if (sc->sc_if.if_bpf) { 639 struct ip *ip = mtod(m, struct ip *); 640 uint32_t af1 = af; 641 642 /* BPF wants net byte order */ 643 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 644 ip->ip_off = htons(ip->ip_off); 645 bpf_mtap2(sc->sc_if.if_bpf, &af1, sizeof(af1), m); 646 } 647 648 /* verify the CARP version. */ 649 if (ch->carp_version != CARP_VERSION) { 650 carpstats.carps_badver++; 651 sc->sc_if.if_ierrors++; 652 CARP_UNLOCK(ifp->if_carp); 653 CARP_LOG("%s; invalid version %d\n", 654 sc->sc_if.if_xname, 655 ch->carp_version); 656 m_freem(m); 657 return; 658 } 659 660 /* verify the hash */ 661 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 662 carpstats.carps_badauth++; 663 sc->sc_if.if_ierrors++; 664 CARP_UNLOCK(ifp->if_carp); 665 CARP_LOG("%s: incorrect hash\n", sc->sc_if.if_xname); 666 m_freem(m); 667 return; 668 } 669 670 tmp_counter = ntohl(ch->carp_counter[0]); 671 tmp_counter = tmp_counter<<32; 672 tmp_counter += ntohl(ch->carp_counter[1]); 673 674 /* XXX Replay protection goes here */ 675 676 sc->sc_init_counter = 0; 677 sc->sc_counter = tmp_counter; 678 679 sc_tv.tv_sec = sc->sc_advbase; 680 if (carp_suppress_preempt && sc->sc_advskew < 240) 681 sc_tv.tv_usec = 240 * 1000000 / 256; 682 else 683 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 684 ch_tv.tv_sec = ch->carp_advbase; 685 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 686 687 switch (sc->sc_state) { 688 case INIT: 689 break; 690 case MASTER: 691 /* 692 * If we receive an advertisement from a master who's going to 693 * be more frequent than us, go into BACKUP state. 694 */ 695 if (timevalcmp(&sc_tv, &ch_tv, >) || 696 timevalcmp(&sc_tv, &ch_tv, ==)) { 697 callout_stop(&sc->sc_ad_tmo); 698 CARP_DEBUG("%s: MASTER -> BACKUP " 699 "(more frequent advertisement received)\n", 700 sc->sc_if.if_xname); 701 carp_set_state(sc, BACKUP); 702 carp_setrun(sc, 0); 703 carp_setroute(sc, RTM_DELETE); 704 } 705 break; 706 case BACKUP: 707 /* 708 * If we're pre-empting masters who advertise slower than us, 709 * and this one claims to be slower, treat him as down. 710 */ 711 if (carp_opts[CARPCTL_PREEMPT] && 712 timevalcmp(&sc_tv, &ch_tv, <)) { 713 CARP_DEBUG("%s: BACKUP -> MASTER " 714 "(preempting a slower master)\n", 715 sc->sc_if.if_xname); 716 carp_master_down_locked(sc); 717 break; 718 } 719 720 /* 721 * If the master is going to advertise at such a low frequency 722 * that he's guaranteed to time out, we'd might as well just 723 * treat him as timed out now. 724 */ 725 sc_tv.tv_sec = sc->sc_advbase * 3; 726 if (timevalcmp(&sc_tv, &ch_tv, <)) { 727 CARP_DEBUG("%s: BACKUP -> MASTER " 728 "(master timed out)\n", 729 sc->sc_if.if_xname); 730 carp_master_down_locked(sc); 731 break; 732 } 733 734 /* 735 * Otherwise, we reset the counter and wait for the next 736 * advertisement. 737 */ 738 carp_setrun(sc, af); 739 break; 740 } 741 742 CARP_UNLOCK(ifp->if_carp); 743 744 m_freem(m); 745 return; 746 } 747 748 static void 749 carpdetach(struct carp_softc *sc) 750 { 751 struct ifaddr *ifa; 752 753 callout_stop(&sc->sc_ad_tmo); 754 callout_stop(&sc->sc_md_tmo); 755 callout_stop(&sc->sc_md6_tmo); 756 757 while ((ifa = TAILQ_FIRST(&sc->sc_if.if_addrlist)) != NULL) 758 if (ifa->ifa_addr->sa_family == AF_INET) { 759 struct in_ifaddr *ia = ifatoia(ifa); 760 761 carp_del_addr(sc, &ia->ia_addr); 762 763 /* ripped screaming from in_control(SIOCDIFADDR) */ 764 in_ifscrub(&sc->sc_if, ia); 765 TAILQ_REMOVE(&sc->sc_if.if_addrlist, ifa, ifa_link); 766 TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link); 767 IFAFREE((&ia->ia_ifa)); 768 } 769 } 770 771 /* Detach an interface from the carp. */ 772 void 773 carp_ifdetach(struct ifnet *ifp) 774 { 775 struct carp_softc *sc; 776 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 777 778 CARP_LOCK(cif); 779 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 780 carpdetach(sc); 781 CARP_UNLOCK(cif); 782 } 783 784 static int 785 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 786 { 787 struct m_tag *mtag; 788 struct ifnet *ifp = &sc->sc_if; 789 790 if (sc->sc_init_counter) { 791 /* this could also be seconds since unix epoch */ 792 sc->sc_counter = arc4random(); 793 sc->sc_counter = sc->sc_counter << 32; 794 sc->sc_counter += arc4random(); 795 } else 796 sc->sc_counter++; 797 798 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 799 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 800 801 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 802 803 /* Tag packet for carp_output */ 804 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 805 if (mtag == NULL) { 806 m_freem(m); 807 sc->sc_if.if_oerrors++; 808 return (ENOMEM); 809 } 810 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 811 m_tag_prepend(m, mtag); 812 813 return (0); 814 } 815 816 static void 817 carp_send_ad_all(void) 818 { 819 struct carp_softc *sc; 820 821 mtx_lock(&carp_mtx); 822 LIST_FOREACH(sc, &carpif_list, sc_next) { 823 if (sc->sc_carpdev == NULL) 824 continue; 825 CARP_SCLOCK(sc); 826 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) && 827 sc->sc_state == MASTER) 828 carp_send_ad_locked(sc); 829 CARP_SCUNLOCK(sc); 830 } 831 mtx_unlock(&carp_mtx); 832 } 833 834 static void 835 carp_send_ad(void *v) 836 { 837 struct carp_softc *sc = v; 838 839 CARP_SCLOCK(sc); 840 carp_send_ad_locked(sc); 841 CARP_SCUNLOCK(sc); 842 } 843 844 static void 845 carp_send_ad_locked(struct carp_softc *sc) 846 { 847 struct carp_header ch; 848 struct timeval tv; 849 struct carp_header *ch_ptr; 850 struct mbuf *m; 851 int len, advbase, advskew; 852 853 CARP_SCLOCK_ASSERT(sc); 854 855 /* bow out if we've lost our UPness or RUNNINGuiness */ 856 if ((sc->sc_if.if_flags & 857 (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 858 advbase = 255; 859 advskew = 255; 860 } else { 861 advbase = sc->sc_advbase; 862 if (!carp_suppress_preempt || sc->sc_advskew > 240) 863 advskew = sc->sc_advskew; 864 else 865 advskew = 240; 866 tv.tv_sec = advbase; 867 tv.tv_usec = advskew * 1000000 / 256; 868 } 869 870 ch.carp_version = CARP_VERSION; 871 ch.carp_type = CARP_ADVERTISEMENT; 872 ch.carp_vhid = sc->sc_vhid; 873 ch.carp_advbase = advbase; 874 ch.carp_advskew = advskew; 875 ch.carp_authlen = 7; /* XXX DEFINE */ 876 ch.carp_pad1 = 0; /* must be zero */ 877 ch.carp_cksum = 0; 878 879 #ifdef INET 880 if (sc->sc_ia) { 881 struct ip *ip; 882 883 MGETHDR(m, M_DONTWAIT, MT_HEADER); 884 if (m == NULL) { 885 sc->sc_ac.ac_if.if_oerrors++; 886 carpstats.carps_onomem++; 887 /* XXX maybe less ? */ 888 if (advbase != 255 || advskew != 255) 889 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 890 carp_send_ad, sc); 891 return; 892 } 893 len = sizeof(*ip) + sizeof(ch); 894 m->m_pkthdr.len = len; 895 m->m_pkthdr.rcvif = NULL; 896 m->m_len = len; 897 MH_ALIGN(m, m->m_len); 898 m->m_flags |= M_MCAST; 899 ip = mtod(m, struct ip *); 900 ip->ip_v = IPVERSION; 901 ip->ip_hl = sizeof(*ip) >> 2; 902 ip->ip_tos = IPTOS_LOWDELAY; 903 ip->ip_len = len; 904 ip->ip_id = ip_newid(); 905 ip->ip_off = IP_DF; 906 ip->ip_ttl = CARP_DFLTTL; 907 ip->ip_p = IPPROTO_CARP; 908 ip->ip_sum = 0; 909 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; 910 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 911 912 ch_ptr = (struct carp_header *)(&ip[1]); 913 bcopy(&ch, ch_ptr, sizeof(ch)); 914 if (carp_prepare_ad(m, sc, ch_ptr)) 915 return; 916 917 m->m_data += sizeof(*ip); 918 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 919 m->m_data -= sizeof(*ip); 920 921 getmicrotime(&sc->sc_if.if_lastchange); 922 sc->sc_ac.ac_if.if_opackets++; 923 sc->sc_ac.ac_if.if_obytes += len; 924 carpstats.carps_opackets++; 925 926 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 927 sc->sc_if.if_oerrors++; 928 if (sc->sc_sendad_errors < INT_MAX) 929 sc->sc_sendad_errors++; 930 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 931 carp_suppress_preempt++; 932 if (carp_suppress_preempt == 1) { 933 CARP_SCUNLOCK(sc); 934 carp_send_ad_all(); 935 CARP_SCLOCK(sc); 936 } 937 } 938 sc->sc_sendad_success = 0; 939 } else { 940 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 941 if (++sc->sc_sendad_success >= 942 CARP_SENDAD_MIN_SUCCESS) { 943 carp_suppress_preempt--; 944 sc->sc_sendad_errors = 0; 945 } 946 } else 947 sc->sc_sendad_errors = 0; 948 } 949 } 950 #endif /* INET */ 951 #ifdef INET6 952 if (sc->sc_ia6) { 953 struct ip6_hdr *ip6; 954 955 MGETHDR(m, M_DONTWAIT, MT_HEADER); 956 if (m == NULL) { 957 sc->sc_ac.ac_if.if_oerrors++; 958 carpstats.carps_onomem++; 959 /* XXX maybe less ? */ 960 if (advbase != 255 || advskew != 255) 961 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 962 carp_send_ad, sc); 963 return; 964 } 965 len = sizeof(*ip6) + sizeof(ch); 966 m->m_pkthdr.len = len; 967 m->m_pkthdr.rcvif = NULL; 968 m->m_len = len; 969 MH_ALIGN(m, m->m_len); 970 m->m_flags |= M_MCAST; 971 ip6 = mtod(m, struct ip6_hdr *); 972 bzero(ip6, sizeof(*ip6)); 973 ip6->ip6_vfc |= IPV6_VERSION; 974 ip6->ip6_hlim = CARP_DFLTTL; 975 ip6->ip6_nxt = IPPROTO_CARP; 976 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 977 sizeof(struct in6_addr)); 978 /* set the multicast destination */ 979 980 ip6->ip6_dst.s6_addr8[0] = 0xff; 981 ip6->ip6_dst.s6_addr8[1] = 0x02; 982 ip6->ip6_dst.s6_addr8[15] = 0x12; 983 984 ch_ptr = (struct carp_header *)(&ip6[1]); 985 bcopy(&ch, ch_ptr, sizeof(ch)); 986 if (carp_prepare_ad(m, sc, ch_ptr)) 987 return; 988 989 m->m_data += sizeof(*ip6); 990 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 991 m->m_data -= sizeof(*ip6); 992 993 getmicrotime(&sc->sc_if.if_lastchange); 994 sc->sc_if.if_opackets++; 995 sc->sc_if.if_obytes += len; 996 carpstats.carps_opackets6++; 997 998 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 999 sc->sc_if.if_oerrors++; 1000 if (sc->sc_sendad_errors < INT_MAX) 1001 sc->sc_sendad_errors++; 1002 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1003 carp_suppress_preempt++; 1004 if (carp_suppress_preempt == 1) { 1005 CARP_SCUNLOCK(sc); 1006 carp_send_ad_all(); 1007 CARP_SCLOCK(sc); 1008 } 1009 } 1010 sc->sc_sendad_success = 0; 1011 } else { 1012 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1013 if (++sc->sc_sendad_success >= 1014 CARP_SENDAD_MIN_SUCCESS) { 1015 carp_suppress_preempt--; 1016 sc->sc_sendad_errors = 0; 1017 } 1018 } else 1019 sc->sc_sendad_errors = 0; 1020 } 1021 } 1022 #endif /* INET6 */ 1023 1024 if (advbase != 255 || advskew != 255) 1025 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1026 carp_send_ad, sc); 1027 1028 } 1029 1030 /* 1031 * Broadcast a gratuitous ARP request containing 1032 * the virtual router MAC address for each IP address 1033 * associated with the virtual router. 1034 */ 1035 static void 1036 carp_send_arp(struct carp_softc *sc) 1037 { 1038 struct ifaddr *ifa; 1039 1040 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1041 1042 if (ifa->ifa_addr->sa_family != AF_INET) 1043 continue; 1044 1045 /* arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr); */ 1046 arp_ifinit2(sc->sc_carpdev, ifa, sc->sc_ac.ac_enaddr); 1047 1048 DELAY(1000); /* XXX */ 1049 } 1050 } 1051 1052 #ifdef INET6 1053 static void 1054 carp_send_na(struct carp_softc *sc) 1055 { 1056 struct ifaddr *ifa; 1057 struct in6_addr *in6; 1058 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1059 1060 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1061 1062 if (ifa->ifa_addr->sa_family != AF_INET6) 1063 continue; 1064 1065 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1066 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1067 ND_NA_FLAG_OVERRIDE, 1, NULL); 1068 DELAY(1000); /* XXX */ 1069 } 1070 } 1071 #endif /* INET6 */ 1072 1073 static int 1074 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1075 { 1076 struct carp_softc *vh; 1077 struct ifaddr *ifa; 1078 int count = 0; 1079 1080 CARP_LOCK_ASSERT(cif); 1081 1082 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1083 if ((type == CARP_COUNT_RUNNING && 1084 (vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1085 (IFF_UP|IFF_RUNNING)) || 1086 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1087 TAILQ_FOREACH(ifa, &vh->sc_ac.ac_if.if_addrlist, 1088 ifa_list) { 1089 if (ifa->ifa_addr->sa_family == AF_INET && 1090 ia->ia_addr.sin_addr.s_addr == 1091 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1092 count++; 1093 } 1094 } 1095 } 1096 return (count); 1097 } 1098 1099 int 1100 carp_iamatch(void *v, struct in_ifaddr *ia, 1101 struct in_addr *isaddr, u_int8_t **enaddr) 1102 { 1103 struct carp_if *cif = v; 1104 struct carp_softc *vh; 1105 int index, count = 0; 1106 struct ifaddr *ifa; 1107 1108 CARP_LOCK(cif); 1109 1110 if (carp_opts[CARPCTL_ARPBALANCE]) { 1111 /* 1112 * XXX proof of concept implementation. 1113 * We use the source ip to decide which virtual host should 1114 * handle the request. If we're master of that virtual host, 1115 * then we respond, otherwise, just drop the arp packet on 1116 * the floor. 1117 */ 1118 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1119 if (count == 0) { 1120 /* should never reach this */ 1121 CARP_UNLOCK(cif); 1122 return (0); 1123 } 1124 1125 /* this should be a hash, like pf_hash() */ 1126 index = isaddr->s_addr % count; 1127 count = 0; 1128 1129 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1130 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1131 (IFF_UP|IFF_RUNNING)) { 1132 TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, 1133 ifa_list) { 1134 if (ifa->ifa_addr->sa_family == 1135 AF_INET && 1136 ia->ia_addr.sin_addr.s_addr == 1137 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1138 if (count == index) { 1139 if (vh->sc_state == 1140 MASTER) { 1141 *enaddr = vh->sc_ac.ac_enaddr; 1142 CARP_UNLOCK(cif); 1143 return (1); 1144 } else { 1145 CARP_UNLOCK(cif); 1146 return (0); 1147 } 1148 } 1149 count++; 1150 } 1151 } 1152 } 1153 } 1154 } else { 1155 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1156 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1157 (IFF_UP|IFF_RUNNING) && ia->ia_ifp == 1158 &vh->sc_if) { 1159 *enaddr = vh->sc_ac.ac_enaddr; 1160 CARP_UNLOCK(cif); 1161 return (1); 1162 } 1163 } 1164 } 1165 CARP_UNLOCK(cif); 1166 return (0); 1167 } 1168 1169 #ifdef INET6 1170 struct ifaddr * 1171 carp_iamatch6(void *v, struct in6_addr *taddr) 1172 { 1173 struct carp_if *cif = v; 1174 struct carp_softc *vh; 1175 struct ifaddr *ifa; 1176 1177 CARP_LOCK(cif); 1178 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1179 TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, ifa_list) { 1180 if (IN6_ARE_ADDR_EQUAL(taddr, 1181 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1182 ((vh->sc_if.if_flags & 1183 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { 1184 CARP_UNLOCK(cif); 1185 return (ifa); 1186 } 1187 } 1188 } 1189 CARP_UNLOCK(cif); 1190 1191 return (NULL); 1192 } 1193 1194 void * 1195 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1196 { 1197 struct m_tag *mtag; 1198 struct carp_if *cif = v; 1199 struct carp_softc *sc; 1200 struct ifaddr *ifa; 1201 1202 CARP_LOCK(cif); 1203 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1204 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1205 if (IN6_ARE_ADDR_EQUAL(taddr, 1206 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1207 ((sc->sc_if.if_flags & 1208 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { 1209 struct ifnet *ifp = &sc->sc_if; 1210 mtag = m_tag_get(PACKET_TAG_CARP, 1211 sizeof(struct ifnet *), M_NOWAIT); 1212 if (mtag == NULL) { 1213 /* better a bit than nothing */ 1214 CARP_UNLOCK(cif); 1215 return (sc->sc_ac.ac_enaddr); 1216 } 1217 bcopy(&ifp, (caddr_t)(mtag + 1), 1218 sizeof(struct ifnet *)); 1219 m_tag_prepend(m, mtag); 1220 1221 CARP_UNLOCK(cif); 1222 return (sc->sc_ac.ac_enaddr); 1223 } 1224 } 1225 } 1226 CARP_UNLOCK(cif); 1227 1228 return (NULL); 1229 } 1230 #endif 1231 1232 struct ifnet * 1233 carp_forus(void *v, void *dhost) 1234 { 1235 struct carp_if *cif = v; 1236 struct carp_softc *vh; 1237 u_int8_t *ena = dhost; 1238 1239 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1240 return (NULL); 1241 1242 CARP_LOCK(cif); 1243 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1244 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1245 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && 1246 !bcmp(dhost, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) { 1247 CARP_UNLOCK(cif); 1248 return (&vh->sc_if); 1249 } 1250 1251 CARP_UNLOCK(cif); 1252 return (NULL); 1253 } 1254 1255 static void 1256 carp_master_down(void *v) 1257 { 1258 struct carp_softc *sc = v; 1259 1260 CARP_SCLOCK(sc); 1261 carp_master_down_locked(sc); 1262 CARP_SCUNLOCK(sc); 1263 } 1264 1265 static void 1266 carp_master_down_locked(struct carp_softc *sc) 1267 { 1268 if (sc->sc_carpdev) 1269 CARP_SCLOCK_ASSERT(sc); 1270 1271 switch (sc->sc_state) { 1272 case INIT: 1273 printf("%s: master_down event in INIT state\n", 1274 sc->sc_if.if_xname); 1275 break; 1276 case MASTER: 1277 break; 1278 case BACKUP: 1279 carp_set_state(sc, MASTER); 1280 carp_send_ad_locked(sc); 1281 carp_send_arp(sc); 1282 #ifdef INET6 1283 carp_send_na(sc); 1284 #endif /* INET6 */ 1285 carp_setrun(sc, 0); 1286 carp_setroute(sc, RTM_ADD); 1287 break; 1288 } 1289 } 1290 1291 /* 1292 * When in backup state, af indicates whether to reset the master down timer 1293 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1294 */ 1295 static void 1296 carp_setrun(struct carp_softc *sc, sa_family_t af) 1297 { 1298 struct timeval tv; 1299 1300 if (sc->sc_carpdev) 1301 CARP_SCLOCK_ASSERT(sc); 1302 1303 if (sc->sc_if.if_flags & IFF_UP && 1304 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) 1305 sc->sc_if.if_flags |= IFF_RUNNING; 1306 else { 1307 sc->sc_if.if_flags &= ~IFF_RUNNING; 1308 carp_setroute(sc, RTM_DELETE); 1309 return; 1310 } 1311 1312 switch (sc->sc_state) { 1313 case INIT: 1314 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1315 carp_send_ad_locked(sc); 1316 carp_send_arp(sc); 1317 #ifdef INET6 1318 carp_send_na(sc); 1319 #endif /* INET6 */ 1320 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1321 sc->sc_if.if_xname); 1322 carp_set_state(sc, MASTER); 1323 carp_setroute(sc, RTM_ADD); 1324 } else { 1325 CARP_DEBUG("%s: INIT -> BACKUP\n", sc->sc_if.if_xname); 1326 carp_set_state(sc, BACKUP); 1327 carp_setroute(sc, RTM_DELETE); 1328 carp_setrun(sc, 0); 1329 } 1330 break; 1331 case BACKUP: 1332 callout_stop(&sc->sc_ad_tmo); 1333 tv.tv_sec = 3 * sc->sc_advbase; 1334 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1335 switch (af) { 1336 #ifdef INET 1337 case AF_INET: 1338 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1339 carp_master_down, sc); 1340 break; 1341 #endif /* INET */ 1342 #ifdef INET6 1343 case AF_INET6: 1344 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1345 carp_master_down, sc); 1346 break; 1347 #endif /* INET6 */ 1348 default: 1349 if (sc->sc_naddrs) 1350 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1351 carp_master_down, sc); 1352 if (sc->sc_naddrs6) 1353 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1354 carp_master_down, sc); 1355 break; 1356 } 1357 break; 1358 case MASTER: 1359 tv.tv_sec = sc->sc_advbase; 1360 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1361 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1362 carp_send_ad, sc); 1363 break; 1364 } 1365 } 1366 1367 static int 1368 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1369 { 1370 struct ifnet *ifp; 1371 struct carp_if *cif; 1372 struct in_ifaddr *ia, *ia_if; 1373 struct ip_moptions *imo = &sc->sc_imo; 1374 struct in_addr addr; 1375 u_long iaddr = htonl(sin->sin_addr.s_addr); 1376 int own, error; 1377 1378 if (sin->sin_addr.s_addr == 0) { 1379 if (!(sc->sc_if.if_flags & IFF_UP)) 1380 carp_set_state(sc, INIT); 1381 if (sc->sc_naddrs) 1382 sc->sc_if.if_flags |= IFF_UP; 1383 carp_setrun(sc, 0); 1384 return (0); 1385 } 1386 1387 /* we have to do it by hands to check we won't match on us */ 1388 ia_if = NULL; own = 0; 1389 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 1390 /* and, yeah, we need a multicast-capable iface too */ 1391 if (ia->ia_ifp != &sc->sc_if && 1392 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1393 (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1394 if (!ia_if) 1395 ia_if = ia; 1396 if (sin->sin_addr.s_addr == 1397 ia->ia_addr.sin_addr.s_addr) 1398 own++; 1399 } 1400 } 1401 1402 if (!ia_if) 1403 return (EADDRNOTAVAIL); 1404 1405 ia = ia_if; 1406 ifp = ia->ia_ifp; 1407 1408 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1409 (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) 1410 return (EADDRNOTAVAIL); 1411 1412 if (imo->imo_num_memberships == 0) { 1413 addr.s_addr = htonl(INADDR_CARP_GROUP); 1414 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 1415 return (ENOBUFS); 1416 imo->imo_num_memberships++; 1417 imo->imo_multicast_ifp = ifp; 1418 imo->imo_multicast_ttl = CARP_DFLTTL; 1419 imo->imo_multicast_loop = 0; 1420 } 1421 1422 if (!ifp->if_carp) { 1423 1424 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, 1425 M_WAITOK|M_ZERO); 1426 if (!cif) { 1427 error = ENOBUFS; 1428 goto cleanup; 1429 } 1430 if ((error = ifpromisc(ifp, 1))) { 1431 FREE(cif, M_CARP); 1432 goto cleanup; 1433 } 1434 1435 CARP_LOCK_INIT(cif); 1436 CARP_LOCK(cif); 1437 cif->vhif_ifp = ifp; 1438 TAILQ_INIT(&cif->vhif_vrs); 1439 ifp->if_carp = cif; 1440 1441 } else { 1442 struct carp_softc *vr; 1443 1444 cif = (struct carp_if *)ifp->if_carp; 1445 CARP_LOCK(cif); 1446 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1447 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1448 CARP_UNLOCK(cif); 1449 error = EINVAL; 1450 goto cleanup; 1451 } 1452 } 1453 sc->sc_ia = ia; 1454 sc->sc_carpdev = ifp; 1455 1456 { /* XXX prevent endless loop if already in queue */ 1457 struct carp_softc *vr, *after = NULL; 1458 int myself = 0; 1459 cif = (struct carp_if *)ifp->if_carp; 1460 1461 /* XXX: cif should not change, right? So we still hold the lock */ 1462 CARP_LOCK_ASSERT(cif); 1463 1464 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1465 if (vr == sc) 1466 myself = 1; 1467 if (vr->sc_vhid < sc->sc_vhid) 1468 after = vr; 1469 } 1470 1471 if (!myself) { 1472 /* We're trying to keep things in order */ 1473 if (after == NULL) { 1474 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1475 } else { 1476 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1477 } 1478 cif->vhif_nvrs++; 1479 } 1480 } 1481 1482 sc->sc_naddrs++; 1483 sc->sc_if.if_flags |= IFF_UP; 1484 if (own) 1485 sc->sc_advskew = 0; 1486 carp_sc_state_locked(sc); 1487 carp_setrun(sc, 0); 1488 1489 CARP_UNLOCK(cif); 1490 1491 return (0); 1492 1493 cleanup: 1494 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1495 return (error); 1496 } 1497 1498 static int 1499 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1500 { 1501 int error = 0; 1502 1503 if (!--sc->sc_naddrs) { 1504 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1505 struct ip_moptions *imo = &sc->sc_imo; 1506 1507 CARP_LOCK(cif); 1508 callout_stop(&sc->sc_ad_tmo); 1509 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1510 sc->sc_vhid = -1; 1511 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1512 imo->imo_multicast_ifp = NULL; 1513 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1514 if (!--cif->vhif_nvrs) { 1515 sc->sc_carpdev->if_carp = NULL; 1516 CARP_LOCK_DESTROY(cif); 1517 FREE(cif, M_IFADDR); 1518 } else { 1519 CARP_UNLOCK(cif); 1520 } 1521 } 1522 1523 return (error); 1524 } 1525 1526 #ifdef INET6 1527 static int 1528 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1529 { 1530 struct ifnet *ifp; 1531 struct carp_if *cif; 1532 struct in6_ifaddr *ia, *ia_if; 1533 struct ip6_moptions *im6o = &sc->sc_im6o; 1534 struct in6_multi_mship *imm; 1535 struct sockaddr_in6 addr; 1536 int own, error; 1537 1538 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1539 if (!(sc->sc_if.if_flags & IFF_UP)) 1540 carp_set_state(sc, INIT); 1541 if (sc->sc_naddrs6) 1542 sc->sc_if.if_flags |= IFF_UP; 1543 carp_setrun(sc, 0); 1544 return (0); 1545 } 1546 1547 /* we have to do it by hands to check we won't match on us */ 1548 ia_if = NULL; own = 0; 1549 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1550 int i; 1551 1552 for (i = 0; i < 4; i++) { 1553 if ((sin6->sin6_addr.s6_addr32[i] & 1554 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1555 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1556 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1557 break; 1558 } 1559 /* and, yeah, we need a multicast-capable iface too */ 1560 if (ia->ia_ifp != &sc->sc_ac.ac_if && 1561 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1562 (i == 4)) { 1563 if (!ia_if) 1564 ia_if = ia; 1565 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1566 &ia->ia_addr.sin6_addr)) 1567 own++; 1568 } 1569 } 1570 1571 if (!ia_if) 1572 return (EADDRNOTAVAIL); 1573 ia = ia_if; 1574 ifp = ia->ia_ifp; 1575 1576 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1577 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1578 return (EADDRNOTAVAIL); 1579 1580 if (!sc->sc_naddrs6) { 1581 im6o->im6o_multicast_ifp = ifp; 1582 1583 /* join CARP multicast address */ 1584 bzero(&addr, sizeof(addr)); 1585 addr.sin6_family = AF_INET6; 1586 addr.sin6_len = sizeof(addr); 1587 addr.sin6_addr.s6_addr16[0] = htons(0xff02); 1588 addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 1589 addr.sin6_addr.s6_addr8[15] = 0x12; 1590 if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) 1591 goto cleanup; 1592 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1593 1594 /* join solicited multicast address */ 1595 bzero(&addr.sin6_addr, sizeof(addr.sin6_addr)); 1596 addr.sin6_addr.s6_addr16[0] = htons(0xff02); 1597 addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 1598 addr.sin6_addr.s6_addr32[1] = 0; 1599 addr.sin6_addr.s6_addr32[2] = htonl(1); 1600 addr.sin6_addr.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1601 addr.sin6_addr.s6_addr8[12] = 0xff; 1602 if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) 1603 goto cleanup; 1604 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1605 } 1606 1607 if (!ifp->if_carp) { 1608 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, 1609 M_WAITOK|M_ZERO); 1610 if (!cif) { 1611 error = ENOBUFS; 1612 goto cleanup; 1613 } 1614 if ((error = ifpromisc(ifp, 1))) { 1615 FREE(cif, M_CARP); 1616 goto cleanup; 1617 } 1618 1619 CARP_LOCK_INIT(cif); 1620 CARP_LOCK(cif); 1621 cif->vhif_ifp = ifp; 1622 TAILQ_INIT(&cif->vhif_vrs); 1623 ifp->if_carp = cif; 1624 1625 } else { 1626 struct carp_softc *vr; 1627 1628 cif = (struct carp_if *)ifp->if_carp; 1629 CARP_LOCK(cif); 1630 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1631 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1632 CARP_UNLOCK(cif); 1633 error = EINVAL; 1634 goto cleanup; 1635 } 1636 } 1637 sc->sc_ia6 = ia; 1638 sc->sc_carpdev = ifp; 1639 1640 { /* XXX prevent endless loop if already in queue */ 1641 struct carp_softc *vr, *after = NULL; 1642 int myself = 0; 1643 cif = (struct carp_if *)ifp->if_carp; 1644 CARP_LOCK_ASSERT(cif); 1645 1646 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1647 if (vr == sc) 1648 myself = 1; 1649 if (vr->sc_vhid < sc->sc_vhid) 1650 after = vr; 1651 } 1652 1653 if (!myself) { 1654 /* We're trying to keep things in order */ 1655 if (after == NULL) { 1656 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1657 } else { 1658 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1659 } 1660 cif->vhif_nvrs++; 1661 } 1662 } 1663 1664 sc->sc_naddrs6++; 1665 sc->sc_ac.ac_if.if_flags |= IFF_UP; 1666 if (own) 1667 sc->sc_advskew = 0; 1668 carp_sc_state_locked(sc); 1669 carp_setrun(sc, 0); 1670 1671 CARP_UNLOCK(cif); 1672 1673 return (0); 1674 1675 cleanup: 1676 /* clean up multicast memberships */ 1677 if (!sc->sc_naddrs6) { 1678 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1679 imm = LIST_FIRST(&im6o->im6o_memberships); 1680 LIST_REMOVE(imm, i6mm_chain); 1681 in6_leavegroup(imm); 1682 } 1683 } 1684 return (error); 1685 } 1686 1687 static int 1688 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1689 { 1690 int error = 0; 1691 1692 if (!--sc->sc_naddrs6) { 1693 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1694 struct ip6_moptions *im6o = &sc->sc_im6o; 1695 1696 CARP_LOCK(cif); 1697 callout_stop(&sc->sc_ad_tmo); 1698 sc->sc_ac.ac_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1699 sc->sc_vhid = -1; 1700 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1701 struct in6_multi_mship *imm = 1702 LIST_FIRST(&im6o->im6o_memberships); 1703 1704 LIST_REMOVE(imm, i6mm_chain); 1705 in6_leavegroup(imm); 1706 } 1707 im6o->im6o_multicast_ifp = NULL; 1708 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1709 if (!--cif->vhif_nvrs) { 1710 CARP_LOCK_DESTROY(cif); 1711 sc->sc_carpdev->if_carp = NULL; 1712 FREE(cif, M_IFADDR); 1713 } else 1714 CARP_UNLOCK(cif); 1715 } 1716 1717 return (error); 1718 } 1719 #endif /* INET6 */ 1720 1721 static int 1722 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1723 { 1724 struct carp_softc *sc = ifp->if_softc, *vr; 1725 struct carpreq carpr; 1726 struct ifaddr *ifa; 1727 struct ifreq *ifr; 1728 struct ifaliasreq *ifra; 1729 int locked = 0, error = 0; 1730 1731 ifa = (struct ifaddr *)addr; 1732 ifra = (struct ifaliasreq *)addr; 1733 ifr = (struct ifreq *)addr; 1734 1735 switch (cmd) { 1736 case SIOCSIFADDR: 1737 switch (ifa->ifa_addr->sa_family) { 1738 #ifdef INET 1739 case AF_INET: 1740 sc->sc_if.if_flags |= IFF_UP; 1741 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1742 sizeof(struct sockaddr)); 1743 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1744 break; 1745 #endif /* INET */ 1746 #ifdef INET6 1747 case AF_INET6: 1748 sc->sc_if.if_flags |= IFF_UP; 1749 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1750 break; 1751 #endif /* INET6 */ 1752 default: 1753 error = EAFNOSUPPORT; 1754 break; 1755 } 1756 break; 1757 1758 case SIOCAIFADDR: 1759 switch (ifa->ifa_addr->sa_family) { 1760 #ifdef INET 1761 case AF_INET: 1762 sc->sc_if.if_flags |= IFF_UP; 1763 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1764 sizeof(struct sockaddr)); 1765 error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); 1766 break; 1767 #endif /* INET */ 1768 #ifdef INET6 1769 case AF_INET6: 1770 sc->sc_if.if_flags |= IFF_UP; 1771 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1772 break; 1773 #endif /* INET6 */ 1774 default: 1775 error = EAFNOSUPPORT; 1776 break; 1777 } 1778 break; 1779 1780 case SIOCDIFADDR: 1781 switch (ifa->ifa_addr->sa_family) { 1782 #ifdef INET 1783 case AF_INET: 1784 error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); 1785 break; 1786 #endif /* INET */ 1787 #ifdef INET6 1788 case AF_INET6: 1789 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1790 break; 1791 #endif /* INET6 */ 1792 default: 1793 error = EAFNOSUPPORT; 1794 break; 1795 } 1796 break; 1797 1798 case SIOCSIFFLAGS: 1799 if (sc->sc_carpdev) { 1800 locked = 1; 1801 CARP_SCLOCK(sc); 1802 } 1803 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1804 callout_stop(&sc->sc_ad_tmo); 1805 callout_stop(&sc->sc_md_tmo); 1806 callout_stop(&sc->sc_md6_tmo); 1807 if (sc->sc_state == MASTER) 1808 carp_send_ad_locked(sc); 1809 carp_set_state(sc, INIT); 1810 carp_setrun(sc, 0); 1811 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 1812 sc->sc_if.if_flags |= IFF_UP; 1813 carp_setrun(sc, 0); 1814 } 1815 break; 1816 1817 case SIOCSVH: 1818 if ((error = suser(curthread)) != 0) 1819 break; 1820 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1821 break; 1822 error = 1; 1823 if (sc->sc_carpdev) { 1824 locked = 1; 1825 CARP_SCLOCK(sc); 1826 } 1827 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1828 switch (carpr.carpr_state) { 1829 case BACKUP: 1830 callout_stop(&sc->sc_ad_tmo); 1831 carp_set_state(sc, BACKUP); 1832 carp_setrun(sc, 0); 1833 carp_setroute(sc, RTM_DELETE); 1834 break; 1835 case MASTER: 1836 carp_master_down_locked(sc); 1837 break; 1838 default: 1839 break; 1840 } 1841 } 1842 if (carpr.carpr_vhid > 0) { 1843 if (carpr.carpr_vhid > 255) { 1844 error = EINVAL; 1845 break; 1846 } 1847 if (sc->sc_carpdev) { 1848 struct carp_if *cif; 1849 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1850 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1851 if (vr != sc && 1852 vr->sc_vhid == carpr.carpr_vhid) 1853 return EEXIST; 1854 } 1855 sc->sc_vhid = carpr.carpr_vhid; 1856 sc->sc_ac.ac_enaddr[0] = 0; 1857 sc->sc_ac.ac_enaddr[1] = 0; 1858 sc->sc_ac.ac_enaddr[2] = 0x5e; 1859 sc->sc_ac.ac_enaddr[3] = 0; 1860 sc->sc_ac.ac_enaddr[4] = 1; 1861 sc->sc_ac.ac_enaddr[5] = sc->sc_vhid; 1862 error--; 1863 } 1864 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1865 if (carpr.carpr_advskew >= 255) { 1866 error = EINVAL; 1867 break; 1868 } 1869 if (carpr.carpr_advbase > 255) { 1870 error = EINVAL; 1871 break; 1872 } 1873 sc->sc_advbase = carpr.carpr_advbase; 1874 sc->sc_advskew = carpr.carpr_advskew; 1875 error--; 1876 } 1877 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1878 if (error > 0) 1879 error = EINVAL; 1880 else { 1881 error = 0; 1882 carp_setrun(sc, 0); 1883 } 1884 break; 1885 1886 case SIOCGVH: 1887 /* XXX: lockless read */ 1888 bzero(&carpr, sizeof(carpr)); 1889 carpr.carpr_state = sc->sc_state; 1890 carpr.carpr_vhid = sc->sc_vhid; 1891 carpr.carpr_advbase = sc->sc_advbase; 1892 carpr.carpr_advskew = sc->sc_advskew; 1893 if (suser(curthread) == 0) 1894 bcopy(sc->sc_key, carpr.carpr_key, 1895 sizeof(carpr.carpr_key)); 1896 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1897 break; 1898 1899 default: 1900 error = EINVAL; 1901 } 1902 1903 if (locked) 1904 CARP_SCUNLOCK(sc); 1905 1906 carp_hmac_prepare(sc); 1907 1908 return (error); 1909 } 1910 1911 /* 1912 * XXX: this is looutput. We should eventually use it from there. 1913 */ 1914 static int 1915 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1916 struct rtentry *rt) 1917 { 1918 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1919 1920 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1921 m_freem(m); 1922 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1923 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1924 } 1925 1926 ifp->if_opackets++; 1927 ifp->if_obytes += m->m_pkthdr.len; 1928 #if 1 /* XXX */ 1929 switch (dst->sa_family) { 1930 case AF_INET: 1931 case AF_INET6: 1932 case AF_IPX: 1933 case AF_APPLETALK: 1934 break; 1935 default: 1936 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 1937 m_freem(m); 1938 return (EAFNOSUPPORT); 1939 } 1940 #endif 1941 return(if_simloop(ifp, m, dst->sa_family, 0)); 1942 } 1943 1944 /* 1945 * Start output on carp interface. This function should never be called. 1946 */ 1947 static void 1948 carp_start(struct ifnet *ifp) 1949 { 1950 #ifdef DEBUG 1951 printf("%s: start called\n", ifp->if_xname); 1952 #endif 1953 } 1954 1955 int 1956 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 1957 struct rtentry *rt) 1958 { 1959 struct m_tag *mtag; 1960 struct carp_softc *sc; 1961 struct ifnet *carp_ifp; 1962 1963 if (!sa) 1964 return (0); 1965 1966 switch (sa->sa_family) { 1967 #ifdef INET 1968 case AF_INET: 1969 break; 1970 #endif /* INET */ 1971 #ifdef INET6 1972 case AF_INET6: 1973 break; 1974 #endif /* INET6 */ 1975 default: 1976 return (0); 1977 } 1978 1979 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1980 if (mtag == NULL) 1981 return (0); 1982 1983 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 1984 sc = carp_ifp->if_softc; 1985 1986 /* Set the source MAC address to Virtual Router MAC Address */ 1987 switch (ifp->if_type) { 1988 case IFT_ETHER: 1989 case IFT_L2VLAN: { 1990 struct ether_header *eh; 1991 1992 eh = mtod(m, struct ether_header *); 1993 eh->ether_shost[0] = 0; 1994 eh->ether_shost[1] = 0; 1995 eh->ether_shost[2] = 0x5e; 1996 eh->ether_shost[3] = 0; 1997 eh->ether_shost[4] = 1; 1998 eh->ether_shost[5] = sc->sc_vhid; 1999 } 2000 break; 2001 case IFT_FDDI: { 2002 struct fddi_header *fh; 2003 2004 fh = mtod(m, struct fddi_header *); 2005 fh->fddi_shost[0] = 0; 2006 fh->fddi_shost[1] = 0; 2007 fh->fddi_shost[2] = 0x5e; 2008 fh->fddi_shost[3] = 0; 2009 fh->fddi_shost[4] = 1; 2010 fh->fddi_shost[5] = sc->sc_vhid; 2011 } 2012 break; 2013 case IFT_ISO88025: { 2014 struct iso88025_header *th; 2015 th = mtod(m, struct iso88025_header *); 2016 th->iso88025_shost[0] = 3; 2017 th->iso88025_shost[1] = 0; 2018 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 2019 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 2020 th->iso88025_shost[4] = 0; 2021 th->iso88025_shost[5] = 0; 2022 } 2023 break; 2024 default: 2025 printf("%s: carp is not supported for this interface type\n", 2026 ifp->if_xname); 2027 return (EOPNOTSUPP); 2028 } 2029 2030 return (0); 2031 } 2032 2033 static void 2034 carp_set_state(struct carp_softc *sc, int state) 2035 { 2036 2037 if (sc->sc_carpdev) 2038 CARP_SCLOCK_ASSERT(sc); 2039 2040 if (sc->sc_state == state) 2041 return; 2042 2043 sc->sc_state = state; 2044 switch (state) { 2045 case BACKUP: 2046 sc->sc_ac.ac_if.if_link_state = LINK_STATE_DOWN; 2047 break; 2048 case MASTER: 2049 sc->sc_ac.ac_if.if_link_state = LINK_STATE_UP; 2050 break; 2051 default: 2052 sc->sc_ac.ac_if.if_link_state = LINK_STATE_UNKNOWN; 2053 break; 2054 } 2055 rt_ifmsg(&sc->sc_ac.ac_if); 2056 } 2057 2058 void 2059 carp_carpdev_state(void *v) 2060 { 2061 struct carp_if *cif = v; 2062 2063 CARP_LOCK(cif); 2064 carp_carpdev_state_locked(cif); 2065 CARP_UNLOCK(cif); 2066 } 2067 2068 static void 2069 carp_carpdev_state_locked(struct carp_if *cif) 2070 { 2071 struct carp_softc *sc; 2072 2073 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2074 carp_sc_state_locked(sc); 2075 } 2076 2077 static void 2078 carp_sc_state_locked(struct carp_softc *sc) 2079 { 2080 CARP_SCLOCK_ASSERT(sc); 2081 2082 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2083 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2084 sc->sc_flags_backup = sc->sc_if.if_flags; 2085 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 2086 callout_stop(&sc->sc_ad_tmo); 2087 callout_stop(&sc->sc_md_tmo); 2088 callout_stop(&sc->sc_md6_tmo); 2089 carp_set_state(sc, INIT); 2090 carp_setrun(sc, 0); 2091 if (!sc->sc_suppress) { 2092 carp_suppress_preempt++; 2093 if (carp_suppress_preempt == 1) { 2094 CARP_SCUNLOCK(sc); 2095 carp_send_ad_all(); 2096 CARP_SCLOCK(sc); 2097 } 2098 } 2099 sc->sc_suppress = 1; 2100 } else { 2101 sc->sc_if.if_flags |= sc->sc_flags_backup; 2102 carp_set_state(sc, INIT); 2103 carp_setrun(sc, 0); 2104 if (sc->sc_suppress) 2105 carp_suppress_preempt--; 2106 sc->sc_suppress = 0; 2107 } 2108 2109 return; 2110 } 2111 2112 static int 2113 carp_modevent(module_t mod, int type, void *data) 2114 { 2115 int error = 0; 2116 2117 switch (type) { 2118 case MOD_LOAD: 2119 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2120 LIST_INIT(&carpif_list); 2121 if_clone_attach(&carp_cloner); 2122 break; 2123 2124 case MOD_UNLOAD: 2125 if_clone_detach(&carp_cloner); 2126 while (!LIST_EMPTY(&carpif_list)) 2127 carp_clone_destroy(&LIST_FIRST(&carpif_list)->sc_if); 2128 mtx_destroy(&carp_mtx); 2129 break; 2130 2131 default: 2132 error = EINVAL; 2133 break; 2134 } 2135 2136 return error; 2137 } 2138 2139 static moduledata_t carp_mod = { 2140 "carp", 2141 carp_modevent, 2142 0 2143 }; 2144 2145 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2146