1 /* $FreeBSD$ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "opt_carp.h" 30 #include "opt_bpf.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/conf.h> 38 #include <sys/kernel.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/time.h> 44 #include <sys/proc.h> 45 #include <sys/sysctl.h> 46 #include <sys/syslog.h> 47 #include <sys/signalvar.h> 48 #include <sys/filio.h> 49 #include <sys/sockio.h> 50 51 #include <sys/socket.h> 52 #include <sys/vnode.h> 53 54 #include <machine/stdarg.h> 55 56 #include <net/bpf.h> 57 #include <net/ethernet.h> 58 #include <net/fddi.h> 59 #include <net/iso88025.h> 60 #include <net/if.h> 61 #include <net/if_clone.h> 62 #include <net/if_types.h> 63 #include <net/route.h> 64 65 #ifdef INET 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/in_systm.h> 69 #include <netinet/ip.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/if_ether.h> 72 #include <machine/in_cksum.h> 73 #endif 74 75 #ifdef INET6 76 #include <netinet/icmp6.h> 77 #include <netinet/ip6.h> 78 #include <netinet6/ip6_var.h> 79 #include <netinet6/nd6.h> 80 #include <net/if_dl.h> 81 #endif 82 83 #include <crypto/sha1.h> 84 #include <netinet/ip_carp.h> 85 86 #define CARP_IFNAME "carp" 87 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 88 SYSCTL_DECL(_net_inet_carp); 89 90 struct carp_softc { 91 struct arpcom sc_ac; /* Interface clue */ 92 #define sc_if sc_ac.ac_if 93 struct ifnet *sc_carpdev; /* Pointer to parent interface */ 94 struct in_ifaddr *sc_ia; /* primary iface address */ 95 struct ip_moptions sc_imo; 96 #ifdef INET6 97 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 98 struct ip6_moptions sc_im6o; 99 #endif /* INET6 */ 100 TAILQ_ENTRY(carp_softc) sc_list; 101 102 enum { INIT = 0, BACKUP, MASTER } sc_state; 103 104 int sc_flags_backup; 105 int sc_suppress; 106 107 int sc_sendad_errors; 108 #define CARP_SENDAD_MAX_ERRORS 3 109 int sc_sendad_success; 110 #define CARP_SENDAD_MIN_SUCCESS 3 111 112 int sc_vhid; 113 int sc_advskew; 114 int sc_naddrs; 115 int sc_naddrs6; 116 int sc_advbase; /* seconds */ 117 int sc_init_counter; 118 u_int64_t sc_counter; 119 120 /* authentication */ 121 #define CARP_HMAC_PAD 64 122 unsigned char sc_key[CARP_KEY_LEN]; 123 unsigned char sc_pad[CARP_HMAC_PAD]; 124 SHA1_CTX sc_sha1; 125 126 struct callout sc_ad_tmo; /* advertisement timeout */ 127 struct callout sc_md_tmo; /* master down timeout */ 128 struct callout sc_md6_tmo; /* master down timeout */ 129 130 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 131 }; 132 133 int carp_suppress_preempt = 0; 134 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 135 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 136 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 137 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 138 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 139 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 140 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 141 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 142 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 143 144 struct carpstats carpstats; 145 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 146 &carpstats, carpstats, 147 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 148 149 struct carp_if { 150 TAILQ_HEAD(, carp_softc) vhif_vrs; 151 int vhif_nvrs; 152 153 struct callout cif_tmo; 154 struct ifnet *vhif_ifp; 155 struct mtx vhif_mtx; 156 }; 157 158 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */ 159 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) 160 161 /* lock per carp_if queue */ 162 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ 163 NULL, MTX_DEF) 164 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) 165 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) 166 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) 167 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) 168 169 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) 170 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) 171 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) 172 173 #define CARP_LOG(...) do { \ 174 if (carp_opts[CARPCTL_LOG] > 0) \ 175 log(LOG_INFO, __VA_ARGS__); \ 176 } while (0) 177 178 #define CARP_DEBUG(...) do { \ 179 if (carp_opts[CARPCTL_LOG] > 1) \ 180 log(LOG_DEBUG, __VA_ARGS__); \ 181 } while (0) 182 183 static void carp_hmac_prepare(struct carp_softc *); 184 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 185 unsigned char *); 186 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 187 unsigned char *); 188 static void carp_setroute(struct carp_softc *, int); 189 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 190 static int carp_clone_create(struct if_clone *, int); 191 static void carp_clone_destroy(struct ifnet *); 192 static void carpdetach(struct carp_softc *); 193 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 194 struct carp_header *); 195 static void carp_send_ad_all(void); 196 static void carp_send_ad(void *); 197 static void carp_send_ad_locked(struct carp_softc *); 198 static void carp_send_arp(struct carp_softc *); 199 static void carp_master_down(void *); 200 static void carp_master_down_locked(struct carp_softc *); 201 static int carp_ioctl(struct ifnet *, u_long, caddr_t); 202 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 203 struct rtentry *); 204 static void carp_start(struct ifnet *); 205 static void carp_setrun(struct carp_softc *, sa_family_t); 206 static void carp_set_state(struct carp_softc *, int); 207 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 208 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 209 210 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 211 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); 212 static void carp_carpdev_state1(void *); 213 static void carp_carpdev_state_locked(struct carp_if *); 214 #ifdef INET6 215 static void carp_send_na(struct carp_softc *); 216 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 217 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 218 #endif 219 220 static LIST_HEAD(, carp_softc) carpif_list; 221 static struct mtx carp_mtx; 222 IFC_SIMPLE_DECLARE(carp, 0); 223 224 static __inline u_int16_t 225 carp_cksum(struct mbuf *m, int len) 226 { 227 return (in_cksum(m, len)); 228 } 229 230 static void 231 carp_hmac_prepare(struct carp_softc *sc) 232 { 233 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 234 u_int8_t vhid = sc->sc_vhid & 0xff; 235 struct ifaddr *ifa; 236 int i; 237 #ifdef INET6 238 struct in6_addr in6; 239 #endif 240 241 if (sc->sc_carpdev) 242 CARP_SCLOCK(sc); 243 244 /* XXX: possible race here */ 245 246 /* compute ipad from key */ 247 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 248 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 249 for (i = 0; i < sizeof(sc->sc_pad); i++) 250 sc->sc_pad[i] ^= 0x36; 251 252 /* precompute first part of inner hash */ 253 SHA1Init(&sc->sc_sha1); 254 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 255 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 256 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 257 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 258 #ifdef INET 259 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 260 if (ifa->ifa_addr->sa_family == AF_INET) 261 SHA1Update(&sc->sc_sha1, 262 (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr, 263 sizeof(struct in_addr)); 264 } 265 #endif /* INET */ 266 #ifdef INET6 267 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 268 if (ifa->ifa_addr->sa_family == AF_INET6) { 269 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 270 if (IN6_IS_ADDR_LINKLOCAL(&in6)) 271 in6.s6_addr16[1] = 0; 272 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 273 } 274 } 275 #endif /* INET6 */ 276 277 /* convert ipad to opad */ 278 for (i = 0; i < sizeof(sc->sc_pad); i++) 279 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 280 281 if (sc->sc_carpdev) 282 CARP_SCUNLOCK(sc); 283 } 284 285 static void 286 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 287 unsigned char md[20]) 288 { 289 SHA1_CTX sha1ctx; 290 291 /* fetch first half of inner hash */ 292 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 293 294 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 295 SHA1Final(md, &sha1ctx); 296 297 /* outer hash */ 298 SHA1Init(&sha1ctx); 299 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 300 SHA1Update(&sha1ctx, md, 20); 301 SHA1Final(md, &sha1ctx); 302 } 303 304 static int 305 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 306 unsigned char md[20]) 307 { 308 unsigned char md2[20]; 309 310 CARP_SCLOCK_ASSERT(sc); 311 312 carp_hmac_generate(sc, counter, md2); 313 314 return (bcmp(md, md2, sizeof(md2))); 315 } 316 317 static void 318 carp_setroute(struct carp_softc *sc, int cmd) 319 { 320 struct ifaddr *ifa; 321 int s; 322 323 if (sc->sc_carpdev) 324 CARP_SCLOCK_ASSERT(sc); 325 326 s = splnet(); 327 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 328 if (ifa->ifa_addr->sa_family == AF_INET && 329 sc->sc_carpdev != NULL) { 330 int count = carp_addrcount( 331 (struct carp_if *)sc->sc_carpdev->if_carp, 332 ifatoia(ifa), CARP_COUNT_MASTER); 333 334 if ((cmd == RTM_ADD && count == 1) || 335 (cmd == RTM_DELETE && count == 0)) 336 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 337 } 338 #ifdef INET6 339 if (ifa->ifa_addr->sa_family == AF_INET6) { 340 if (cmd == RTM_ADD) 341 in6_ifaddloop(ifa); 342 else 343 in6_ifremloop(ifa); 344 } 345 #endif /* INET6 */ 346 } 347 splx(s); 348 } 349 350 static int 351 carp_clone_create(struct if_clone *ifc, int unit) 352 { 353 354 struct carp_softc *sc; 355 struct ifnet *ifp; 356 357 MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 358 359 sc->sc_flags_backup = 0; 360 sc->sc_suppress = 0; 361 sc->sc_advbase = CARP_DFLTINTV; 362 sc->sc_vhid = -1; /* required setting */ 363 sc->sc_advskew = 0; 364 sc->sc_init_counter = 1; 365 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ 366 #ifdef INET6 367 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 368 #endif 369 370 callout_init(&sc->sc_ad_tmo, NET_CALLOUT_MPSAFE); 371 callout_init(&sc->sc_md_tmo, NET_CALLOUT_MPSAFE); 372 callout_init(&sc->sc_md6_tmo, NET_CALLOUT_MPSAFE); 373 374 ifp = &sc->sc_if; 375 ifp->if_softc = sc; 376 if_initname(ifp, CARP_IFNAME, unit); 377 ifp->if_mtu = ETHERMTU; 378 ifp->if_flags = 0; 379 ifp->if_ioctl = carp_ioctl; 380 ifp->if_output = carp_looutput; 381 ifp->if_start = carp_start; 382 ifp->if_type = IFT_CARP; 383 ifp->if_snd.ifq_maxlen = ifqmaxlen; 384 ifp->if_hdrlen = 0; 385 if_attach(ifp); 386 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t)); 387 mtx_lock(&carp_mtx); 388 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 389 mtx_unlock(&carp_mtx); 390 return (0); 391 } 392 393 static void 394 carp_clone_destroy(struct ifnet *ifp) 395 { 396 struct carp_softc *sc = ifp->if_softc; 397 struct carp_if *cif; 398 struct ip_moptions *imo = &sc->sc_imo; 399 #ifdef INET6 400 struct ip6_moptions *im6o = &sc->sc_im6o; 401 #endif 402 403 /* carpdetach(sc); */ 404 405 callout_stop(&sc->sc_ad_tmo); 406 callout_stop(&sc->sc_md_tmo); 407 callout_stop(&sc->sc_md6_tmo); 408 409 if (imo->imo_num_memberships) { 410 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 411 imo->imo_multicast_ifp = NULL; 412 } 413 #ifdef INET6 414 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 415 struct in6_multi_mship *imm = 416 LIST_FIRST(&im6o->im6o_memberships); 417 LIST_REMOVE(imm, i6mm_chain); 418 in6_leavegroup(imm); 419 } 420 im6o->im6o_multicast_ifp = NULL; 421 #endif 422 423 /* Remove ourself from parents if_carp queue */ 424 if (sc->sc_carpdev && (cif = sc->sc_carpdev->if_carp)) { 425 CARP_LOCK(cif); 426 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 427 if (!--cif->vhif_nvrs) { 428 callout_drain(&cif->cif_tmo); 429 sc->sc_carpdev->if_carp = NULL; 430 CARP_LOCK_DESTROY(cif); 431 FREE(cif, M_CARP); 432 ifpromisc(sc->sc_carpdev, 0); 433 } else { 434 CARP_UNLOCK(cif); 435 } 436 } 437 438 mtx_lock(&carp_mtx); 439 LIST_REMOVE(sc, sc_next); 440 mtx_unlock(&carp_mtx); 441 bpfdetach(ifp); 442 if_detach(ifp); 443 free(sc, M_CARP); 444 } 445 446 /* 447 * process input packet. 448 * we have rearranged checks order compared to the rfc, 449 * but it seems more efficient this way or not possible otherwise. 450 */ 451 void 452 carp_input(struct mbuf *m, int hlen) 453 { 454 struct ip *ip = mtod(m, struct ip *); 455 struct carp_header *ch; 456 int iplen, len; 457 458 carpstats.carps_ipackets++; 459 460 if (!carp_opts[CARPCTL_ALLOW]) { 461 m_freem(m); 462 return; 463 } 464 465 /* check if received on a valid carp interface */ 466 if (m->m_pkthdr.rcvif->if_carp == NULL) { 467 carpstats.carps_badif++; 468 CARP_LOG("carp_input: packet received on non-carp " 469 "interface: %s\n", 470 m->m_pkthdr.rcvif->if_xname); 471 m_freem(m); 472 return; 473 } 474 475 /* verify that the IP TTL is 255. */ 476 if (ip->ip_ttl != CARP_DFLTTL) { 477 carpstats.carps_badttl++; 478 CARP_LOG("carp_input: received ttl %d != 255i on %s\n", 479 ip->ip_ttl, 480 m->m_pkthdr.rcvif->if_xname); 481 m_freem(m); 482 return; 483 } 484 485 iplen = ip->ip_hl << 2; 486 487 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 488 carpstats.carps_badlen++; 489 CARP_LOG("carp_input: received len %zd < " 490 "sizeof(struct carp_header)\n", 491 m->m_len - sizeof(struct ip)); 492 m_freem(m); 493 return; 494 } 495 496 if (iplen + sizeof(*ch) < m->m_len) { 497 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 498 carpstats.carps_hdrops++; 499 CARP_LOG("carp_input: pullup failed\n"); 500 return; 501 } 502 ip = mtod(m, struct ip *); 503 } 504 ch = (struct carp_header *)((char *)ip + iplen); 505 506 /* 507 * verify that the received packet length is 508 * equal to the CARP header 509 */ 510 len = iplen + sizeof(*ch); 511 if (len > m->m_pkthdr.len) { 512 carpstats.carps_badlen++; 513 CARP_LOG("carp_input: packet too short %d on %s\n", 514 m->m_pkthdr.len, 515 m->m_pkthdr.rcvif->if_xname); 516 m_freem(m); 517 return; 518 } 519 520 if ((m = m_pullup(m, len)) == NULL) { 521 carpstats.carps_hdrops++; 522 return; 523 } 524 ip = mtod(m, struct ip *); 525 ch = (struct carp_header *)((char *)ip + iplen); 526 527 /* verify the CARP checksum */ 528 m->m_data += iplen; 529 if (carp_cksum(m, len - iplen)) { 530 carpstats.carps_badsum++; 531 CARP_LOG("carp_input: checksum failed on %s\n", 532 m->m_pkthdr.rcvif->if_xname); 533 m_freem(m); 534 return; 535 } 536 m->m_data -= iplen; 537 538 carp_input_c(m, ch, AF_INET); 539 } 540 541 #ifdef INET6 542 int 543 carp6_input(struct mbuf **mp, int *offp, int proto) 544 { 545 struct mbuf *m = *mp; 546 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 547 struct carp_header *ch; 548 u_int len; 549 550 carpstats.carps_ipackets6++; 551 552 if (!carp_opts[CARPCTL_ALLOW]) { 553 m_freem(m); 554 return (IPPROTO_DONE); 555 } 556 557 /* check if received on a valid carp interface */ 558 if (m->m_pkthdr.rcvif->if_carp == NULL) { 559 carpstats.carps_badif++; 560 CARP_LOG("carp6_input: packet received on non-carp " 561 "interface: %s\n", 562 m->m_pkthdr.rcvif->if_xname); 563 m_freem(m); 564 return (IPPROTO_DONE); 565 } 566 567 /* verify that the IP TTL is 255 */ 568 if (ip6->ip6_hlim != CARP_DFLTTL) { 569 carpstats.carps_badttl++; 570 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 571 ip6->ip6_hlim, 572 m->m_pkthdr.rcvif->if_xname); 573 m_freem(m); 574 return (IPPROTO_DONE); 575 } 576 577 /* verify that we have a complete carp packet */ 578 len = m->m_len; 579 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 580 if (ch == NULL) { 581 carpstats.carps_badlen++; 582 CARP_LOG("carp6_input: packet size %u too small\n", len); 583 return (IPPROTO_DONE); 584 } 585 586 587 /* verify the CARP checksum */ 588 m->m_data += *offp; 589 if (carp_cksum(m, sizeof(*ch))) { 590 carpstats.carps_badsum++; 591 CARP_LOG("carp6_input: checksum failed, on %s\n", 592 m->m_pkthdr.rcvif->if_xname); 593 m_freem(m); 594 return (IPPROTO_DONE); 595 } 596 m->m_data -= *offp; 597 598 carp_input_c(m, ch, AF_INET6); 599 return (IPPROTO_DONE); 600 } 601 #endif /* INET6 */ 602 603 static void 604 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 605 { 606 struct ifnet *ifp = m->m_pkthdr.rcvif; 607 struct carp_softc *sc; 608 u_int64_t tmp_counter; 609 struct timeval sc_tv, ch_tv; 610 611 /* verify that the VHID is valid on the receiving interface */ 612 CARP_LOCK(ifp->if_carp); 613 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 614 if (sc->sc_vhid == ch->carp_vhid) 615 break; 616 617 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 618 (IFF_UP|IFF_RUNNING)) { 619 carpstats.carps_badvhid++; 620 CARP_UNLOCK(ifp->if_carp); 621 m_freem(m); 622 return; 623 } 624 625 getmicrotime(&sc->sc_if.if_lastchange); 626 sc->sc_if.if_ipackets++; 627 sc->sc_if.if_ibytes += m->m_pkthdr.len; 628 629 if (sc->sc_if.if_bpf) { 630 struct ip *ip = mtod(m, struct ip *); 631 uint32_t af1 = af; 632 633 /* BPF wants net byte order */ 634 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 635 ip->ip_off = htons(ip->ip_off); 636 bpf_mtap2(sc->sc_if.if_bpf, &af1, sizeof(af1), m); 637 } 638 639 /* verify the CARP version. */ 640 if (ch->carp_version != CARP_VERSION) { 641 carpstats.carps_badver++; 642 sc->sc_if.if_ierrors++; 643 CARP_UNLOCK(ifp->if_carp); 644 CARP_LOG("%s; invalid version %d\n", 645 sc->sc_if.if_xname, 646 ch->carp_version); 647 m_freem(m); 648 return; 649 } 650 651 /* verify the hash */ 652 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 653 carpstats.carps_badauth++; 654 sc->sc_if.if_ierrors++; 655 CARP_UNLOCK(ifp->if_carp); 656 CARP_LOG("%s: incorrect hash\n", sc->sc_if.if_xname); 657 m_freem(m); 658 return; 659 } 660 661 tmp_counter = ntohl(ch->carp_counter[0]); 662 tmp_counter = tmp_counter<<32; 663 tmp_counter += ntohl(ch->carp_counter[1]); 664 665 /* XXX Replay protection goes here */ 666 667 sc->sc_init_counter = 0; 668 sc->sc_counter = tmp_counter; 669 670 sc_tv.tv_sec = sc->sc_advbase; 671 if (carp_suppress_preempt && sc->sc_advskew < 240) 672 sc_tv.tv_usec = 240 * 1000000 / 256; 673 else 674 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 675 ch_tv.tv_sec = ch->carp_advbase; 676 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 677 678 switch (sc->sc_state) { 679 case INIT: 680 break; 681 case MASTER: 682 /* 683 * If we receive an advertisement from a master who's going to 684 * be more frequent than us, go into BACKUP state. 685 */ 686 if (timevalcmp(&sc_tv, &ch_tv, >) || 687 timevalcmp(&sc_tv, &ch_tv, ==)) { 688 callout_stop(&sc->sc_ad_tmo); 689 CARP_DEBUG("%s: MASTER -> BACKUP " 690 "(more frequent advertisement received)\n", 691 sc->sc_if.if_xname); 692 carp_set_state(sc, BACKUP); 693 carp_setrun(sc, 0); 694 carp_setroute(sc, RTM_DELETE); 695 } 696 break; 697 case BACKUP: 698 /* 699 * If we're pre-empting masters who advertise slower than us, 700 * and this one claims to be slower, treat him as down. 701 */ 702 if (carp_opts[CARPCTL_PREEMPT] && 703 timevalcmp(&sc_tv, &ch_tv, <)) { 704 CARP_DEBUG("%s: BACKUP -> MASTER " 705 "(preempting a slower master)\n", 706 sc->sc_if.if_xname); 707 carp_master_down_locked(sc); 708 break; 709 } 710 711 /* 712 * If the master is going to advertise at such a low frequency 713 * that he's guaranteed to time out, we'd might as well just 714 * treat him as timed out now. 715 */ 716 sc_tv.tv_sec = sc->sc_advbase * 3; 717 if (timevalcmp(&sc_tv, &ch_tv, <)) { 718 CARP_DEBUG("%s: BACKUP -> MASTER " 719 "(master timed out)\n", 720 sc->sc_if.if_xname); 721 carp_master_down_locked(sc); 722 break; 723 } 724 725 /* 726 * Otherwise, we reset the counter and wait for the next 727 * advertisement. 728 */ 729 carp_setrun(sc, af); 730 break; 731 } 732 733 CARP_UNLOCK(ifp->if_carp); 734 735 m_freem(m); 736 return; 737 } 738 739 static void 740 carpdetach(struct carp_softc *sc) 741 { 742 struct ifaddr *ifa; 743 744 callout_stop(&sc->sc_ad_tmo); 745 callout_stop(&sc->sc_md_tmo); 746 callout_stop(&sc->sc_md6_tmo); 747 748 while ((ifa = TAILQ_FIRST(&sc->sc_if.if_addrlist)) != NULL) 749 if (ifa->ifa_addr->sa_family == AF_INET) { 750 struct in_ifaddr *ia = ifatoia(ifa); 751 752 carp_del_addr(sc, &ia->ia_addr); 753 754 /* ripped screaming from in_control(SIOCDIFADDR) */ 755 in_ifscrub(&sc->sc_if, ia); 756 TAILQ_REMOVE(&sc->sc_if.if_addrlist, ifa, ifa_link); 757 TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link); 758 IFAFREE((&ia->ia_ifa)); 759 } 760 } 761 762 /* Detach an interface from the carp. */ 763 void 764 carp_ifdetach(struct ifnet *ifp) 765 { 766 struct carp_softc *sc; 767 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 768 769 CARP_LOCK(cif); 770 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 771 carpdetach(sc); 772 CARP_UNLOCK(cif); 773 } 774 775 static int 776 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 777 { 778 struct m_tag *mtag; 779 struct ifnet *ifp = &sc->sc_if; 780 781 if (sc->sc_init_counter) { 782 /* this could also be seconds since unix epoch */ 783 sc->sc_counter = arc4random(); 784 sc->sc_counter = sc->sc_counter << 32; 785 sc->sc_counter += arc4random(); 786 } else 787 sc->sc_counter++; 788 789 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 790 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 791 792 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 793 794 /* Tag packet for carp_output */ 795 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 796 if (mtag == NULL) { 797 m_freem(m); 798 sc->sc_if.if_oerrors++; 799 return (ENOMEM); 800 } 801 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 802 m_tag_prepend(m, mtag); 803 804 return (0); 805 } 806 807 static void 808 carp_send_ad_all(void) 809 { 810 struct carp_softc *sc; 811 812 mtx_lock(&carp_mtx); 813 LIST_FOREACH(sc, &carpif_list, sc_next) { 814 if (sc->sc_carpdev == NULL) 815 continue; 816 CARP_SCLOCK(sc); 817 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) && 818 sc->sc_state == MASTER) 819 carp_send_ad_locked(sc); 820 CARP_SCUNLOCK(sc); 821 } 822 mtx_unlock(&carp_mtx); 823 } 824 825 static void 826 carp_send_ad(void *v) 827 { 828 struct carp_softc *sc = v; 829 830 CARP_SCLOCK(sc); 831 carp_send_ad_locked(sc); 832 CARP_SCUNLOCK(sc); 833 } 834 835 static void 836 carp_send_ad_locked(struct carp_softc *sc) 837 { 838 struct carp_header ch; 839 struct timeval tv; 840 struct carp_header *ch_ptr; 841 struct mbuf *m; 842 int len, advbase, advskew; 843 844 CARP_SCLOCK_ASSERT(sc); 845 846 /* bow out if we've lost our UPness or RUNNINGuiness */ 847 if ((sc->sc_if.if_flags & 848 (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 849 advbase = 255; 850 advskew = 255; 851 } else { 852 advbase = sc->sc_advbase; 853 if (!carp_suppress_preempt || sc->sc_advskew > 240) 854 advskew = sc->sc_advskew; 855 else 856 advskew = 240; 857 tv.tv_sec = advbase; 858 tv.tv_usec = advskew * 1000000 / 256; 859 } 860 861 ch.carp_version = CARP_VERSION; 862 ch.carp_type = CARP_ADVERTISEMENT; 863 ch.carp_vhid = sc->sc_vhid; 864 ch.carp_advbase = advbase; 865 ch.carp_advskew = advskew; 866 ch.carp_authlen = 7; /* XXX DEFINE */ 867 ch.carp_pad1 = 0; /* must be zero */ 868 ch.carp_cksum = 0; 869 870 #ifdef INET 871 if (sc->sc_ia) { 872 struct ip *ip; 873 874 MGETHDR(m, M_DONTWAIT, MT_HEADER); 875 if (m == NULL) { 876 sc->sc_ac.ac_if.if_oerrors++; 877 carpstats.carps_onomem++; 878 /* XXX maybe less ? */ 879 if (advbase != 255 || advskew != 255) 880 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 881 carp_send_ad, sc); 882 return; 883 } 884 len = sizeof(*ip) + sizeof(ch); 885 m->m_pkthdr.len = len; 886 m->m_pkthdr.rcvif = NULL; 887 m->m_len = len; 888 MH_ALIGN(m, m->m_len); 889 m->m_flags |= M_MCAST; 890 ip = mtod(m, struct ip *); 891 ip->ip_v = IPVERSION; 892 ip->ip_hl = sizeof(*ip) >> 2; 893 ip->ip_tos = IPTOS_LOWDELAY; 894 ip->ip_len = len; 895 ip->ip_id = ip_newid(); 896 ip->ip_off = IP_DF; 897 ip->ip_ttl = CARP_DFLTTL; 898 ip->ip_p = IPPROTO_CARP; 899 ip->ip_sum = 0; 900 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; 901 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 902 903 ch_ptr = (struct carp_header *)(&ip[1]); 904 bcopy(&ch, ch_ptr, sizeof(ch)); 905 if (carp_prepare_ad(m, sc, ch_ptr)) 906 return; 907 908 m->m_data += sizeof(*ip); 909 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 910 m->m_data -= sizeof(*ip); 911 912 getmicrotime(&sc->sc_if.if_lastchange); 913 sc->sc_ac.ac_if.if_opackets++; 914 sc->sc_ac.ac_if.if_obytes += len; 915 carpstats.carps_opackets++; 916 917 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 918 sc->sc_if.if_oerrors++; 919 if (sc->sc_sendad_errors < INT_MAX) 920 sc->sc_sendad_errors++; 921 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 922 carp_suppress_preempt++; 923 if (carp_suppress_preempt == 1) { 924 CARP_SCUNLOCK(sc); 925 carp_send_ad_all(); 926 CARP_SCLOCK(sc); 927 } 928 } 929 sc->sc_sendad_success = 0; 930 } else { 931 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 932 if (++sc->sc_sendad_success >= 933 CARP_SENDAD_MIN_SUCCESS) { 934 carp_suppress_preempt--; 935 sc->sc_sendad_errors = 0; 936 } 937 } else 938 sc->sc_sendad_errors = 0; 939 } 940 } 941 #endif /* INET */ 942 #ifdef INET6 943 if (sc->sc_ia6) { 944 struct ip6_hdr *ip6; 945 946 MGETHDR(m, M_DONTWAIT, MT_HEADER); 947 if (m == NULL) { 948 sc->sc_ac.ac_if.if_oerrors++; 949 carpstats.carps_onomem++; 950 /* XXX maybe less ? */ 951 if (advbase != 255 || advskew != 255) 952 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 953 carp_send_ad, sc); 954 return; 955 } 956 len = sizeof(*ip6) + sizeof(ch); 957 m->m_pkthdr.len = len; 958 m->m_pkthdr.rcvif = NULL; 959 m->m_len = len; 960 MH_ALIGN(m, m->m_len); 961 m->m_flags |= M_MCAST; 962 ip6 = mtod(m, struct ip6_hdr *); 963 bzero(ip6, sizeof(*ip6)); 964 ip6->ip6_vfc |= IPV6_VERSION; 965 ip6->ip6_hlim = CARP_DFLTTL; 966 ip6->ip6_nxt = IPPROTO_CARP; 967 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 968 sizeof(struct in6_addr)); 969 /* set the multicast destination */ 970 971 ip6->ip6_dst.s6_addr8[0] = 0xff; 972 ip6->ip6_dst.s6_addr8[1] = 0x02; 973 ip6->ip6_dst.s6_addr8[15] = 0x12; 974 975 ch_ptr = (struct carp_header *)(&ip6[1]); 976 bcopy(&ch, ch_ptr, sizeof(ch)); 977 if (carp_prepare_ad(m, sc, ch_ptr)) 978 return; 979 980 m->m_data += sizeof(*ip6); 981 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 982 m->m_data -= sizeof(*ip6); 983 984 getmicrotime(&sc->sc_if.if_lastchange); 985 sc->sc_if.if_opackets++; 986 sc->sc_if.if_obytes += len; 987 carpstats.carps_opackets6++; 988 989 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 990 sc->sc_if.if_oerrors++; 991 if (sc->sc_sendad_errors < INT_MAX) 992 sc->sc_sendad_errors++; 993 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 994 carp_suppress_preempt++; 995 if (carp_suppress_preempt == 1) { 996 CARP_SCUNLOCK(sc); 997 carp_send_ad_all(); 998 CARP_SCLOCK(sc); 999 } 1000 } 1001 sc->sc_sendad_success = 0; 1002 } else { 1003 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1004 if (++sc->sc_sendad_success >= 1005 CARP_SENDAD_MIN_SUCCESS) { 1006 carp_suppress_preempt--; 1007 sc->sc_sendad_errors = 0; 1008 } 1009 } else 1010 sc->sc_sendad_errors = 0; 1011 } 1012 } 1013 #endif /* INET6 */ 1014 1015 if (advbase != 255 || advskew != 255) 1016 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1017 carp_send_ad, sc); 1018 1019 } 1020 1021 /* 1022 * Broadcast a gratuitous ARP request containing 1023 * the virtual router MAC address for each IP address 1024 * associated with the virtual router. 1025 */ 1026 static void 1027 carp_send_arp(struct carp_softc *sc) 1028 { 1029 struct ifaddr *ifa; 1030 1031 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1032 1033 if (ifa->ifa_addr->sa_family != AF_INET) 1034 continue; 1035 1036 /* arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr); */ 1037 arp_ifinit2(sc->sc_carpdev, ifa, sc->sc_ac.ac_enaddr); 1038 1039 DELAY(1000); /* XXX */ 1040 } 1041 } 1042 1043 #ifdef INET6 1044 static void 1045 carp_send_na(struct carp_softc *sc) 1046 { 1047 struct ifaddr *ifa; 1048 struct in6_addr *in6; 1049 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1050 1051 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1052 1053 if (ifa->ifa_addr->sa_family != AF_INET6) 1054 continue; 1055 1056 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1057 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1058 ND_NA_FLAG_OVERRIDE, 1, NULL); 1059 DELAY(1000); /* XXX */ 1060 } 1061 } 1062 #endif /* INET6 */ 1063 1064 static int 1065 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1066 { 1067 struct carp_softc *vh; 1068 struct ifaddr *ifa; 1069 int count = 0; 1070 1071 CARP_LOCK_ASSERT(cif); 1072 1073 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1074 if ((type == CARP_COUNT_RUNNING && 1075 (vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1076 (IFF_UP|IFF_RUNNING)) || 1077 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1078 TAILQ_FOREACH(ifa, &vh->sc_ac.ac_if.if_addrlist, 1079 ifa_list) { 1080 if (ifa->ifa_addr->sa_family == AF_INET && 1081 ia->ia_addr.sin_addr.s_addr == 1082 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1083 count++; 1084 } 1085 } 1086 } 1087 return (count); 1088 } 1089 1090 int 1091 carp_iamatch(void *v, struct in_ifaddr *ia, 1092 struct in_addr *isaddr, u_int8_t **enaddr) 1093 { 1094 struct carp_if *cif = v; 1095 struct carp_softc *vh; 1096 int index, count = 0; 1097 struct ifaddr *ifa; 1098 1099 CARP_LOCK(cif); 1100 1101 if (carp_opts[CARPCTL_ARPBALANCE]) { 1102 /* 1103 * XXX proof of concept implementation. 1104 * We use the source ip to decide which virtual host should 1105 * handle the request. If we're master of that virtual host, 1106 * then we respond, otherwise, just drop the arp packet on 1107 * the floor. 1108 */ 1109 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1110 if (count == 0) { 1111 /* should never reach this */ 1112 CARP_UNLOCK(cif); 1113 return (0); 1114 } 1115 1116 /* this should be a hash, like pf_hash() */ 1117 index = isaddr->s_addr % count; 1118 count = 0; 1119 1120 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1121 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1122 (IFF_UP|IFF_RUNNING)) { 1123 TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, 1124 ifa_list) { 1125 if (ifa->ifa_addr->sa_family == 1126 AF_INET && 1127 ia->ia_addr.sin_addr.s_addr == 1128 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1129 if (count == index) { 1130 if (vh->sc_state == 1131 MASTER) { 1132 *enaddr = vh->sc_ac.ac_enaddr; 1133 CARP_UNLOCK(cif); 1134 return (1); 1135 } else { 1136 CARP_UNLOCK(cif); 1137 return (0); 1138 } 1139 } 1140 count++; 1141 } 1142 } 1143 } 1144 } 1145 } else { 1146 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1147 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1148 (IFF_UP|IFF_RUNNING) && ia->ia_ifp == 1149 &vh->sc_if) { 1150 *enaddr = vh->sc_ac.ac_enaddr; 1151 CARP_UNLOCK(cif); 1152 return (1); 1153 } 1154 } 1155 } 1156 CARP_UNLOCK(cif); 1157 return (0); 1158 } 1159 1160 #ifdef INET6 1161 struct ifaddr * 1162 carp_iamatch6(void *v, struct in6_addr *taddr) 1163 { 1164 struct carp_if *cif = v; 1165 struct carp_softc *vh; 1166 struct ifaddr *ifa; 1167 1168 CARP_LOCK(cif); 1169 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1170 TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, ifa_list) { 1171 if (IN6_ARE_ADDR_EQUAL(taddr, 1172 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1173 ((vh->sc_if.if_flags & 1174 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { 1175 CARP_UNLOCK(cif); 1176 return (ifa); 1177 } 1178 } 1179 } 1180 CARP_UNLOCK(cif); 1181 1182 return (NULL); 1183 } 1184 1185 void * 1186 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1187 { 1188 struct m_tag *mtag; 1189 struct carp_if *cif = v; 1190 struct carp_softc *sc; 1191 struct ifaddr *ifa; 1192 1193 CARP_LOCK(cif); 1194 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1195 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1196 if (IN6_ARE_ADDR_EQUAL(taddr, 1197 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1198 ((sc->sc_if.if_flags & 1199 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { 1200 struct ifnet *ifp = &sc->sc_if; 1201 mtag = m_tag_get(PACKET_TAG_CARP, 1202 sizeof(struct ifnet *), M_NOWAIT); 1203 if (mtag == NULL) { 1204 /* better a bit than nothing */ 1205 CARP_UNLOCK(cif); 1206 return (sc->sc_ac.ac_enaddr); 1207 } 1208 bcopy(&ifp, (caddr_t)(mtag + 1), 1209 sizeof(struct ifnet *)); 1210 m_tag_prepend(m, mtag); 1211 1212 CARP_UNLOCK(cif); 1213 return (sc->sc_ac.ac_enaddr); 1214 } 1215 } 1216 } 1217 CARP_UNLOCK(cif); 1218 1219 return (NULL); 1220 } 1221 #endif 1222 1223 struct ifnet * 1224 carp_forus(void *v, void *dhost) 1225 { 1226 struct carp_if *cif = v; 1227 struct carp_softc *vh; 1228 u_int8_t *ena = dhost; 1229 1230 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1231 return (NULL); 1232 1233 CARP_LOCK(cif); 1234 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1235 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1236 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && 1237 !bcmp(dhost, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) { 1238 CARP_UNLOCK(cif); 1239 return (&vh->sc_if); 1240 } 1241 1242 CARP_UNLOCK(cif); 1243 return (NULL); 1244 } 1245 1246 static void 1247 carp_master_down(void *v) 1248 { 1249 struct carp_softc *sc = v; 1250 1251 CARP_SCLOCK(sc); 1252 carp_master_down_locked(sc); 1253 CARP_SCUNLOCK(sc); 1254 } 1255 1256 static void 1257 carp_master_down_locked(struct carp_softc *sc) 1258 { 1259 if (sc->sc_carpdev) 1260 CARP_SCLOCK_ASSERT(sc); 1261 1262 switch (sc->sc_state) { 1263 case INIT: 1264 printf("%s: master_down event in INIT state\n", 1265 sc->sc_if.if_xname); 1266 break; 1267 case MASTER: 1268 break; 1269 case BACKUP: 1270 carp_set_state(sc, MASTER); 1271 carp_send_ad_locked(sc); 1272 carp_send_arp(sc); 1273 #ifdef INET6 1274 carp_send_na(sc); 1275 #endif /* INET6 */ 1276 carp_setrun(sc, 0); 1277 carp_setroute(sc, RTM_ADD); 1278 break; 1279 } 1280 } 1281 1282 /* 1283 * When in backup state, af indicates whether to reset the master down timer 1284 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1285 */ 1286 static void 1287 carp_setrun(struct carp_softc *sc, sa_family_t af) 1288 { 1289 struct timeval tv; 1290 1291 if (sc->sc_carpdev) 1292 CARP_SCLOCK_ASSERT(sc); 1293 1294 if (sc->sc_if.if_flags & IFF_UP && 1295 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) 1296 sc->sc_if.if_flags |= IFF_RUNNING; 1297 else { 1298 sc->sc_if.if_flags &= ~IFF_RUNNING; 1299 carp_setroute(sc, RTM_DELETE); 1300 return; 1301 } 1302 1303 switch (sc->sc_state) { 1304 case INIT: 1305 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1306 carp_send_ad_locked(sc); 1307 carp_send_arp(sc); 1308 #ifdef INET6 1309 carp_send_na(sc); 1310 #endif /* INET6 */ 1311 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1312 sc->sc_if.if_xname); 1313 carp_set_state(sc, MASTER); 1314 carp_setroute(sc, RTM_ADD); 1315 } else { 1316 CARP_DEBUG("%s: INIT -> BACKUP\n", sc->sc_if.if_xname); 1317 carp_set_state(sc, BACKUP); 1318 carp_setroute(sc, RTM_DELETE); 1319 carp_setrun(sc, 0); 1320 } 1321 break; 1322 case BACKUP: 1323 callout_stop(&sc->sc_ad_tmo); 1324 tv.tv_sec = 3 * sc->sc_advbase; 1325 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1326 switch (af) { 1327 #ifdef INET 1328 case AF_INET: 1329 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1330 carp_master_down, sc); 1331 break; 1332 #endif /* INET */ 1333 #ifdef INET6 1334 case AF_INET6: 1335 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1336 carp_master_down, sc); 1337 break; 1338 #endif /* INET6 */ 1339 default: 1340 if (sc->sc_naddrs) 1341 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1342 carp_master_down, sc); 1343 if (sc->sc_naddrs6) 1344 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1345 carp_master_down, sc); 1346 break; 1347 } 1348 break; 1349 case MASTER: 1350 tv.tv_sec = sc->sc_advbase; 1351 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1352 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1353 carp_send_ad, sc); 1354 break; 1355 } 1356 } 1357 1358 static int 1359 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1360 { 1361 struct ifnet *ifp; 1362 struct carp_if *cif; 1363 struct in_ifaddr *ia, *ia_if; 1364 struct ip_moptions *imo = &sc->sc_imo; 1365 struct in_addr addr; 1366 u_long iaddr = htonl(sin->sin_addr.s_addr); 1367 int own, error; 1368 1369 if (sin->sin_addr.s_addr == 0) { 1370 if (!(sc->sc_if.if_flags & IFF_UP)) 1371 carp_set_state(sc, INIT); 1372 if (sc->sc_naddrs) 1373 sc->sc_if.if_flags |= IFF_UP; 1374 carp_setrun(sc, 0); 1375 return (0); 1376 } 1377 1378 /* we have to do it by hands to check we won't match on us */ 1379 ia_if = NULL; own = 0; 1380 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 1381 /* and, yeah, we need a multicast-capable iface too */ 1382 if (ia->ia_ifp != &sc->sc_if && 1383 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1384 (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1385 if (!ia_if) 1386 ia_if = ia; 1387 if (sin->sin_addr.s_addr == 1388 ia->ia_addr.sin_addr.s_addr) 1389 own++; 1390 } 1391 } 1392 1393 if (!ia_if) 1394 return (EADDRNOTAVAIL); 1395 1396 ia = ia_if; 1397 ifp = ia->ia_ifp; 1398 1399 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1400 (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) 1401 return (EADDRNOTAVAIL); 1402 1403 if (imo->imo_num_memberships == 0) { 1404 addr.s_addr = htonl(INADDR_CARP_GROUP); 1405 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 1406 return (ENOBUFS); 1407 imo->imo_num_memberships++; 1408 imo->imo_multicast_ifp = ifp; 1409 imo->imo_multicast_ttl = CARP_DFLTTL; 1410 imo->imo_multicast_loop = 0; 1411 } 1412 1413 if (!ifp->if_carp) { 1414 1415 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, 1416 M_WAITOK|M_ZERO); 1417 if (!cif) { 1418 error = ENOBUFS; 1419 goto cleanup; 1420 } 1421 if ((error = ifpromisc(ifp, 1))) { 1422 FREE(cif, M_CARP); 1423 goto cleanup; 1424 } 1425 1426 CARP_LOCK_INIT(cif); 1427 CARP_LOCK(cif); 1428 cif->vhif_ifp = ifp; 1429 TAILQ_INIT(&cif->vhif_vrs); 1430 callout_init(&cif->cif_tmo, NET_CALLOUT_MPSAFE); 1431 ifp->if_carp = cif; 1432 1433 } else { 1434 struct carp_softc *vr; 1435 1436 cif = (struct carp_if *)ifp->if_carp; 1437 CARP_LOCK(cif); 1438 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1439 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1440 CARP_UNLOCK(cif); 1441 error = EINVAL; 1442 goto cleanup; 1443 } 1444 } 1445 sc->sc_ia = ia; 1446 sc->sc_carpdev = ifp; 1447 1448 { /* XXX prevent endless loop if already in queue */ 1449 struct carp_softc *vr, *after = NULL; 1450 int myself = 0; 1451 cif = (struct carp_if *)ifp->if_carp; 1452 1453 /* XXX: cif should not change, right? So we still hold the lock */ 1454 CARP_LOCK_ASSERT(cif); 1455 1456 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1457 if (vr == sc) 1458 myself = 1; 1459 if (vr->sc_vhid < sc->sc_vhid) 1460 after = vr; 1461 } 1462 1463 if (!myself) { 1464 /* We're trying to keep things in order */ 1465 if (after == NULL) { 1466 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1467 } else { 1468 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1469 } 1470 cif->vhif_nvrs++; 1471 } 1472 } 1473 1474 sc->sc_naddrs++; 1475 sc->sc_if.if_flags |= IFF_UP; 1476 if (own) 1477 sc->sc_advskew = 0; 1478 carp_carpdev_state_locked(cif); 1479 carp_setrun(sc, 0); 1480 1481 CARP_UNLOCK(cif); 1482 1483 return (0); 1484 1485 cleanup: 1486 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1487 return (error); 1488 } 1489 1490 static int 1491 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1492 { 1493 int error = 0; 1494 1495 if (!--sc->sc_naddrs) { 1496 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1497 struct ip_moptions *imo = &sc->sc_imo; 1498 1499 CARP_LOCK(cif); 1500 callout_stop(&sc->sc_ad_tmo); 1501 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1502 sc->sc_vhid = -1; 1503 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1504 imo->imo_multicast_ifp = NULL; 1505 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1506 if (!--cif->vhif_nvrs) { 1507 callout_drain(&cif->cif_tmo); 1508 sc->sc_carpdev->if_carp = NULL; 1509 CARP_LOCK_DESTROY(cif); 1510 FREE(cif, M_IFADDR); 1511 } else { 1512 CARP_UNLOCK(cif); 1513 } 1514 } 1515 1516 return (error); 1517 } 1518 1519 #ifdef INET6 1520 static int 1521 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1522 { 1523 struct ifnet *ifp; 1524 struct carp_if *cif; 1525 struct in6_ifaddr *ia, *ia_if; 1526 struct ip6_moptions *im6o = &sc->sc_im6o; 1527 struct in6_multi_mship *imm; 1528 struct sockaddr_in6 addr; 1529 int own, error; 1530 1531 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1532 if (!(sc->sc_if.if_flags & IFF_UP)) 1533 carp_set_state(sc, INIT); 1534 if (sc->sc_naddrs6) 1535 sc->sc_if.if_flags |= IFF_UP; 1536 carp_setrun(sc, 0); 1537 return (0); 1538 } 1539 1540 /* we have to do it by hands to check we won't match on us */ 1541 ia_if = NULL; own = 0; 1542 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1543 int i; 1544 1545 for (i = 0; i < 4; i++) { 1546 if ((sin6->sin6_addr.s6_addr32[i] & 1547 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1548 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1549 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1550 break; 1551 } 1552 /* and, yeah, we need a multicast-capable iface too */ 1553 if (ia->ia_ifp != &sc->sc_ac.ac_if && 1554 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1555 (i == 4)) { 1556 if (!ia_if) 1557 ia_if = ia; 1558 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1559 &ia->ia_addr.sin6_addr)) 1560 own++; 1561 } 1562 } 1563 1564 if (!ia_if) 1565 return (EADDRNOTAVAIL); 1566 ia = ia_if; 1567 ifp = ia->ia_ifp; 1568 1569 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1570 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1571 return (EADDRNOTAVAIL); 1572 1573 if (!sc->sc_naddrs6) { 1574 im6o->im6o_multicast_ifp = ifp; 1575 1576 /* join CARP multicast address */ 1577 bzero(&addr, sizeof(addr)); 1578 addr.sin6_family = AF_INET6; 1579 addr.sin6_len = sizeof(addr); 1580 addr.sin6_addr.s6_addr16[0] = htons(0xff02); 1581 addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 1582 addr.sin6_addr.s6_addr8[15] = 0x12; 1583 if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) 1584 goto cleanup; 1585 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1586 1587 /* join solicited multicast address */ 1588 bzero(&addr.sin6_addr, sizeof(addr.sin6_addr)); 1589 addr.sin6_addr.s6_addr16[0] = htons(0xff02); 1590 addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 1591 addr.sin6_addr.s6_addr32[1] = 0; 1592 addr.sin6_addr.s6_addr32[2] = htonl(1); 1593 addr.sin6_addr.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1594 addr.sin6_addr.s6_addr8[12] = 0xff; 1595 if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) 1596 goto cleanup; 1597 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1598 } 1599 1600 if (!ifp->if_carp) { 1601 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, 1602 M_WAITOK|M_ZERO); 1603 if (!cif) { 1604 error = ENOBUFS; 1605 goto cleanup; 1606 } 1607 if ((error = ifpromisc(ifp, 1))) { 1608 FREE(cif, M_CARP); 1609 goto cleanup; 1610 } 1611 1612 CARP_LOCK_INIT(cif); 1613 CARP_LOCK(cif); 1614 cif->vhif_ifp = ifp; 1615 TAILQ_INIT(&cif->vhif_vrs); 1616 callout_init(&cif->cif_tmo, NET_CALLOUT_MPSAFE); 1617 ifp->if_carp = cif; 1618 1619 } else { 1620 struct carp_softc *vr; 1621 1622 cif = (struct carp_if *)ifp->if_carp; 1623 CARP_LOCK(cif); 1624 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1625 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1626 CARP_UNLOCK(cif); 1627 error = EINVAL; 1628 goto cleanup; 1629 } 1630 } 1631 sc->sc_ia6 = ia; 1632 sc->sc_carpdev = ifp; 1633 1634 { /* XXX prevent endless loop if already in queue */ 1635 struct carp_softc *vr, *after = NULL; 1636 int myself = 0; 1637 cif = (struct carp_if *)ifp->if_carp; 1638 CARP_LOCK_ASSERT(cif); 1639 1640 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1641 if (vr == sc) 1642 myself = 1; 1643 if (vr->sc_vhid < sc->sc_vhid) 1644 after = vr; 1645 } 1646 1647 if (!myself) { 1648 /* We're trying to keep things in order */ 1649 if (after == NULL) { 1650 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1651 } else { 1652 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1653 } 1654 cif->vhif_nvrs++; 1655 } 1656 } 1657 1658 sc->sc_naddrs6++; 1659 sc->sc_ac.ac_if.if_flags |= IFF_UP; 1660 if (own) 1661 sc->sc_advskew = 0; 1662 carp_carpdev_state_locked(cif); 1663 carp_setrun(sc, 0); 1664 1665 CARP_UNLOCK(cif); 1666 1667 return (0); 1668 1669 cleanup: 1670 /* clean up multicast memberships */ 1671 if (!sc->sc_naddrs6) { 1672 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1673 imm = LIST_FIRST(&im6o->im6o_memberships); 1674 LIST_REMOVE(imm, i6mm_chain); 1675 in6_leavegroup(imm); 1676 } 1677 } 1678 return (error); 1679 } 1680 1681 static int 1682 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1683 { 1684 int error = 0; 1685 1686 if (!--sc->sc_naddrs6) { 1687 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1688 struct ip6_moptions *im6o = &sc->sc_im6o; 1689 1690 CARP_LOCK(cif); 1691 callout_stop(&sc->sc_ad_tmo); 1692 sc->sc_ac.ac_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1693 sc->sc_vhid = -1; 1694 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1695 struct in6_multi_mship *imm = 1696 LIST_FIRST(&im6o->im6o_memberships); 1697 1698 LIST_REMOVE(imm, i6mm_chain); 1699 in6_leavegroup(imm); 1700 } 1701 im6o->im6o_multicast_ifp = NULL; 1702 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1703 if (!--cif->vhif_nvrs) { 1704 callout_drain(&cif->cif_tmo); 1705 CARP_LOCK_DESTROY(cif); 1706 sc->sc_carpdev->if_carp = NULL; 1707 FREE(cif, M_IFADDR); 1708 } else 1709 CARP_UNLOCK(cif); 1710 } 1711 1712 return (error); 1713 } 1714 #endif /* INET6 */ 1715 1716 static int 1717 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1718 { 1719 struct carp_softc *sc = ifp->if_softc, *vr; 1720 struct carpreq carpr; 1721 struct ifaddr *ifa; 1722 struct ifreq *ifr; 1723 struct ifaliasreq *ifra; 1724 int locked = 0, error = 0; 1725 1726 ifa = (struct ifaddr *)addr; 1727 ifra = (struct ifaliasreq *)addr; 1728 ifr = (struct ifreq *)addr; 1729 1730 switch (cmd) { 1731 case SIOCSIFADDR: 1732 switch (ifa->ifa_addr->sa_family) { 1733 #ifdef INET 1734 case AF_INET: 1735 sc->sc_if.if_flags |= IFF_UP; 1736 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1737 sizeof(struct sockaddr)); 1738 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1739 break; 1740 #endif /* INET */ 1741 #ifdef INET6 1742 case AF_INET6: 1743 sc->sc_if.if_flags |= IFF_UP; 1744 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1745 break; 1746 #endif /* INET6 */ 1747 default: 1748 error = EAFNOSUPPORT; 1749 break; 1750 } 1751 break; 1752 1753 case SIOCAIFADDR: 1754 switch (ifa->ifa_addr->sa_family) { 1755 #ifdef INET 1756 case AF_INET: 1757 sc->sc_if.if_flags |= IFF_UP; 1758 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1759 sizeof(struct sockaddr)); 1760 error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); 1761 break; 1762 #endif /* INET */ 1763 #ifdef INET6 1764 case AF_INET6: 1765 sc->sc_if.if_flags |= IFF_UP; 1766 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1767 break; 1768 #endif /* INET6 */ 1769 default: 1770 error = EAFNOSUPPORT; 1771 break; 1772 } 1773 break; 1774 1775 case SIOCDIFADDR: 1776 switch (ifa->ifa_addr->sa_family) { 1777 #ifdef INET 1778 case AF_INET: 1779 error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); 1780 break; 1781 #endif /* INET */ 1782 #ifdef INET6 1783 case AF_INET6: 1784 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1785 break; 1786 #endif /* INET6 */ 1787 default: 1788 error = EAFNOSUPPORT; 1789 break; 1790 } 1791 break; 1792 1793 case SIOCSIFFLAGS: 1794 if (sc->sc_carpdev) { 1795 locked = 1; 1796 CARP_SCLOCK(sc); 1797 } 1798 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1799 callout_stop(&sc->sc_ad_tmo); 1800 callout_stop(&sc->sc_md_tmo); 1801 callout_stop(&sc->sc_md6_tmo); 1802 if (sc->sc_state == MASTER) 1803 carp_send_ad_locked(sc); 1804 carp_set_state(sc, INIT); 1805 carp_setrun(sc, 0); 1806 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 1807 sc->sc_if.if_flags |= IFF_UP; 1808 carp_setrun(sc, 0); 1809 } 1810 break; 1811 1812 case SIOCSVH: 1813 if ((error = suser(curthread)) != 0) 1814 break; 1815 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1816 break; 1817 error = 1; 1818 if (sc->sc_carpdev) { 1819 locked = 1; 1820 CARP_SCLOCK(sc); 1821 } 1822 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1823 switch (carpr.carpr_state) { 1824 case BACKUP: 1825 callout_stop(&sc->sc_ad_tmo); 1826 carp_set_state(sc, BACKUP); 1827 carp_setrun(sc, 0); 1828 carp_setroute(sc, RTM_DELETE); 1829 break; 1830 case MASTER: 1831 carp_master_down_locked(sc); 1832 break; 1833 default: 1834 break; 1835 } 1836 } 1837 if (carpr.carpr_vhid > 0) { 1838 if (carpr.carpr_vhid > 255) { 1839 error = EINVAL; 1840 break; 1841 } 1842 if (sc->sc_carpdev) { 1843 struct carp_if *cif; 1844 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1845 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1846 if (vr != sc && 1847 vr->sc_vhid == carpr.carpr_vhid) 1848 return EEXIST; 1849 } 1850 sc->sc_vhid = carpr.carpr_vhid; 1851 sc->sc_ac.ac_enaddr[0] = 0; 1852 sc->sc_ac.ac_enaddr[1] = 0; 1853 sc->sc_ac.ac_enaddr[2] = 0x5e; 1854 sc->sc_ac.ac_enaddr[3] = 0; 1855 sc->sc_ac.ac_enaddr[4] = 1; 1856 sc->sc_ac.ac_enaddr[5] = sc->sc_vhid; 1857 error--; 1858 } 1859 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1860 if (carpr.carpr_advskew >= 255) { 1861 error = EINVAL; 1862 break; 1863 } 1864 if (carpr.carpr_advbase > 255) { 1865 error = EINVAL; 1866 break; 1867 } 1868 sc->sc_advbase = carpr.carpr_advbase; 1869 sc->sc_advskew = carpr.carpr_advskew; 1870 error--; 1871 } 1872 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1873 if (error > 0) 1874 error = EINVAL; 1875 else { 1876 error = 0; 1877 carp_setrun(sc, 0); 1878 } 1879 break; 1880 1881 case SIOCGVH: 1882 /* XXX: lockless read */ 1883 bzero(&carpr, sizeof(carpr)); 1884 carpr.carpr_state = sc->sc_state; 1885 carpr.carpr_vhid = sc->sc_vhid; 1886 carpr.carpr_advbase = sc->sc_advbase; 1887 carpr.carpr_advskew = sc->sc_advskew; 1888 if (suser(curthread) == 0) 1889 bcopy(sc->sc_key, carpr.carpr_key, 1890 sizeof(carpr.carpr_key)); 1891 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1892 break; 1893 1894 default: 1895 error = EINVAL; 1896 } 1897 1898 if (locked) 1899 CARP_SCUNLOCK(sc); 1900 1901 carp_hmac_prepare(sc); 1902 1903 return (error); 1904 } 1905 1906 /* 1907 * XXX: this is looutput. We should eventually use it from there. 1908 */ 1909 static int 1910 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1911 struct rtentry *rt) 1912 { 1913 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1914 1915 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1916 m_freem(m); 1917 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1918 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1919 } 1920 1921 ifp->if_opackets++; 1922 ifp->if_obytes += m->m_pkthdr.len; 1923 #if 1 /* XXX */ 1924 switch (dst->sa_family) { 1925 case AF_INET: 1926 case AF_INET6: 1927 case AF_IPX: 1928 case AF_APPLETALK: 1929 break; 1930 default: 1931 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 1932 m_freem(m); 1933 return (EAFNOSUPPORT); 1934 } 1935 #endif 1936 return(if_simloop(ifp, m, dst->sa_family, 0)); 1937 } 1938 1939 /* 1940 * Start output on carp interface. This function should never be called. 1941 */ 1942 static void 1943 carp_start(struct ifnet *ifp) 1944 { 1945 #ifdef DEBUG 1946 printf("%s: start called\n", ifp->if_xname); 1947 #endif 1948 } 1949 1950 int 1951 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 1952 struct rtentry *rt) 1953 { 1954 struct m_tag *mtag; 1955 struct carp_softc *sc; 1956 struct ifnet *carp_ifp; 1957 1958 if (!sa) 1959 return (0); 1960 1961 switch (sa->sa_family) { 1962 #ifdef INET 1963 case AF_INET: 1964 break; 1965 #endif /* INET */ 1966 #ifdef INET6 1967 case AF_INET6: 1968 break; 1969 #endif /* INET6 */ 1970 default: 1971 return (0); 1972 } 1973 1974 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1975 if (mtag == NULL) 1976 return (0); 1977 1978 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 1979 sc = carp_ifp->if_softc; 1980 1981 /* Set the source MAC address to Virtual Router MAC Address */ 1982 switch (ifp->if_type) { 1983 case IFT_ETHER: 1984 case IFT_L2VLAN: { 1985 struct ether_header *eh; 1986 1987 eh = mtod(m, struct ether_header *); 1988 eh->ether_shost[0] = 0; 1989 eh->ether_shost[1] = 0; 1990 eh->ether_shost[2] = 0x5e; 1991 eh->ether_shost[3] = 0; 1992 eh->ether_shost[4] = 1; 1993 eh->ether_shost[5] = sc->sc_vhid; 1994 } 1995 break; 1996 case IFT_FDDI: { 1997 struct fddi_header *fh; 1998 1999 fh = mtod(m, struct fddi_header *); 2000 fh->fddi_shost[0] = 0; 2001 fh->fddi_shost[1] = 0; 2002 fh->fddi_shost[2] = 0x5e; 2003 fh->fddi_shost[3] = 0; 2004 fh->fddi_shost[4] = 1; 2005 fh->fddi_shost[5] = sc->sc_vhid; 2006 } 2007 break; 2008 case IFT_ISO88025: { 2009 struct iso88025_header *th; 2010 th = mtod(m, struct iso88025_header *); 2011 th->iso88025_shost[0] = 3; 2012 th->iso88025_shost[1] = 0; 2013 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 2014 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 2015 th->iso88025_shost[4] = 0; 2016 th->iso88025_shost[5] = 0; 2017 } 2018 break; 2019 default: 2020 printf("%s: carp is not supported for this interface type\n", 2021 ifp->if_xname); 2022 return (EOPNOTSUPP); 2023 } 2024 2025 return (0); 2026 } 2027 2028 static void 2029 carp_set_state(struct carp_softc *sc, int state) 2030 { 2031 2032 if (sc->sc_carpdev) 2033 CARP_SCLOCK_ASSERT(sc); 2034 2035 if (sc->sc_state == state) 2036 return; 2037 2038 sc->sc_state = state; 2039 switch (state) { 2040 case BACKUP: 2041 sc->sc_ac.ac_if.if_link_state = LINK_STATE_DOWN; 2042 break; 2043 case MASTER: 2044 sc->sc_ac.ac_if.if_link_state = LINK_STATE_UP; 2045 break; 2046 default: 2047 sc->sc_ac.ac_if.if_link_state = LINK_STATE_UNKNOWN; 2048 break; 2049 } 2050 rt_ifmsg(&sc->sc_ac.ac_if); 2051 } 2052 2053 void 2054 carp_carpdev_state(void *v) 2055 { 2056 struct carp_if *cif = v; 2057 2058 /* 2059 * We came here from interrupt handler of network 2060 * card. To avoid multiple LORs, we will queue function 2061 * for later. 2062 */ 2063 2064 callout_reset(&cif->cif_tmo, 1, carp_carpdev_state1, v); 2065 } 2066 2067 void 2068 carp_carpdev_state1(void *v) 2069 { 2070 struct carp_if *cif = v; 2071 2072 CARP_LOCK(cif); 2073 carp_carpdev_state_locked(cif); 2074 CARP_UNLOCK(cif); 2075 } 2076 2077 static void 2078 carp_carpdev_state_locked(struct carp_if *cif) 2079 { 2080 struct carp_softc *sc; 2081 2082 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2083 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2084 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2085 sc->sc_flags_backup = sc->sc_if.if_flags; 2086 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 2087 callout_stop(&sc->sc_ad_tmo); 2088 callout_stop(&sc->sc_md_tmo); 2089 callout_stop(&sc->sc_md6_tmo); 2090 carp_set_state(sc, INIT); 2091 carp_setrun(sc, 0); 2092 if (!sc->sc_suppress) { 2093 carp_suppress_preempt++; 2094 if (carp_suppress_preempt == 1) { 2095 CARP_SCUNLOCK(sc); 2096 carp_send_ad_all(); 2097 CARP_SCLOCK(sc); 2098 } 2099 } 2100 sc->sc_suppress = 1; 2101 } else { 2102 sc->sc_if.if_flags |= sc->sc_flags_backup; 2103 carp_set_state(sc, INIT); 2104 carp_setrun(sc, 0); 2105 if (sc->sc_suppress) 2106 carp_suppress_preempt--; 2107 sc->sc_suppress = 0; 2108 } 2109 } 2110 } 2111 2112 static int 2113 carp_modevent(module_t mod, int type, void *data) 2114 { 2115 int error = 0; 2116 2117 switch (type) { 2118 case MOD_LOAD: 2119 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2120 LIST_INIT(&carpif_list); 2121 if_clone_attach(&carp_cloner); 2122 break; 2123 2124 case MOD_UNLOAD: 2125 if_clone_detach(&carp_cloner); 2126 while (!LIST_EMPTY(&carpif_list)) 2127 carp_clone_destroy(&LIST_FIRST(&carpif_list)->sc_if); 2128 mtx_destroy(&carp_mtx); 2129 break; 2130 2131 default: 2132 error = EINVAL; 2133 break; 2134 } 2135 2136 return error; 2137 } 2138 2139 static moduledata_t carp_mod = { 2140 "carp", 2141 carp_modevent, 2142 0 2143 }; 2144 2145 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2146