1 /* $FreeBSD$ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "opt_carp.h" 30 #include "opt_bpf.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/conf.h> 38 #include <sys/kernel.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/time.h> 44 #include <sys/proc.h> 45 #include <sys/sysctl.h> 46 #include <sys/syslog.h> 47 #include <sys/signalvar.h> 48 #include <sys/filio.h> 49 #include <sys/sockio.h> 50 51 #include <sys/socket.h> 52 #include <sys/vnode.h> 53 54 #include <machine/stdarg.h> 55 56 #include <net/bpf.h> 57 #include <net/ethernet.h> 58 #include <net/fddi.h> 59 #include <net/iso88025.h> 60 #include <net/if.h> 61 #include <net/if_clone.h> 62 #include <net/if_types.h> 63 #include <net/route.h> 64 65 #ifdef INET 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/in_systm.h> 69 #include <netinet/ip.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/if_ether.h> 72 #include <machine/in_cksum.h> 73 #endif 74 75 #ifdef INET6 76 #include <netinet/icmp6.h> 77 #include <netinet/ip6.h> 78 #include <netinet6/ip6_var.h> 79 #include <netinet6/nd6.h> 80 #include <net/if_dl.h> 81 #endif 82 83 #include <crypto/sha1.h> 84 #include <netinet/ip_carp.h> 85 86 #define CARP_IFNAME "carp" 87 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 88 SYSCTL_DECL(_net_inet_carp); 89 90 struct carp_softc { 91 struct arpcom sc_ac; /* Interface clue */ 92 #define sc_if sc_ac.ac_if 93 struct ifnet *sc_carpdev; /* Pointer to parent interface */ 94 struct in_ifaddr *sc_ia; /* primary iface address */ 95 struct ip_moptions sc_imo; 96 #ifdef INET6 97 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 98 struct ip6_moptions sc_im6o; 99 #endif /* INET6 */ 100 TAILQ_ENTRY(carp_softc) sc_list; 101 102 enum { INIT = 0, BACKUP, MASTER } sc_state; 103 104 int sc_flags_backup; 105 int sc_suppress; 106 107 int sc_sendad_errors; 108 #define CARP_SENDAD_MAX_ERRORS 3 109 int sc_sendad_success; 110 #define CARP_SENDAD_MIN_SUCCESS 3 111 112 int sc_vhid; 113 int sc_advskew; 114 int sc_naddrs; 115 int sc_naddrs6; 116 int sc_advbase; /* seconds */ 117 int sc_init_counter; 118 u_int64_t sc_counter; 119 120 /* authentication */ 121 #define CARP_HMAC_PAD 64 122 unsigned char sc_key[CARP_KEY_LEN]; 123 unsigned char sc_pad[CARP_HMAC_PAD]; 124 SHA1_CTX sc_sha1; 125 126 struct callout sc_ad_tmo; /* advertisement timeout */ 127 struct callout sc_md_tmo; /* master down timeout */ 128 struct callout sc_md6_tmo; /* master down timeout */ 129 130 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 131 }; 132 133 int carp_suppress_preempt = 0; 134 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 135 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 136 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 137 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 138 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 139 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 140 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 141 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 142 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 143 144 struct carpstats carpstats; 145 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 146 &carpstats, carpstats, 147 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 148 149 struct carp_if { 150 TAILQ_HEAD(, carp_softc) vhif_vrs; 151 int vhif_nvrs; 152 153 struct callout cif_tmo; 154 struct ifnet *vhif_ifp; 155 struct mtx vhif_mtx; 156 }; 157 158 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */ 159 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) 160 161 /* lock per carp_if queue */ 162 #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ 163 NULL, MTX_DEF) 164 #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) 165 #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) 166 #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) 167 #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) 168 169 #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) 170 #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) 171 #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) 172 173 #define CARP_LOG(...) do { \ 174 if (carp_opts[CARPCTL_LOG] > 0) \ 175 log(LOG_INFO, __VA_ARGS__); \ 176 } while (0) 177 178 #define CARP_DEBUG(...) do { \ 179 if (carp_opts[CARPCTL_LOG] > 1) \ 180 log(LOG_DEBUG, __VA_ARGS__); \ 181 } while (0) 182 183 static void carp_hmac_prepare(struct carp_softc *); 184 static void carp_hmac_generate(struct carp_softc *, u_int32_t *, 185 unsigned char *); 186 static int carp_hmac_verify(struct carp_softc *, u_int32_t *, 187 unsigned char *); 188 static void carp_setroute(struct carp_softc *, int); 189 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 190 static int carp_clone_create(struct if_clone *, int); 191 static void carp_clone_destroy(struct ifnet *); 192 static void carpdetach(struct carp_softc *); 193 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 194 struct carp_header *); 195 static void carp_send_ad_all(void); 196 static void carp_send_ad(void *); 197 static void carp_send_ad_locked(struct carp_softc *); 198 static void carp_send_arp(struct carp_softc *); 199 static void carp_master_down(void *); 200 static void carp_master_down_locked(struct carp_softc *); 201 static int carp_ioctl(struct ifnet *, u_long, caddr_t); 202 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 203 struct rtentry *); 204 static void carp_start(struct ifnet *); 205 static void carp_setrun(struct carp_softc *, sa_family_t); 206 static void carp_set_state(struct carp_softc *, int); 207 static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); 208 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 209 210 static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 211 static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); 212 static void carp_carpdev_state1(void *); 213 static void carp_carpdev_state_locked(struct carp_if *); 214 static void carp_sc_state_locked(struct carp_softc *); 215 #ifdef INET6 216 static void carp_send_na(struct carp_softc *); 217 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 218 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 219 #endif 220 221 static LIST_HEAD(, carp_softc) carpif_list; 222 static struct mtx carp_mtx; 223 IFC_SIMPLE_DECLARE(carp, 0); 224 225 static __inline u_int16_t 226 carp_cksum(struct mbuf *m, int len) 227 { 228 return (in_cksum(m, len)); 229 } 230 231 static void 232 carp_hmac_prepare(struct carp_softc *sc) 233 { 234 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 235 u_int8_t vhid = sc->sc_vhid & 0xff; 236 struct ifaddr *ifa; 237 int i; 238 #ifdef INET6 239 struct in6_addr in6; 240 #endif 241 242 if (sc->sc_carpdev) 243 CARP_SCLOCK(sc); 244 245 /* XXX: possible race here */ 246 247 /* compute ipad from key */ 248 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 249 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 250 for (i = 0; i < sizeof(sc->sc_pad); i++) 251 sc->sc_pad[i] ^= 0x36; 252 253 /* precompute first part of inner hash */ 254 SHA1Init(&sc->sc_sha1); 255 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 256 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 257 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 258 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 259 #ifdef INET 260 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 261 if (ifa->ifa_addr->sa_family == AF_INET) 262 SHA1Update(&sc->sc_sha1, 263 (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr, 264 sizeof(struct in_addr)); 265 } 266 #endif /* INET */ 267 #ifdef INET6 268 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 269 if (ifa->ifa_addr->sa_family == AF_INET6) { 270 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 271 if (IN6_IS_ADDR_LINKLOCAL(&in6)) 272 in6.s6_addr16[1] = 0; 273 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 274 } 275 } 276 #endif /* INET6 */ 277 278 /* convert ipad to opad */ 279 for (i = 0; i < sizeof(sc->sc_pad); i++) 280 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 281 282 if (sc->sc_carpdev) 283 CARP_SCUNLOCK(sc); 284 } 285 286 static void 287 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], 288 unsigned char md[20]) 289 { 290 SHA1_CTX sha1ctx; 291 292 /* fetch first half of inner hash */ 293 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 294 295 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 296 SHA1Final(md, &sha1ctx); 297 298 /* outer hash */ 299 SHA1Init(&sha1ctx); 300 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 301 SHA1Update(&sha1ctx, md, 20); 302 SHA1Final(md, &sha1ctx); 303 } 304 305 static int 306 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], 307 unsigned char md[20]) 308 { 309 unsigned char md2[20]; 310 311 CARP_SCLOCK_ASSERT(sc); 312 313 carp_hmac_generate(sc, counter, md2); 314 315 return (bcmp(md, md2, sizeof(md2))); 316 } 317 318 static void 319 carp_setroute(struct carp_softc *sc, int cmd) 320 { 321 struct ifaddr *ifa; 322 int s; 323 324 if (sc->sc_carpdev) 325 CARP_SCLOCK_ASSERT(sc); 326 327 s = splnet(); 328 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 329 if (ifa->ifa_addr->sa_family == AF_INET && 330 sc->sc_carpdev != NULL) { 331 int count = carp_addrcount( 332 (struct carp_if *)sc->sc_carpdev->if_carp, 333 ifatoia(ifa), CARP_COUNT_MASTER); 334 335 if ((cmd == RTM_ADD && count == 1) || 336 (cmd == RTM_DELETE && count == 0)) 337 rtinit(ifa, cmd, RTF_UP | RTF_HOST); 338 } 339 #ifdef INET6 340 if (ifa->ifa_addr->sa_family == AF_INET6) { 341 if (cmd == RTM_ADD) 342 in6_ifaddloop(ifa); 343 else 344 in6_ifremloop(ifa); 345 } 346 #endif /* INET6 */ 347 } 348 splx(s); 349 } 350 351 static int 352 carp_clone_create(struct if_clone *ifc, int unit) 353 { 354 355 struct carp_softc *sc; 356 struct ifnet *ifp; 357 358 MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 359 360 sc->sc_flags_backup = 0; 361 sc->sc_suppress = 0; 362 sc->sc_advbase = CARP_DFLTINTV; 363 sc->sc_vhid = -1; /* required setting */ 364 sc->sc_advskew = 0; 365 sc->sc_init_counter = 1; 366 sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ 367 #ifdef INET6 368 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 369 #endif 370 371 callout_init(&sc->sc_ad_tmo, NET_CALLOUT_MPSAFE); 372 callout_init(&sc->sc_md_tmo, NET_CALLOUT_MPSAFE); 373 callout_init(&sc->sc_md6_tmo, NET_CALLOUT_MPSAFE); 374 375 ifp = &sc->sc_if; 376 ifp->if_softc = sc; 377 if_initname(ifp, CARP_IFNAME, unit); 378 ifp->if_mtu = ETHERMTU; 379 ifp->if_flags = 0; 380 ifp->if_ioctl = carp_ioctl; 381 ifp->if_output = carp_looutput; 382 ifp->if_start = carp_start; 383 ifp->if_type = IFT_CARP; 384 ifp->if_snd.ifq_maxlen = ifqmaxlen; 385 ifp->if_hdrlen = 0; 386 if_attach(ifp); 387 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t)); 388 mtx_lock(&carp_mtx); 389 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 390 mtx_unlock(&carp_mtx); 391 return (0); 392 } 393 394 static void 395 carp_clone_destroy(struct ifnet *ifp) 396 { 397 struct carp_softc *sc = ifp->if_softc; 398 struct carp_if *cif; 399 struct ip_moptions *imo = &sc->sc_imo; 400 #ifdef INET6 401 struct ip6_moptions *im6o = &sc->sc_im6o; 402 #endif 403 404 /* carpdetach(sc); */ 405 406 callout_stop(&sc->sc_ad_tmo); 407 callout_stop(&sc->sc_md_tmo); 408 callout_stop(&sc->sc_md6_tmo); 409 410 if (imo->imo_num_memberships) { 411 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 412 imo->imo_multicast_ifp = NULL; 413 } 414 #ifdef INET6 415 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 416 struct in6_multi_mship *imm = 417 LIST_FIRST(&im6o->im6o_memberships); 418 LIST_REMOVE(imm, i6mm_chain); 419 in6_leavegroup(imm); 420 } 421 im6o->im6o_multicast_ifp = NULL; 422 #endif 423 424 /* Remove ourself from parents if_carp queue */ 425 if (sc->sc_carpdev && (cif = sc->sc_carpdev->if_carp)) { 426 CARP_LOCK(cif); 427 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 428 if (!--cif->vhif_nvrs) { 429 callout_drain(&cif->cif_tmo); 430 sc->sc_carpdev->if_carp = NULL; 431 CARP_LOCK_DESTROY(cif); 432 FREE(cif, M_CARP); 433 ifpromisc(sc->sc_carpdev, 0); 434 } else { 435 CARP_UNLOCK(cif); 436 } 437 } 438 439 mtx_lock(&carp_mtx); 440 LIST_REMOVE(sc, sc_next); 441 mtx_unlock(&carp_mtx); 442 bpfdetach(ifp); 443 if_detach(ifp); 444 free(sc, M_CARP); 445 } 446 447 /* 448 * process input packet. 449 * we have rearranged checks order compared to the rfc, 450 * but it seems more efficient this way or not possible otherwise. 451 */ 452 void 453 carp_input(struct mbuf *m, int hlen) 454 { 455 struct ip *ip = mtod(m, struct ip *); 456 struct carp_header *ch; 457 int iplen, len; 458 459 carpstats.carps_ipackets++; 460 461 if (!carp_opts[CARPCTL_ALLOW]) { 462 m_freem(m); 463 return; 464 } 465 466 /* check if received on a valid carp interface */ 467 if (m->m_pkthdr.rcvif->if_carp == NULL) { 468 carpstats.carps_badif++; 469 CARP_LOG("carp_input: packet received on non-carp " 470 "interface: %s\n", 471 m->m_pkthdr.rcvif->if_xname); 472 m_freem(m); 473 return; 474 } 475 476 /* verify that the IP TTL is 255. */ 477 if (ip->ip_ttl != CARP_DFLTTL) { 478 carpstats.carps_badttl++; 479 CARP_LOG("carp_input: received ttl %d != 255i on %s\n", 480 ip->ip_ttl, 481 m->m_pkthdr.rcvif->if_xname); 482 m_freem(m); 483 return; 484 } 485 486 iplen = ip->ip_hl << 2; 487 488 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 489 carpstats.carps_badlen++; 490 CARP_LOG("carp_input: received len %zd < " 491 "sizeof(struct carp_header)\n", 492 m->m_len - sizeof(struct ip)); 493 m_freem(m); 494 return; 495 } 496 497 if (iplen + sizeof(*ch) < m->m_len) { 498 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 499 carpstats.carps_hdrops++; 500 CARP_LOG("carp_input: pullup failed\n"); 501 return; 502 } 503 ip = mtod(m, struct ip *); 504 } 505 ch = (struct carp_header *)((char *)ip + iplen); 506 507 /* 508 * verify that the received packet length is 509 * equal to the CARP header 510 */ 511 len = iplen + sizeof(*ch); 512 if (len > m->m_pkthdr.len) { 513 carpstats.carps_badlen++; 514 CARP_LOG("carp_input: packet too short %d on %s\n", 515 m->m_pkthdr.len, 516 m->m_pkthdr.rcvif->if_xname); 517 m_freem(m); 518 return; 519 } 520 521 if ((m = m_pullup(m, len)) == NULL) { 522 carpstats.carps_hdrops++; 523 return; 524 } 525 ip = mtod(m, struct ip *); 526 ch = (struct carp_header *)((char *)ip + iplen); 527 528 /* verify the CARP checksum */ 529 m->m_data += iplen; 530 if (carp_cksum(m, len - iplen)) { 531 carpstats.carps_badsum++; 532 CARP_LOG("carp_input: checksum failed on %s\n", 533 m->m_pkthdr.rcvif->if_xname); 534 m_freem(m); 535 return; 536 } 537 m->m_data -= iplen; 538 539 carp_input_c(m, ch, AF_INET); 540 } 541 542 #ifdef INET6 543 int 544 carp6_input(struct mbuf **mp, int *offp, int proto) 545 { 546 struct mbuf *m = *mp; 547 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 548 struct carp_header *ch; 549 u_int len; 550 551 carpstats.carps_ipackets6++; 552 553 if (!carp_opts[CARPCTL_ALLOW]) { 554 m_freem(m); 555 return (IPPROTO_DONE); 556 } 557 558 /* check if received on a valid carp interface */ 559 if (m->m_pkthdr.rcvif->if_carp == NULL) { 560 carpstats.carps_badif++; 561 CARP_LOG("carp6_input: packet received on non-carp " 562 "interface: %s\n", 563 m->m_pkthdr.rcvif->if_xname); 564 m_freem(m); 565 return (IPPROTO_DONE); 566 } 567 568 /* verify that the IP TTL is 255 */ 569 if (ip6->ip6_hlim != CARP_DFLTTL) { 570 carpstats.carps_badttl++; 571 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 572 ip6->ip6_hlim, 573 m->m_pkthdr.rcvif->if_xname); 574 m_freem(m); 575 return (IPPROTO_DONE); 576 } 577 578 /* verify that we have a complete carp packet */ 579 len = m->m_len; 580 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 581 if (ch == NULL) { 582 carpstats.carps_badlen++; 583 CARP_LOG("carp6_input: packet size %u too small\n", len); 584 return (IPPROTO_DONE); 585 } 586 587 588 /* verify the CARP checksum */ 589 m->m_data += *offp; 590 if (carp_cksum(m, sizeof(*ch))) { 591 carpstats.carps_badsum++; 592 CARP_LOG("carp6_input: checksum failed, on %s\n", 593 m->m_pkthdr.rcvif->if_xname); 594 m_freem(m); 595 return (IPPROTO_DONE); 596 } 597 m->m_data -= *offp; 598 599 carp_input_c(m, ch, AF_INET6); 600 return (IPPROTO_DONE); 601 } 602 #endif /* INET6 */ 603 604 static void 605 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 606 { 607 struct ifnet *ifp = m->m_pkthdr.rcvif; 608 struct carp_softc *sc; 609 u_int64_t tmp_counter; 610 struct timeval sc_tv, ch_tv; 611 612 /* verify that the VHID is valid on the receiving interface */ 613 CARP_LOCK(ifp->if_carp); 614 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 615 if (sc->sc_vhid == ch->carp_vhid) 616 break; 617 618 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 619 (IFF_UP|IFF_RUNNING)) { 620 carpstats.carps_badvhid++; 621 CARP_UNLOCK(ifp->if_carp); 622 m_freem(m); 623 return; 624 } 625 626 getmicrotime(&sc->sc_if.if_lastchange); 627 sc->sc_if.if_ipackets++; 628 sc->sc_if.if_ibytes += m->m_pkthdr.len; 629 630 if (sc->sc_if.if_bpf) { 631 struct ip *ip = mtod(m, struct ip *); 632 uint32_t af1 = af; 633 634 /* BPF wants net byte order */ 635 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 636 ip->ip_off = htons(ip->ip_off); 637 bpf_mtap2(sc->sc_if.if_bpf, &af1, sizeof(af1), m); 638 } 639 640 /* verify the CARP version. */ 641 if (ch->carp_version != CARP_VERSION) { 642 carpstats.carps_badver++; 643 sc->sc_if.if_ierrors++; 644 CARP_UNLOCK(ifp->if_carp); 645 CARP_LOG("%s; invalid version %d\n", 646 sc->sc_if.if_xname, 647 ch->carp_version); 648 m_freem(m); 649 return; 650 } 651 652 /* verify the hash */ 653 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 654 carpstats.carps_badauth++; 655 sc->sc_if.if_ierrors++; 656 CARP_UNLOCK(ifp->if_carp); 657 CARP_LOG("%s: incorrect hash\n", sc->sc_if.if_xname); 658 m_freem(m); 659 return; 660 } 661 662 tmp_counter = ntohl(ch->carp_counter[0]); 663 tmp_counter = tmp_counter<<32; 664 tmp_counter += ntohl(ch->carp_counter[1]); 665 666 /* XXX Replay protection goes here */ 667 668 sc->sc_init_counter = 0; 669 sc->sc_counter = tmp_counter; 670 671 sc_tv.tv_sec = sc->sc_advbase; 672 if (carp_suppress_preempt && sc->sc_advskew < 240) 673 sc_tv.tv_usec = 240 * 1000000 / 256; 674 else 675 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 676 ch_tv.tv_sec = ch->carp_advbase; 677 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 678 679 switch (sc->sc_state) { 680 case INIT: 681 break; 682 case MASTER: 683 /* 684 * If we receive an advertisement from a master who's going to 685 * be more frequent than us, go into BACKUP state. 686 */ 687 if (timevalcmp(&sc_tv, &ch_tv, >) || 688 timevalcmp(&sc_tv, &ch_tv, ==)) { 689 callout_stop(&sc->sc_ad_tmo); 690 CARP_DEBUG("%s: MASTER -> BACKUP " 691 "(more frequent advertisement received)\n", 692 sc->sc_if.if_xname); 693 carp_set_state(sc, BACKUP); 694 carp_setrun(sc, 0); 695 carp_setroute(sc, RTM_DELETE); 696 } 697 break; 698 case BACKUP: 699 /* 700 * If we're pre-empting masters who advertise slower than us, 701 * and this one claims to be slower, treat him as down. 702 */ 703 if (carp_opts[CARPCTL_PREEMPT] && 704 timevalcmp(&sc_tv, &ch_tv, <)) { 705 CARP_DEBUG("%s: BACKUP -> MASTER " 706 "(preempting a slower master)\n", 707 sc->sc_if.if_xname); 708 carp_master_down_locked(sc); 709 break; 710 } 711 712 /* 713 * If the master is going to advertise at such a low frequency 714 * that he's guaranteed to time out, we'd might as well just 715 * treat him as timed out now. 716 */ 717 sc_tv.tv_sec = sc->sc_advbase * 3; 718 if (timevalcmp(&sc_tv, &ch_tv, <)) { 719 CARP_DEBUG("%s: BACKUP -> MASTER " 720 "(master timed out)\n", 721 sc->sc_if.if_xname); 722 carp_master_down_locked(sc); 723 break; 724 } 725 726 /* 727 * Otherwise, we reset the counter and wait for the next 728 * advertisement. 729 */ 730 carp_setrun(sc, af); 731 break; 732 } 733 734 CARP_UNLOCK(ifp->if_carp); 735 736 m_freem(m); 737 return; 738 } 739 740 static void 741 carpdetach(struct carp_softc *sc) 742 { 743 struct ifaddr *ifa; 744 745 callout_stop(&sc->sc_ad_tmo); 746 callout_stop(&sc->sc_md_tmo); 747 callout_stop(&sc->sc_md6_tmo); 748 749 while ((ifa = TAILQ_FIRST(&sc->sc_if.if_addrlist)) != NULL) 750 if (ifa->ifa_addr->sa_family == AF_INET) { 751 struct in_ifaddr *ia = ifatoia(ifa); 752 753 carp_del_addr(sc, &ia->ia_addr); 754 755 /* ripped screaming from in_control(SIOCDIFADDR) */ 756 in_ifscrub(&sc->sc_if, ia); 757 TAILQ_REMOVE(&sc->sc_if.if_addrlist, ifa, ifa_link); 758 TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link); 759 IFAFREE((&ia->ia_ifa)); 760 } 761 } 762 763 /* Detach an interface from the carp. */ 764 void 765 carp_ifdetach(struct ifnet *ifp) 766 { 767 struct carp_softc *sc; 768 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 769 770 CARP_LOCK(cif); 771 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 772 carpdetach(sc); 773 CARP_UNLOCK(cif); 774 } 775 776 static int 777 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 778 { 779 struct m_tag *mtag; 780 struct ifnet *ifp = &sc->sc_if; 781 782 if (sc->sc_init_counter) { 783 /* this could also be seconds since unix epoch */ 784 sc->sc_counter = arc4random(); 785 sc->sc_counter = sc->sc_counter << 32; 786 sc->sc_counter += arc4random(); 787 } else 788 sc->sc_counter++; 789 790 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 791 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 792 793 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 794 795 /* Tag packet for carp_output */ 796 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); 797 if (mtag == NULL) { 798 m_freem(m); 799 sc->sc_if.if_oerrors++; 800 return (ENOMEM); 801 } 802 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 803 m_tag_prepend(m, mtag); 804 805 return (0); 806 } 807 808 static void 809 carp_send_ad_all(void) 810 { 811 struct carp_softc *sc; 812 813 mtx_lock(&carp_mtx); 814 LIST_FOREACH(sc, &carpif_list, sc_next) { 815 if (sc->sc_carpdev == NULL) 816 continue; 817 CARP_SCLOCK(sc); 818 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) && 819 sc->sc_state == MASTER) 820 carp_send_ad_locked(sc); 821 CARP_SCUNLOCK(sc); 822 } 823 mtx_unlock(&carp_mtx); 824 } 825 826 static void 827 carp_send_ad(void *v) 828 { 829 struct carp_softc *sc = v; 830 831 CARP_SCLOCK(sc); 832 carp_send_ad_locked(sc); 833 CARP_SCUNLOCK(sc); 834 } 835 836 static void 837 carp_send_ad_locked(struct carp_softc *sc) 838 { 839 struct carp_header ch; 840 struct timeval tv; 841 struct carp_header *ch_ptr; 842 struct mbuf *m; 843 int len, advbase, advskew; 844 845 CARP_SCLOCK_ASSERT(sc); 846 847 /* bow out if we've lost our UPness or RUNNINGuiness */ 848 if ((sc->sc_if.if_flags & 849 (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 850 advbase = 255; 851 advskew = 255; 852 } else { 853 advbase = sc->sc_advbase; 854 if (!carp_suppress_preempt || sc->sc_advskew > 240) 855 advskew = sc->sc_advskew; 856 else 857 advskew = 240; 858 tv.tv_sec = advbase; 859 tv.tv_usec = advskew * 1000000 / 256; 860 } 861 862 ch.carp_version = CARP_VERSION; 863 ch.carp_type = CARP_ADVERTISEMENT; 864 ch.carp_vhid = sc->sc_vhid; 865 ch.carp_advbase = advbase; 866 ch.carp_advskew = advskew; 867 ch.carp_authlen = 7; /* XXX DEFINE */ 868 ch.carp_pad1 = 0; /* must be zero */ 869 ch.carp_cksum = 0; 870 871 #ifdef INET 872 if (sc->sc_ia) { 873 struct ip *ip; 874 875 MGETHDR(m, M_DONTWAIT, MT_HEADER); 876 if (m == NULL) { 877 sc->sc_ac.ac_if.if_oerrors++; 878 carpstats.carps_onomem++; 879 /* XXX maybe less ? */ 880 if (advbase != 255 || advskew != 255) 881 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 882 carp_send_ad, sc); 883 return; 884 } 885 len = sizeof(*ip) + sizeof(ch); 886 m->m_pkthdr.len = len; 887 m->m_pkthdr.rcvif = NULL; 888 m->m_len = len; 889 MH_ALIGN(m, m->m_len); 890 m->m_flags |= M_MCAST; 891 ip = mtod(m, struct ip *); 892 ip->ip_v = IPVERSION; 893 ip->ip_hl = sizeof(*ip) >> 2; 894 ip->ip_tos = IPTOS_LOWDELAY; 895 ip->ip_len = len; 896 ip->ip_id = ip_newid(); 897 ip->ip_off = IP_DF; 898 ip->ip_ttl = CARP_DFLTTL; 899 ip->ip_p = IPPROTO_CARP; 900 ip->ip_sum = 0; 901 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; 902 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 903 904 ch_ptr = (struct carp_header *)(&ip[1]); 905 bcopy(&ch, ch_ptr, sizeof(ch)); 906 if (carp_prepare_ad(m, sc, ch_ptr)) 907 return; 908 909 m->m_data += sizeof(*ip); 910 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 911 m->m_data -= sizeof(*ip); 912 913 getmicrotime(&sc->sc_if.if_lastchange); 914 sc->sc_ac.ac_if.if_opackets++; 915 sc->sc_ac.ac_if.if_obytes += len; 916 carpstats.carps_opackets++; 917 918 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 919 sc->sc_if.if_oerrors++; 920 if (sc->sc_sendad_errors < INT_MAX) 921 sc->sc_sendad_errors++; 922 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 923 carp_suppress_preempt++; 924 if (carp_suppress_preempt == 1) { 925 CARP_SCUNLOCK(sc); 926 carp_send_ad_all(); 927 CARP_SCLOCK(sc); 928 } 929 } 930 sc->sc_sendad_success = 0; 931 } else { 932 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 933 if (++sc->sc_sendad_success >= 934 CARP_SENDAD_MIN_SUCCESS) { 935 carp_suppress_preempt--; 936 sc->sc_sendad_errors = 0; 937 } 938 } else 939 sc->sc_sendad_errors = 0; 940 } 941 } 942 #endif /* INET */ 943 #ifdef INET6 944 if (sc->sc_ia6) { 945 struct ip6_hdr *ip6; 946 947 MGETHDR(m, M_DONTWAIT, MT_HEADER); 948 if (m == NULL) { 949 sc->sc_ac.ac_if.if_oerrors++; 950 carpstats.carps_onomem++; 951 /* XXX maybe less ? */ 952 if (advbase != 255 || advskew != 255) 953 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 954 carp_send_ad, sc); 955 return; 956 } 957 len = sizeof(*ip6) + sizeof(ch); 958 m->m_pkthdr.len = len; 959 m->m_pkthdr.rcvif = NULL; 960 m->m_len = len; 961 MH_ALIGN(m, m->m_len); 962 m->m_flags |= M_MCAST; 963 ip6 = mtod(m, struct ip6_hdr *); 964 bzero(ip6, sizeof(*ip6)); 965 ip6->ip6_vfc |= IPV6_VERSION; 966 ip6->ip6_hlim = CARP_DFLTTL; 967 ip6->ip6_nxt = IPPROTO_CARP; 968 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 969 sizeof(struct in6_addr)); 970 /* set the multicast destination */ 971 972 ip6->ip6_dst.s6_addr8[0] = 0xff; 973 ip6->ip6_dst.s6_addr8[1] = 0x02; 974 ip6->ip6_dst.s6_addr8[15] = 0x12; 975 976 ch_ptr = (struct carp_header *)(&ip6[1]); 977 bcopy(&ch, ch_ptr, sizeof(ch)); 978 if (carp_prepare_ad(m, sc, ch_ptr)) 979 return; 980 981 m->m_data += sizeof(*ip6); 982 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 983 m->m_data -= sizeof(*ip6); 984 985 getmicrotime(&sc->sc_if.if_lastchange); 986 sc->sc_if.if_opackets++; 987 sc->sc_if.if_obytes += len; 988 carpstats.carps_opackets6++; 989 990 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 991 sc->sc_if.if_oerrors++; 992 if (sc->sc_sendad_errors < INT_MAX) 993 sc->sc_sendad_errors++; 994 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 995 carp_suppress_preempt++; 996 if (carp_suppress_preempt == 1) { 997 CARP_SCUNLOCK(sc); 998 carp_send_ad_all(); 999 CARP_SCLOCK(sc); 1000 } 1001 } 1002 sc->sc_sendad_success = 0; 1003 } else { 1004 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1005 if (++sc->sc_sendad_success >= 1006 CARP_SENDAD_MIN_SUCCESS) { 1007 carp_suppress_preempt--; 1008 sc->sc_sendad_errors = 0; 1009 } 1010 } else 1011 sc->sc_sendad_errors = 0; 1012 } 1013 } 1014 #endif /* INET6 */ 1015 1016 if (advbase != 255 || advskew != 255) 1017 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1018 carp_send_ad, sc); 1019 1020 } 1021 1022 /* 1023 * Broadcast a gratuitous ARP request containing 1024 * the virtual router MAC address for each IP address 1025 * associated with the virtual router. 1026 */ 1027 static void 1028 carp_send_arp(struct carp_softc *sc) 1029 { 1030 struct ifaddr *ifa; 1031 1032 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1033 1034 if (ifa->ifa_addr->sa_family != AF_INET) 1035 continue; 1036 1037 /* arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr); */ 1038 arp_ifinit2(sc->sc_carpdev, ifa, sc->sc_ac.ac_enaddr); 1039 1040 DELAY(1000); /* XXX */ 1041 } 1042 } 1043 1044 #ifdef INET6 1045 static void 1046 carp_send_na(struct carp_softc *sc) 1047 { 1048 struct ifaddr *ifa; 1049 struct in6_addr *in6; 1050 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1051 1052 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1053 1054 if (ifa->ifa_addr->sa_family != AF_INET6) 1055 continue; 1056 1057 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1058 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1059 ND_NA_FLAG_OVERRIDE, 1, NULL); 1060 DELAY(1000); /* XXX */ 1061 } 1062 } 1063 #endif /* INET6 */ 1064 1065 static int 1066 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) 1067 { 1068 struct carp_softc *vh; 1069 struct ifaddr *ifa; 1070 int count = 0; 1071 1072 CARP_LOCK_ASSERT(cif); 1073 1074 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1075 if ((type == CARP_COUNT_RUNNING && 1076 (vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1077 (IFF_UP|IFF_RUNNING)) || 1078 (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { 1079 TAILQ_FOREACH(ifa, &vh->sc_ac.ac_if.if_addrlist, 1080 ifa_list) { 1081 if (ifa->ifa_addr->sa_family == AF_INET && 1082 ia->ia_addr.sin_addr.s_addr == 1083 ifatoia(ifa)->ia_addr.sin_addr.s_addr) 1084 count++; 1085 } 1086 } 1087 } 1088 return (count); 1089 } 1090 1091 int 1092 carp_iamatch(void *v, struct in_ifaddr *ia, 1093 struct in_addr *isaddr, u_int8_t **enaddr) 1094 { 1095 struct carp_if *cif = v; 1096 struct carp_softc *vh; 1097 int index, count = 0; 1098 struct ifaddr *ifa; 1099 1100 CARP_LOCK(cif); 1101 1102 if (carp_opts[CARPCTL_ARPBALANCE]) { 1103 /* 1104 * XXX proof of concept implementation. 1105 * We use the source ip to decide which virtual host should 1106 * handle the request. If we're master of that virtual host, 1107 * then we respond, otherwise, just drop the arp packet on 1108 * the floor. 1109 */ 1110 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); 1111 if (count == 0) { 1112 /* should never reach this */ 1113 CARP_UNLOCK(cif); 1114 return (0); 1115 } 1116 1117 /* this should be a hash, like pf_hash() */ 1118 index = isaddr->s_addr % count; 1119 count = 0; 1120 1121 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1122 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1123 (IFF_UP|IFF_RUNNING)) { 1124 TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, 1125 ifa_list) { 1126 if (ifa->ifa_addr->sa_family == 1127 AF_INET && 1128 ia->ia_addr.sin_addr.s_addr == 1129 ifatoia(ifa)->ia_addr.sin_addr.s_addr) { 1130 if (count == index) { 1131 if (vh->sc_state == 1132 MASTER) { 1133 *enaddr = vh->sc_ac.ac_enaddr; 1134 CARP_UNLOCK(cif); 1135 return (1); 1136 } else { 1137 CARP_UNLOCK(cif); 1138 return (0); 1139 } 1140 } 1141 count++; 1142 } 1143 } 1144 } 1145 } 1146 } else { 1147 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1148 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1149 (IFF_UP|IFF_RUNNING) && ia->ia_ifp == 1150 &vh->sc_if) { 1151 *enaddr = vh->sc_ac.ac_enaddr; 1152 CARP_UNLOCK(cif); 1153 return (1); 1154 } 1155 } 1156 } 1157 CARP_UNLOCK(cif); 1158 return (0); 1159 } 1160 1161 #ifdef INET6 1162 struct ifaddr * 1163 carp_iamatch6(void *v, struct in6_addr *taddr) 1164 { 1165 struct carp_if *cif = v; 1166 struct carp_softc *vh; 1167 struct ifaddr *ifa; 1168 1169 CARP_LOCK(cif); 1170 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1171 TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, ifa_list) { 1172 if (IN6_ARE_ADDR_EQUAL(taddr, 1173 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1174 ((vh->sc_if.if_flags & 1175 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { 1176 CARP_UNLOCK(cif); 1177 return (ifa); 1178 } 1179 } 1180 } 1181 CARP_UNLOCK(cif); 1182 1183 return (NULL); 1184 } 1185 1186 void * 1187 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1188 { 1189 struct m_tag *mtag; 1190 struct carp_if *cif = v; 1191 struct carp_softc *sc; 1192 struct ifaddr *ifa; 1193 1194 CARP_LOCK(cif); 1195 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1196 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1197 if (IN6_ARE_ADDR_EQUAL(taddr, 1198 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1199 ((sc->sc_if.if_flags & 1200 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { 1201 struct ifnet *ifp = &sc->sc_if; 1202 mtag = m_tag_get(PACKET_TAG_CARP, 1203 sizeof(struct ifnet *), M_NOWAIT); 1204 if (mtag == NULL) { 1205 /* better a bit than nothing */ 1206 CARP_UNLOCK(cif); 1207 return (sc->sc_ac.ac_enaddr); 1208 } 1209 bcopy(&ifp, (caddr_t)(mtag + 1), 1210 sizeof(struct ifnet *)); 1211 m_tag_prepend(m, mtag); 1212 1213 CARP_UNLOCK(cif); 1214 return (sc->sc_ac.ac_enaddr); 1215 } 1216 } 1217 } 1218 CARP_UNLOCK(cif); 1219 1220 return (NULL); 1221 } 1222 #endif 1223 1224 struct ifnet * 1225 carp_forus(void *v, void *dhost) 1226 { 1227 struct carp_if *cif = v; 1228 struct carp_softc *vh; 1229 u_int8_t *ena = dhost; 1230 1231 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1232 return (NULL); 1233 1234 CARP_LOCK(cif); 1235 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) 1236 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1237 (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && 1238 !bcmp(dhost, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) { 1239 CARP_UNLOCK(cif); 1240 return (&vh->sc_if); 1241 } 1242 1243 CARP_UNLOCK(cif); 1244 return (NULL); 1245 } 1246 1247 static void 1248 carp_master_down(void *v) 1249 { 1250 struct carp_softc *sc = v; 1251 1252 CARP_SCLOCK(sc); 1253 carp_master_down_locked(sc); 1254 CARP_SCUNLOCK(sc); 1255 } 1256 1257 static void 1258 carp_master_down_locked(struct carp_softc *sc) 1259 { 1260 if (sc->sc_carpdev) 1261 CARP_SCLOCK_ASSERT(sc); 1262 1263 switch (sc->sc_state) { 1264 case INIT: 1265 printf("%s: master_down event in INIT state\n", 1266 sc->sc_if.if_xname); 1267 break; 1268 case MASTER: 1269 break; 1270 case BACKUP: 1271 carp_set_state(sc, MASTER); 1272 carp_send_ad_locked(sc); 1273 carp_send_arp(sc); 1274 #ifdef INET6 1275 carp_send_na(sc); 1276 #endif /* INET6 */ 1277 carp_setrun(sc, 0); 1278 carp_setroute(sc, RTM_ADD); 1279 break; 1280 } 1281 } 1282 1283 /* 1284 * When in backup state, af indicates whether to reset the master down timer 1285 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1286 */ 1287 static void 1288 carp_setrun(struct carp_softc *sc, sa_family_t af) 1289 { 1290 struct timeval tv; 1291 1292 if (sc->sc_carpdev) 1293 CARP_SCLOCK_ASSERT(sc); 1294 1295 if (sc->sc_if.if_flags & IFF_UP && 1296 sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) 1297 sc->sc_if.if_flags |= IFF_RUNNING; 1298 else { 1299 sc->sc_if.if_flags &= ~IFF_RUNNING; 1300 carp_setroute(sc, RTM_DELETE); 1301 return; 1302 } 1303 1304 switch (sc->sc_state) { 1305 case INIT: 1306 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1307 carp_send_ad_locked(sc); 1308 carp_send_arp(sc); 1309 #ifdef INET6 1310 carp_send_na(sc); 1311 #endif /* INET6 */ 1312 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1313 sc->sc_if.if_xname); 1314 carp_set_state(sc, MASTER); 1315 carp_setroute(sc, RTM_ADD); 1316 } else { 1317 CARP_DEBUG("%s: INIT -> BACKUP\n", sc->sc_if.if_xname); 1318 carp_set_state(sc, BACKUP); 1319 carp_setroute(sc, RTM_DELETE); 1320 carp_setrun(sc, 0); 1321 } 1322 break; 1323 case BACKUP: 1324 callout_stop(&sc->sc_ad_tmo); 1325 tv.tv_sec = 3 * sc->sc_advbase; 1326 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1327 switch (af) { 1328 #ifdef INET 1329 case AF_INET: 1330 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1331 carp_master_down, sc); 1332 break; 1333 #endif /* INET */ 1334 #ifdef INET6 1335 case AF_INET6: 1336 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1337 carp_master_down, sc); 1338 break; 1339 #endif /* INET6 */ 1340 default: 1341 if (sc->sc_naddrs) 1342 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1343 carp_master_down, sc); 1344 if (sc->sc_naddrs6) 1345 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1346 carp_master_down, sc); 1347 break; 1348 } 1349 break; 1350 case MASTER: 1351 tv.tv_sec = sc->sc_advbase; 1352 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1353 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1354 carp_send_ad, sc); 1355 break; 1356 } 1357 } 1358 1359 static int 1360 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1361 { 1362 struct ifnet *ifp; 1363 struct carp_if *cif; 1364 struct in_ifaddr *ia, *ia_if; 1365 struct ip_moptions *imo = &sc->sc_imo; 1366 struct in_addr addr; 1367 u_long iaddr = htonl(sin->sin_addr.s_addr); 1368 int own, error; 1369 1370 if (sin->sin_addr.s_addr == 0) { 1371 if (!(sc->sc_if.if_flags & IFF_UP)) 1372 carp_set_state(sc, INIT); 1373 if (sc->sc_naddrs) 1374 sc->sc_if.if_flags |= IFF_UP; 1375 carp_setrun(sc, 0); 1376 return (0); 1377 } 1378 1379 /* we have to do it by hands to check we won't match on us */ 1380 ia_if = NULL; own = 0; 1381 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { 1382 /* and, yeah, we need a multicast-capable iface too */ 1383 if (ia->ia_ifp != &sc->sc_if && 1384 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1385 (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1386 if (!ia_if) 1387 ia_if = ia; 1388 if (sin->sin_addr.s_addr == 1389 ia->ia_addr.sin_addr.s_addr) 1390 own++; 1391 } 1392 } 1393 1394 if (!ia_if) 1395 return (EADDRNOTAVAIL); 1396 1397 ia = ia_if; 1398 ifp = ia->ia_ifp; 1399 1400 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1401 (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) 1402 return (EADDRNOTAVAIL); 1403 1404 if (imo->imo_num_memberships == 0) { 1405 addr.s_addr = htonl(INADDR_CARP_GROUP); 1406 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 1407 return (ENOBUFS); 1408 imo->imo_num_memberships++; 1409 imo->imo_multicast_ifp = ifp; 1410 imo->imo_multicast_ttl = CARP_DFLTTL; 1411 imo->imo_multicast_loop = 0; 1412 } 1413 1414 if (!ifp->if_carp) { 1415 1416 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, 1417 M_WAITOK|M_ZERO); 1418 if (!cif) { 1419 error = ENOBUFS; 1420 goto cleanup; 1421 } 1422 if ((error = ifpromisc(ifp, 1))) { 1423 FREE(cif, M_CARP); 1424 goto cleanup; 1425 } 1426 1427 CARP_LOCK_INIT(cif); 1428 CARP_LOCK(cif); 1429 cif->vhif_ifp = ifp; 1430 TAILQ_INIT(&cif->vhif_vrs); 1431 callout_init(&cif->cif_tmo, NET_CALLOUT_MPSAFE); 1432 ifp->if_carp = cif; 1433 1434 } else { 1435 struct carp_softc *vr; 1436 1437 cif = (struct carp_if *)ifp->if_carp; 1438 CARP_LOCK(cif); 1439 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1440 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1441 CARP_UNLOCK(cif); 1442 error = EINVAL; 1443 goto cleanup; 1444 } 1445 } 1446 sc->sc_ia = ia; 1447 sc->sc_carpdev = ifp; 1448 1449 { /* XXX prevent endless loop if already in queue */ 1450 struct carp_softc *vr, *after = NULL; 1451 int myself = 0; 1452 cif = (struct carp_if *)ifp->if_carp; 1453 1454 /* XXX: cif should not change, right? So we still hold the lock */ 1455 CARP_LOCK_ASSERT(cif); 1456 1457 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1458 if (vr == sc) 1459 myself = 1; 1460 if (vr->sc_vhid < sc->sc_vhid) 1461 after = vr; 1462 } 1463 1464 if (!myself) { 1465 /* We're trying to keep things in order */ 1466 if (after == NULL) { 1467 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1468 } else { 1469 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1470 } 1471 cif->vhif_nvrs++; 1472 } 1473 } 1474 1475 sc->sc_naddrs++; 1476 sc->sc_if.if_flags |= IFF_UP; 1477 if (own) 1478 sc->sc_advskew = 0; 1479 carp_sc_state_locked(sc); 1480 carp_setrun(sc, 0); 1481 1482 CARP_UNLOCK(cif); 1483 1484 return (0); 1485 1486 cleanup: 1487 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1488 return (error); 1489 } 1490 1491 static int 1492 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1493 { 1494 int error = 0; 1495 1496 if (!--sc->sc_naddrs) { 1497 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1498 struct ip_moptions *imo = &sc->sc_imo; 1499 1500 CARP_LOCK(cif); 1501 callout_stop(&sc->sc_ad_tmo); 1502 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1503 sc->sc_vhid = -1; 1504 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1505 imo->imo_multicast_ifp = NULL; 1506 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1507 if (!--cif->vhif_nvrs) { 1508 callout_drain(&cif->cif_tmo); 1509 sc->sc_carpdev->if_carp = NULL; 1510 CARP_LOCK_DESTROY(cif); 1511 FREE(cif, M_IFADDR); 1512 } else { 1513 CARP_UNLOCK(cif); 1514 } 1515 } 1516 1517 return (error); 1518 } 1519 1520 #ifdef INET6 1521 static int 1522 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1523 { 1524 struct ifnet *ifp; 1525 struct carp_if *cif; 1526 struct in6_ifaddr *ia, *ia_if; 1527 struct ip6_moptions *im6o = &sc->sc_im6o; 1528 struct in6_multi_mship *imm; 1529 struct sockaddr_in6 addr; 1530 int own, error; 1531 1532 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1533 if (!(sc->sc_if.if_flags & IFF_UP)) 1534 carp_set_state(sc, INIT); 1535 if (sc->sc_naddrs6) 1536 sc->sc_if.if_flags |= IFF_UP; 1537 carp_setrun(sc, 0); 1538 return (0); 1539 } 1540 1541 /* we have to do it by hands to check we won't match on us */ 1542 ia_if = NULL; own = 0; 1543 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1544 int i; 1545 1546 for (i = 0; i < 4; i++) { 1547 if ((sin6->sin6_addr.s6_addr32[i] & 1548 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1549 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1550 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1551 break; 1552 } 1553 /* and, yeah, we need a multicast-capable iface too */ 1554 if (ia->ia_ifp != &sc->sc_ac.ac_if && 1555 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1556 (i == 4)) { 1557 if (!ia_if) 1558 ia_if = ia; 1559 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1560 &ia->ia_addr.sin6_addr)) 1561 own++; 1562 } 1563 } 1564 1565 if (!ia_if) 1566 return (EADDRNOTAVAIL); 1567 ia = ia_if; 1568 ifp = ia->ia_ifp; 1569 1570 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1571 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1572 return (EADDRNOTAVAIL); 1573 1574 if (!sc->sc_naddrs6) { 1575 im6o->im6o_multicast_ifp = ifp; 1576 1577 /* join CARP multicast address */ 1578 bzero(&addr, sizeof(addr)); 1579 addr.sin6_family = AF_INET6; 1580 addr.sin6_len = sizeof(addr); 1581 addr.sin6_addr.s6_addr16[0] = htons(0xff02); 1582 addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 1583 addr.sin6_addr.s6_addr8[15] = 0x12; 1584 if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) 1585 goto cleanup; 1586 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1587 1588 /* join solicited multicast address */ 1589 bzero(&addr.sin6_addr, sizeof(addr.sin6_addr)); 1590 addr.sin6_addr.s6_addr16[0] = htons(0xff02); 1591 addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 1592 addr.sin6_addr.s6_addr32[1] = 0; 1593 addr.sin6_addr.s6_addr32[2] = htonl(1); 1594 addr.sin6_addr.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1595 addr.sin6_addr.s6_addr8[12] = 0xff; 1596 if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) 1597 goto cleanup; 1598 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1599 } 1600 1601 if (!ifp->if_carp) { 1602 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, 1603 M_WAITOK|M_ZERO); 1604 if (!cif) { 1605 error = ENOBUFS; 1606 goto cleanup; 1607 } 1608 if ((error = ifpromisc(ifp, 1))) { 1609 FREE(cif, M_CARP); 1610 goto cleanup; 1611 } 1612 1613 CARP_LOCK_INIT(cif); 1614 CARP_LOCK(cif); 1615 cif->vhif_ifp = ifp; 1616 TAILQ_INIT(&cif->vhif_vrs); 1617 callout_init(&cif->cif_tmo, NET_CALLOUT_MPSAFE); 1618 ifp->if_carp = cif; 1619 1620 } else { 1621 struct carp_softc *vr; 1622 1623 cif = (struct carp_if *)ifp->if_carp; 1624 CARP_LOCK(cif); 1625 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1626 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1627 CARP_UNLOCK(cif); 1628 error = EINVAL; 1629 goto cleanup; 1630 } 1631 } 1632 sc->sc_ia6 = ia; 1633 sc->sc_carpdev = ifp; 1634 1635 { /* XXX prevent endless loop if already in queue */ 1636 struct carp_softc *vr, *after = NULL; 1637 int myself = 0; 1638 cif = (struct carp_if *)ifp->if_carp; 1639 CARP_LOCK_ASSERT(cif); 1640 1641 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1642 if (vr == sc) 1643 myself = 1; 1644 if (vr->sc_vhid < sc->sc_vhid) 1645 after = vr; 1646 } 1647 1648 if (!myself) { 1649 /* We're trying to keep things in order */ 1650 if (after == NULL) { 1651 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1652 } else { 1653 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1654 } 1655 cif->vhif_nvrs++; 1656 } 1657 } 1658 1659 sc->sc_naddrs6++; 1660 sc->sc_ac.ac_if.if_flags |= IFF_UP; 1661 if (own) 1662 sc->sc_advskew = 0; 1663 carp_sc_state_locked(sc); 1664 carp_setrun(sc, 0); 1665 1666 CARP_UNLOCK(cif); 1667 1668 return (0); 1669 1670 cleanup: 1671 /* clean up multicast memberships */ 1672 if (!sc->sc_naddrs6) { 1673 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1674 imm = LIST_FIRST(&im6o->im6o_memberships); 1675 LIST_REMOVE(imm, i6mm_chain); 1676 in6_leavegroup(imm); 1677 } 1678 } 1679 return (error); 1680 } 1681 1682 static int 1683 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1684 { 1685 int error = 0; 1686 1687 if (!--sc->sc_naddrs6) { 1688 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1689 struct ip6_moptions *im6o = &sc->sc_im6o; 1690 1691 CARP_LOCK(cif); 1692 callout_stop(&sc->sc_ad_tmo); 1693 sc->sc_ac.ac_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1694 sc->sc_vhid = -1; 1695 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1696 struct in6_multi_mship *imm = 1697 LIST_FIRST(&im6o->im6o_memberships); 1698 1699 LIST_REMOVE(imm, i6mm_chain); 1700 in6_leavegroup(imm); 1701 } 1702 im6o->im6o_multicast_ifp = NULL; 1703 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1704 if (!--cif->vhif_nvrs) { 1705 callout_drain(&cif->cif_tmo); 1706 CARP_LOCK_DESTROY(cif); 1707 sc->sc_carpdev->if_carp = NULL; 1708 FREE(cif, M_IFADDR); 1709 } else 1710 CARP_UNLOCK(cif); 1711 } 1712 1713 return (error); 1714 } 1715 #endif /* INET6 */ 1716 1717 static int 1718 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1719 { 1720 struct carp_softc *sc = ifp->if_softc, *vr; 1721 struct carpreq carpr; 1722 struct ifaddr *ifa; 1723 struct ifreq *ifr; 1724 struct ifaliasreq *ifra; 1725 int locked = 0, error = 0; 1726 1727 ifa = (struct ifaddr *)addr; 1728 ifra = (struct ifaliasreq *)addr; 1729 ifr = (struct ifreq *)addr; 1730 1731 switch (cmd) { 1732 case SIOCSIFADDR: 1733 switch (ifa->ifa_addr->sa_family) { 1734 #ifdef INET 1735 case AF_INET: 1736 sc->sc_if.if_flags |= IFF_UP; 1737 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1738 sizeof(struct sockaddr)); 1739 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1740 break; 1741 #endif /* INET */ 1742 #ifdef INET6 1743 case AF_INET6: 1744 sc->sc_if.if_flags |= IFF_UP; 1745 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1746 break; 1747 #endif /* INET6 */ 1748 default: 1749 error = EAFNOSUPPORT; 1750 break; 1751 } 1752 break; 1753 1754 case SIOCAIFADDR: 1755 switch (ifa->ifa_addr->sa_family) { 1756 #ifdef INET 1757 case AF_INET: 1758 sc->sc_if.if_flags |= IFF_UP; 1759 bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, 1760 sizeof(struct sockaddr)); 1761 error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); 1762 break; 1763 #endif /* INET */ 1764 #ifdef INET6 1765 case AF_INET6: 1766 sc->sc_if.if_flags |= IFF_UP; 1767 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1768 break; 1769 #endif /* INET6 */ 1770 default: 1771 error = EAFNOSUPPORT; 1772 break; 1773 } 1774 break; 1775 1776 case SIOCDIFADDR: 1777 switch (ifa->ifa_addr->sa_family) { 1778 #ifdef INET 1779 case AF_INET: 1780 error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); 1781 break; 1782 #endif /* INET */ 1783 #ifdef INET6 1784 case AF_INET6: 1785 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1786 break; 1787 #endif /* INET6 */ 1788 default: 1789 error = EAFNOSUPPORT; 1790 break; 1791 } 1792 break; 1793 1794 case SIOCSIFFLAGS: 1795 if (sc->sc_carpdev) { 1796 locked = 1; 1797 CARP_SCLOCK(sc); 1798 } 1799 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1800 callout_stop(&sc->sc_ad_tmo); 1801 callout_stop(&sc->sc_md_tmo); 1802 callout_stop(&sc->sc_md6_tmo); 1803 if (sc->sc_state == MASTER) 1804 carp_send_ad_locked(sc); 1805 carp_set_state(sc, INIT); 1806 carp_setrun(sc, 0); 1807 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { 1808 sc->sc_if.if_flags |= IFF_UP; 1809 carp_setrun(sc, 0); 1810 } 1811 break; 1812 1813 case SIOCSVH: 1814 if ((error = suser(curthread)) != 0) 1815 break; 1816 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1817 break; 1818 error = 1; 1819 if (sc->sc_carpdev) { 1820 locked = 1; 1821 CARP_SCLOCK(sc); 1822 } 1823 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1824 switch (carpr.carpr_state) { 1825 case BACKUP: 1826 callout_stop(&sc->sc_ad_tmo); 1827 carp_set_state(sc, BACKUP); 1828 carp_setrun(sc, 0); 1829 carp_setroute(sc, RTM_DELETE); 1830 break; 1831 case MASTER: 1832 carp_master_down_locked(sc); 1833 break; 1834 default: 1835 break; 1836 } 1837 } 1838 if (carpr.carpr_vhid > 0) { 1839 if (carpr.carpr_vhid > 255) { 1840 error = EINVAL; 1841 break; 1842 } 1843 if (sc->sc_carpdev) { 1844 struct carp_if *cif; 1845 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1846 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) 1847 if (vr != sc && 1848 vr->sc_vhid == carpr.carpr_vhid) 1849 return EEXIST; 1850 } 1851 sc->sc_vhid = carpr.carpr_vhid; 1852 sc->sc_ac.ac_enaddr[0] = 0; 1853 sc->sc_ac.ac_enaddr[1] = 0; 1854 sc->sc_ac.ac_enaddr[2] = 0x5e; 1855 sc->sc_ac.ac_enaddr[3] = 0; 1856 sc->sc_ac.ac_enaddr[4] = 1; 1857 sc->sc_ac.ac_enaddr[5] = sc->sc_vhid; 1858 error--; 1859 } 1860 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1861 if (carpr.carpr_advskew >= 255) { 1862 error = EINVAL; 1863 break; 1864 } 1865 if (carpr.carpr_advbase > 255) { 1866 error = EINVAL; 1867 break; 1868 } 1869 sc->sc_advbase = carpr.carpr_advbase; 1870 sc->sc_advskew = carpr.carpr_advskew; 1871 error--; 1872 } 1873 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1874 if (error > 0) 1875 error = EINVAL; 1876 else { 1877 error = 0; 1878 carp_setrun(sc, 0); 1879 } 1880 break; 1881 1882 case SIOCGVH: 1883 /* XXX: lockless read */ 1884 bzero(&carpr, sizeof(carpr)); 1885 carpr.carpr_state = sc->sc_state; 1886 carpr.carpr_vhid = sc->sc_vhid; 1887 carpr.carpr_advbase = sc->sc_advbase; 1888 carpr.carpr_advskew = sc->sc_advskew; 1889 if (suser(curthread) == 0) 1890 bcopy(sc->sc_key, carpr.carpr_key, 1891 sizeof(carpr.carpr_key)); 1892 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1893 break; 1894 1895 default: 1896 error = EINVAL; 1897 } 1898 1899 if (locked) 1900 CARP_SCUNLOCK(sc); 1901 1902 carp_hmac_prepare(sc); 1903 1904 return (error); 1905 } 1906 1907 /* 1908 * XXX: this is looutput. We should eventually use it from there. 1909 */ 1910 static int 1911 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1912 struct rtentry *rt) 1913 { 1914 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1915 1916 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1917 m_freem(m); 1918 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1919 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1920 } 1921 1922 ifp->if_opackets++; 1923 ifp->if_obytes += m->m_pkthdr.len; 1924 #if 1 /* XXX */ 1925 switch (dst->sa_family) { 1926 case AF_INET: 1927 case AF_INET6: 1928 case AF_IPX: 1929 case AF_APPLETALK: 1930 break; 1931 default: 1932 printf("carp_looutput: af=%d unexpected\n", dst->sa_family); 1933 m_freem(m); 1934 return (EAFNOSUPPORT); 1935 } 1936 #endif 1937 return(if_simloop(ifp, m, dst->sa_family, 0)); 1938 } 1939 1940 /* 1941 * Start output on carp interface. This function should never be called. 1942 */ 1943 static void 1944 carp_start(struct ifnet *ifp) 1945 { 1946 #ifdef DEBUG 1947 printf("%s: start called\n", ifp->if_xname); 1948 #endif 1949 } 1950 1951 int 1952 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 1953 struct rtentry *rt) 1954 { 1955 struct m_tag *mtag; 1956 struct carp_softc *sc; 1957 struct ifnet *carp_ifp; 1958 1959 if (!sa) 1960 return (0); 1961 1962 switch (sa->sa_family) { 1963 #ifdef INET 1964 case AF_INET: 1965 break; 1966 #endif /* INET */ 1967 #ifdef INET6 1968 case AF_INET6: 1969 break; 1970 #endif /* INET6 */ 1971 default: 1972 return (0); 1973 } 1974 1975 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1976 if (mtag == NULL) 1977 return (0); 1978 1979 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 1980 sc = carp_ifp->if_softc; 1981 1982 /* Set the source MAC address to Virtual Router MAC Address */ 1983 switch (ifp->if_type) { 1984 case IFT_ETHER: 1985 case IFT_L2VLAN: { 1986 struct ether_header *eh; 1987 1988 eh = mtod(m, struct ether_header *); 1989 eh->ether_shost[0] = 0; 1990 eh->ether_shost[1] = 0; 1991 eh->ether_shost[2] = 0x5e; 1992 eh->ether_shost[3] = 0; 1993 eh->ether_shost[4] = 1; 1994 eh->ether_shost[5] = sc->sc_vhid; 1995 } 1996 break; 1997 case IFT_FDDI: { 1998 struct fddi_header *fh; 1999 2000 fh = mtod(m, struct fddi_header *); 2001 fh->fddi_shost[0] = 0; 2002 fh->fddi_shost[1] = 0; 2003 fh->fddi_shost[2] = 0x5e; 2004 fh->fddi_shost[3] = 0; 2005 fh->fddi_shost[4] = 1; 2006 fh->fddi_shost[5] = sc->sc_vhid; 2007 } 2008 break; 2009 case IFT_ISO88025: { 2010 struct iso88025_header *th; 2011 th = mtod(m, struct iso88025_header *); 2012 th->iso88025_shost[0] = 3; 2013 th->iso88025_shost[1] = 0; 2014 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 2015 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 2016 th->iso88025_shost[4] = 0; 2017 th->iso88025_shost[5] = 0; 2018 } 2019 break; 2020 default: 2021 printf("%s: carp is not supported for this interface type\n", 2022 ifp->if_xname); 2023 return (EOPNOTSUPP); 2024 } 2025 2026 return (0); 2027 } 2028 2029 static void 2030 carp_set_state(struct carp_softc *sc, int state) 2031 { 2032 2033 if (sc->sc_carpdev) 2034 CARP_SCLOCK_ASSERT(sc); 2035 2036 if (sc->sc_state == state) 2037 return; 2038 2039 sc->sc_state = state; 2040 switch (state) { 2041 case BACKUP: 2042 sc->sc_ac.ac_if.if_link_state = LINK_STATE_DOWN; 2043 break; 2044 case MASTER: 2045 sc->sc_ac.ac_if.if_link_state = LINK_STATE_UP; 2046 break; 2047 default: 2048 sc->sc_ac.ac_if.if_link_state = LINK_STATE_UNKNOWN; 2049 break; 2050 } 2051 rt_ifmsg(&sc->sc_ac.ac_if); 2052 } 2053 2054 void 2055 carp_carpdev_state(void *v) 2056 { 2057 struct carp_if *cif = v; 2058 2059 /* 2060 * We came here from interrupt handler of network 2061 * card. To avoid multiple LORs, we will queue function 2062 * for later. 2063 */ 2064 2065 callout_reset(&cif->cif_tmo, 1, carp_carpdev_state1, v); 2066 } 2067 2068 void 2069 carp_carpdev_state1(void *v) 2070 { 2071 struct carp_if *cif = v; 2072 2073 CARP_LOCK(cif); 2074 carp_carpdev_state_locked(cif); 2075 CARP_UNLOCK(cif); 2076 } 2077 2078 static void 2079 carp_carpdev_state_locked(struct carp_if *cif) 2080 { 2081 struct carp_softc *sc; 2082 2083 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2084 carp_sc_state_locked(sc); 2085 } 2086 2087 static void 2088 carp_sc_state_locked(struct carp_softc *sc) 2089 { 2090 CARP_SCLOCK_ASSERT(sc); 2091 2092 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2093 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2094 sc->sc_flags_backup = sc->sc_if.if_flags; 2095 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 2096 callout_stop(&sc->sc_ad_tmo); 2097 callout_stop(&sc->sc_md_tmo); 2098 callout_stop(&sc->sc_md6_tmo); 2099 carp_set_state(sc, INIT); 2100 carp_setrun(sc, 0); 2101 if (!sc->sc_suppress) { 2102 carp_suppress_preempt++; 2103 if (carp_suppress_preempt == 1) { 2104 CARP_SCUNLOCK(sc); 2105 carp_send_ad_all(); 2106 CARP_SCLOCK(sc); 2107 } 2108 } 2109 sc->sc_suppress = 1; 2110 } else { 2111 sc->sc_if.if_flags |= sc->sc_flags_backup; 2112 carp_set_state(sc, INIT); 2113 carp_setrun(sc, 0); 2114 if (sc->sc_suppress) 2115 carp_suppress_preempt--; 2116 sc->sc_suppress = 0; 2117 } 2118 2119 return; 2120 } 2121 2122 static int 2123 carp_modevent(module_t mod, int type, void *data) 2124 { 2125 int error = 0; 2126 2127 switch (type) { 2128 case MOD_LOAD: 2129 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2130 LIST_INIT(&carpif_list); 2131 if_clone_attach(&carp_cloner); 2132 break; 2133 2134 case MOD_UNLOAD: 2135 if_clone_detach(&carp_cloner); 2136 while (!LIST_EMPTY(&carpif_list)) 2137 carp_clone_destroy(&LIST_FIRST(&carpif_list)->sc_if); 2138 mtx_destroy(&carp_mtx); 2139 break; 2140 2141 default: 2142 error = EINVAL; 2143 break; 2144 } 2145 2146 return error; 2147 } 2148 2149 static moduledata_t carp_mod = { 2150 "carp", 2151 carp_modevent, 2152 0 2153 }; 2154 2155 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2156