1 /*- 2 * Copyright (c) 2002 Michael Shalayeff. 3 * Copyright (c) 2003 Ryan McBride. 4 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/counter.h>

#include <net/ethernet.h>
#include <net/fddi.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/iso88025.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip.h>
#include <machine/in_cksum.h>
#endif
#ifdef INET
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <crypto/sha1.h>

static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");

/*
 * Per-vhid state.  One softc exists for each virtual host id configured
 * on a parent interface; it is linked both on the parent's carp_if list
 * (sc_list) and on the global list (sc_next).
 */
struct carp_softc {
	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
	struct sockaddr_dl	sc_addr;	/* Our link level address. */
	struct callout		sc_ad_tmo;	/* Advertising timeout. */
#ifdef INET
	struct callout		sc_md_tmo;	/* Master down timeout. */
#endif
#ifdef INET6
	struct callout		sc_md6_tmo;	/* XXX: Master down timeout. */
#endif
	struct mtx		sc_mtx;		/* Softc lock, see CARP_LOCK(). */

	int			sc_vhid;	/* Virtual host id. */
	int			sc_advskew;	/* Skew, in 1/256 sec units. */
	int			sc_advbase;	/* Advertisement base, seconds. */

	/*
	 * CARP_FOREACH_IFA() walks the first sc_naddrs + sc_naddrs6 slots
	 * of sc_ifas.  NOTE(review): sc_ifasiz is presumably the allocated
	 * size of that array; it is not used in this part of the file.
	 */
	int			sc_naddrs;
	int			sc_naddrs6;
	int			sc_ifasiz;
	enum { INIT = 0, BACKUP, MASTER }	sc_state;
	int			sc_suppress;
	/* Consecutive send failures; demotion is applied at MAX_ERRORS. */
	int			sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	/* Consecutive send successes counted after a failure streak. */
	int			sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS	3

	/* Non-zero: carp_prepare_ad() picks a random sc_counter. */
	int			sc_init_counter;
	uint64_t		sc_counter;	/* Advertisement counter. */

	/* authentication */
#define	CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];	/* Shared secret. */
	unsigned char sc_pad[CARP_HMAC_PAD];	/* HMAC pad, see carp_hmac_prepare(). */
	SHA1_CTX sc_sha1;			/* Precomputed inner hash state. */

	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
};

/*
 * Per-interface CARP state, hung off ifp->if_carp.  Holds the list of
 * softcs (vhids) configured on the interface and the multicast options
 * handed to ip_output()/ip6_output() when advertising.
 */
struct carp_if {
#ifdef INET
	int	cif_naddrs;
#endif
#ifdef INET6
	int	cif_naddrs6;
#endif
	TAILQ_HEAD(, carp_softc) cif_vrs;	/* Softcs on this interface. */
#ifdef INET
	struct ip_moptions	 cif_imo;
#endif
#ifdef INET6
	struct ip6_moptions	 cif_im6o;
#endif
	struct ifnet	*cif_ifp;
	struct mtx	cif_mtx;		/* See CIF_LOCK(). */
	uint32_t	cif_flags;
#define	CIF_PROMISC	0x00000001
};

/* Indices into proto_reg[], one slot per address family. */
#define	CARP_INET	0
#define	CARP_INET6	1
static int proto_reg[] = {-1, -1};

/*
 * Brief design of carp(4).
 *
 * Any carp-capable ifnet may have a list of carp softcs hanging off
 * its ifp->if_carp pointer. Each softc represents one unique virtual
 * host id, or vhid. The softc has a back pointer to the ifnet. All
 * softcs are joined in a global list, which has quite limited use.
 *
 * Any interface address that takes part in CARP negotiation has a
 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
 * AF_INET or AF_INET6 address.
 *
 * Although, one can get the softc's backpointer to ifnet and traverse
 * through its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers.
 * This allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals
 * that do calls into the network stack, thus avoiding LORs (lock order
 * reversals).
 *
 * Locking:
 *
 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
 * callout-driven events and ioctl()s.
 *
 * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
 * traverse the global list we use the mutex carp_mtx.
 *
 * Known issues with locking:
 *
 * - There is no protection for races between two ioctl() requests,
 *   neither SIOCSVH, nor SIOCAIFADDR & SIOCAIFADDR_IN6. I think that all
 *   interface ioctl()s should be serialized right in net/if.c.
 * - Sending ad, we put the pointer to the softc in an mtag, and no reference
 *   counting is done on the softc.
 * - On module unload we may race (?) with packet processing thread
 *   dereferencing our function pointers.
 */

/*
 * Per-vnet tunables.  Each variable is reached through its V_ accessor
 * macro.
 */

/* Accept incoming CARP packets. */
static VNET_DEFINE(int, carp_allow) = 1;
#define	V_carp_allow	VNET(carp_allow)

/* Preempt slower nodes. */
static VNET_DEFINE(int, carp_preempt) = 0;
#define	V_carp_preempt	VNET(carp_preempt)

/* Log level. */
static VNET_DEFINE(int, carp_log) = 1;
#define	V_carp_log	VNET(carp_log)

/* Global advskew demotion. */
static VNET_DEFINE(int, carp_demotion) = 0;
#define	V_carp_demotion	VNET(carp_demotion)

/* Send error demotion factor. */
static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
#define	V_carp_senderr_adj	VNET(carp_senderr_adj)

/* Iface down demotion factor.
*/ 214 static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW; 215 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 216 217 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 218 219 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); 220 SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, 221 &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets"); 222 SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, 223 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 224 SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, 225 &VNET_NAME(carp_log), 0, "CARP log level"); 226 SYSCTL_VNET_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW, 227 0, 0, carp_demote_adj_sysctl, "I", 228 "Adjust demotion factor (skew of advskew)"); 229 SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW, 230 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 231 SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW, 232 &VNET_NAME(carp_ifdown_adj), 0, 233 "Interface down demotion factor adjustment"); 234 235 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 236 VNET_PCPUSTAT_SYSINIT(carpstats); 237 VNET_PCPUSTAT_SYSUNINIT(carpstats); 238 239 #define CARPSTATS_ADD(name, val) \ 240 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 241 sizeof(uint64_t)], (val)) 242 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 243 244 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 245 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 246 247 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 248 NULL, MTX_DEF) 249 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 250 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 251 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 252 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 253 #define CIF_LOCK_INIT(cif) 
mtx_init(&(cif)->cif_mtx, "carp_if", \ 254 NULL, MTX_DEF) 255 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 256 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 257 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 258 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 259 #define CIF_FREE(cif) do { \ 260 CIF_LOCK_ASSERT(cif); \ 261 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 262 carp_free_if(cif); \ 263 else \ 264 CIF_UNLOCK(cif); \ 265 } while (0) 266 267 #define CARP_LOG(...) do { \ 268 if (V_carp_log > 0) \ 269 log(LOG_INFO, "carp: " __VA_ARGS__); \ 270 } while (0) 271 272 #define CARP_DEBUG(...) do { \ 273 if (V_carp_log > 1) \ 274 log(LOG_DEBUG, __VA_ARGS__); \ 275 } while (0) 276 277 #define IFNET_FOREACH_IFA(ifp, ifa) \ 278 IF_ADDR_LOCK_ASSERT(ifp); \ 279 TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 280 if ((ifa)->ifa_carp != NULL) 281 282 #define CARP_FOREACH_IFA(sc, ifa) \ 283 CARP_LOCK_ASSERT(sc); \ 284 for (int _i = 0; \ 285 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 286 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 287 ++_i) 288 289 #define IFNET_FOREACH_CARP(ifp, sc) \ 290 CIF_LOCK_ASSERT(ifp->if_carp); \ 291 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 292 293 #define DEMOTE_ADVSKEW(sc) \ 294 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? 
\ 295 CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion)) 296 297 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 298 static struct carp_softc 299 *carp_alloc(struct ifnet *); 300 static void carp_detach_locked(struct ifaddr *); 301 static void carp_destroy(struct carp_softc *); 302 static struct carp_if 303 *carp_alloc_if(struct ifnet *); 304 static void carp_free_if(struct carp_if *); 305 static void carp_set_state(struct carp_softc *, int); 306 static void carp_sc_state(struct carp_softc *); 307 static void carp_setrun(struct carp_softc *, sa_family_t); 308 static void carp_master_down(void *); 309 static void carp_master_down_locked(struct carp_softc *); 310 static void carp_send_ad(void *); 311 static void carp_send_ad_locked(struct carp_softc *); 312 static void carp_addroute(struct carp_softc *); 313 static void carp_ifa_addroute(struct ifaddr *); 314 static void carp_delroute(struct carp_softc *); 315 static void carp_ifa_delroute(struct ifaddr *); 316 static void carp_send_ad_all(void *, int); 317 static void carp_demote_adj(int, char *); 318 319 static LIST_HEAD(, carp_softc) carp_list; 320 static struct mtx carp_mtx; 321 static struct task carp_sendall_task = 322 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 323 324 static void 325 carp_hmac_prepare(struct carp_softc *sc) 326 { 327 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 328 uint8_t vhid = sc->sc_vhid & 0xff; 329 struct ifaddr *ifa; 330 int i, found; 331 #ifdef INET 332 struct in_addr last, cur, in; 333 #endif 334 #ifdef INET6 335 struct in6_addr last6, cur6, in6; 336 #endif 337 338 CARP_LOCK_ASSERT(sc); 339 340 /* Compute ipad from key. */ 341 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 342 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 343 for (i = 0; i < sizeof(sc->sc_pad); i++) 344 sc->sc_pad[i] ^= 0x36; 345 346 /* Precompute first part of inner hash. 
*/ 347 SHA1Init(&sc->sc_sha1); 348 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 349 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 350 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 351 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 352 #ifdef INET 353 cur.s_addr = 0; 354 do { 355 found = 0; 356 last = cur; 357 cur.s_addr = 0xffffffff; 358 CARP_FOREACH_IFA(sc, ifa) { 359 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 360 if (ifa->ifa_addr->sa_family == AF_INET && 361 ntohl(in.s_addr) > ntohl(last.s_addr) && 362 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 363 cur.s_addr = in.s_addr; 364 found++; 365 } 366 } 367 if (found) 368 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 369 } while (found); 370 #endif /* INET */ 371 #ifdef INET6 372 memset(&cur6, 0, sizeof(cur6)); 373 do { 374 found = 0; 375 last6 = cur6; 376 memset(&cur6, 0xff, sizeof(cur6)); 377 CARP_FOREACH_IFA(sc, ifa) { 378 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 379 if (IN6_IS_SCOPE_EMBED(&in6)) 380 in6.s6_addr16[1] = 0; 381 if (ifa->ifa_addr->sa_family == AF_INET6 && 382 memcmp(&in6, &last6, sizeof(in6)) > 0 && 383 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 384 cur6 = in6; 385 found++; 386 } 387 } 388 if (found) 389 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 390 } while (found); 391 #endif /* INET6 */ 392 393 /* convert ipad to opad */ 394 for (i = 0; i < sizeof(sc->sc_pad); i++) 395 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 396 } 397 398 static void 399 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 400 unsigned char md[20]) 401 { 402 SHA1_CTX sha1ctx; 403 404 CARP_LOCK_ASSERT(sc); 405 406 /* fetch first half of inner hash */ 407 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 408 409 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 410 SHA1Final(md, &sha1ctx); 411 412 /* outer hash */ 413 SHA1Init(&sha1ctx); 414 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 415 SHA1Update(&sha1ctx, md, 20); 416 SHA1Final(md, &sha1ctx); 417 
} 418 419 static int 420 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 421 unsigned char md[20]) 422 { 423 unsigned char md2[20]; 424 425 CARP_LOCK_ASSERT(sc); 426 427 carp_hmac_generate(sc, counter, md2); 428 429 return (bcmp(md, md2, sizeof(md2))); 430 } 431 432 /* 433 * process input packet. 434 * we have rearranged checks order compared to the rfc, 435 * but it seems more efficient this way or not possible otherwise. 436 */ 437 #ifdef INET 438 void 439 carp_input(struct mbuf *m, int hlen) 440 { 441 struct ip *ip = mtod(m, struct ip *); 442 struct carp_header *ch; 443 int iplen, len; 444 445 CARPSTATS_INC(carps_ipackets); 446 447 if (!V_carp_allow) { 448 m_freem(m); 449 return; 450 } 451 452 /* verify that the IP TTL is 255. */ 453 if (ip->ip_ttl != CARP_DFLTTL) { 454 CARPSTATS_INC(carps_badttl); 455 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 456 ip->ip_ttl, 457 m->m_pkthdr.rcvif->if_xname); 458 m_freem(m); 459 return; 460 } 461 462 iplen = ip->ip_hl << 2; 463 464 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 465 CARPSTATS_INC(carps_badlen); 466 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 467 "on %s\n", __func__, m->m_len - sizeof(struct ip), 468 m->m_pkthdr.rcvif->if_xname); 469 m_freem(m); 470 return; 471 } 472 473 if (iplen + sizeof(*ch) < m->m_len) { 474 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 475 CARPSTATS_INC(carps_hdrops); 476 CARP_DEBUG("%s: pullup failed\n", __func__); 477 return; 478 } 479 ip = mtod(m, struct ip *); 480 } 481 ch = (struct carp_header *)((char *)ip + iplen); 482 483 /* 484 * verify that the received packet length is 485 * equal to the CARP header 486 */ 487 len = iplen + sizeof(*ch); 488 if (len > m->m_pkthdr.len) { 489 CARPSTATS_INC(carps_badlen); 490 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 491 m->m_pkthdr.len, 492 m->m_pkthdr.rcvif->if_xname); 493 m_freem(m); 494 return; 495 } 496 497 if ((m = m_pullup(m, len)) == NULL) { 498 CARPSTATS_INC(carps_hdrops); 
499 return; 500 } 501 ip = mtod(m, struct ip *); 502 ch = (struct carp_header *)((char *)ip + iplen); 503 504 /* verify the CARP checksum */ 505 m->m_data += iplen; 506 if (in_cksum(m, len - iplen)) { 507 CARPSTATS_INC(carps_badsum); 508 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 509 m->m_pkthdr.rcvif->if_xname); 510 m_freem(m); 511 return; 512 } 513 m->m_data -= iplen; 514 515 carp_input_c(m, ch, AF_INET); 516 } 517 #endif 518 519 #ifdef INET6 520 int 521 carp6_input(struct mbuf **mp, int *offp, int proto) 522 { 523 struct mbuf *m = *mp; 524 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 525 struct carp_header *ch; 526 u_int len; 527 528 CARPSTATS_INC(carps_ipackets6); 529 530 if (!V_carp_allow) { 531 m_freem(m); 532 return (IPPROTO_DONE); 533 } 534 535 /* check if received on a valid carp interface */ 536 if (m->m_pkthdr.rcvif->if_carp == NULL) { 537 CARPSTATS_INC(carps_badif); 538 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 539 __func__, m->m_pkthdr.rcvif->if_xname); 540 m_freem(m); 541 return (IPPROTO_DONE); 542 } 543 544 /* verify that the IP TTL is 255 */ 545 if (ip6->ip6_hlim != CARP_DFLTTL) { 546 CARPSTATS_INC(carps_badttl); 547 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 548 ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); 549 m_freem(m); 550 return (IPPROTO_DONE); 551 } 552 553 /* verify that we have a complete carp packet */ 554 len = m->m_len; 555 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 556 if (ch == NULL) { 557 CARPSTATS_INC(carps_badlen); 558 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 559 return (IPPROTO_DONE); 560 } 561 562 563 /* verify the CARP checksum */ 564 m->m_data += *offp; 565 if (in_cksum(m, sizeof(*ch))) { 566 CARPSTATS_INC(carps_badsum); 567 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 568 m->m_pkthdr.rcvif->if_xname); 569 m_freem(m); 570 return (IPPROTO_DONE); 571 } 572 m->m_data -= *offp; 573 574 carp_input_c(m, ch, AF_INET6); 575 return 
(IPPROTO_DONE); 576 } 577 #endif /* INET6 */ 578 579 static void 580 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 581 { 582 struct ifnet *ifp = m->m_pkthdr.rcvif; 583 struct ifaddr *ifa; 584 struct carp_softc *sc; 585 uint64_t tmp_counter; 586 struct timeval sc_tv, ch_tv; 587 588 /* verify that the VHID is valid on the receiving interface */ 589 IF_ADDR_RLOCK(ifp); 590 IFNET_FOREACH_IFA(ifp, ifa) 591 if (ifa->ifa_addr->sa_family == af && 592 ifa->ifa_carp->sc_vhid == ch->carp_vhid) { 593 ifa_ref(ifa); 594 break; 595 } 596 IF_ADDR_RUNLOCK(ifp); 597 598 if (ifa == NULL) { 599 CARPSTATS_INC(carps_badvhid); 600 m_freem(m); 601 return; 602 } 603 604 /* verify the CARP version. */ 605 if (ch->carp_version != CARP_VERSION) { 606 CARPSTATS_INC(carps_badver); 607 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname, 608 ch->carp_version); 609 ifa_free(ifa); 610 m_freem(m); 611 return; 612 } 613 614 sc = ifa->ifa_carp; 615 CARP_LOCK(sc); 616 ifa_free(ifa); 617 618 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 619 CARPSTATS_INC(carps_badauth); 620 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 621 sc->sc_vhid, ifp->if_xname); 622 goto out; 623 } 624 625 tmp_counter = ntohl(ch->carp_counter[0]); 626 tmp_counter = tmp_counter<<32; 627 tmp_counter += ntohl(ch->carp_counter[1]); 628 629 /* XXX Replay protection goes here */ 630 631 sc->sc_init_counter = 0; 632 sc->sc_counter = tmp_counter; 633 634 sc_tv.tv_sec = sc->sc_advbase; 635 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 636 ch_tv.tv_sec = ch->carp_advbase; 637 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 638 639 switch (sc->sc_state) { 640 case INIT: 641 break; 642 case MASTER: 643 /* 644 * If we receive an advertisement from a master who's going to 645 * be more frequent than us, go into BACKUP state. 
646 */ 647 if (timevalcmp(&sc_tv, &ch_tv, >) || 648 timevalcmp(&sc_tv, &ch_tv, ==)) { 649 callout_stop(&sc->sc_ad_tmo); 650 CARP_LOG("VHID %u@%s: MASTER -> BACKUP " 651 "(more frequent advertisement received)\n", 652 sc->sc_vhid, 653 sc->sc_carpdev->if_xname); 654 carp_set_state(sc, BACKUP); 655 carp_setrun(sc, 0); 656 carp_delroute(sc); 657 } 658 break; 659 case BACKUP: 660 /* 661 * If we're pre-empting masters who advertise slower than us, 662 * and this one claims to be slower, treat him as down. 663 */ 664 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 665 CARP_LOG("VHID %u@%s: BACKUP -> MASTER " 666 "(preempting a slower master)\n", 667 sc->sc_vhid, 668 sc->sc_carpdev->if_xname); 669 carp_master_down_locked(sc); 670 break; 671 } 672 673 /* 674 * If the master is going to advertise at such a low frequency 675 * that he's guaranteed to time out, we'd might as well just 676 * treat him as timed out now. 677 */ 678 sc_tv.tv_sec = sc->sc_advbase * 3; 679 if (timevalcmp(&sc_tv, &ch_tv, <)) { 680 CARP_LOG("VHID %u@%s: BACKUP -> MASTER " 681 "(master timed out)\n", 682 sc->sc_vhid, 683 sc->sc_carpdev->if_xname); 684 carp_master_down_locked(sc); 685 break; 686 } 687 688 /* 689 * Otherwise, we reset the counter and wait for the next 690 * advertisement. 
691 */ 692 carp_setrun(sc, af); 693 break; 694 } 695 696 out: 697 CARP_UNLOCK(sc); 698 m_freem(m); 699 } 700 701 static int 702 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 703 { 704 struct m_tag *mtag; 705 706 if (sc->sc_init_counter) { 707 /* this could also be seconds since unix epoch */ 708 sc->sc_counter = arc4random(); 709 sc->sc_counter = sc->sc_counter << 32; 710 sc->sc_counter += arc4random(); 711 } else 712 sc->sc_counter++; 713 714 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 715 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 716 717 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 718 719 /* Tag packet for carp_output */ 720 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), 721 M_NOWAIT)) == NULL) { 722 m_freem(m); 723 CARPSTATS_INC(carps_onomem); 724 return (ENOMEM); 725 } 726 bcopy(&sc, mtag + 1, sizeof(sc)); 727 m_tag_prepend(m, mtag); 728 729 return (0); 730 } 731 732 /* 733 * To avoid LORs and possible recursions this function shouldn't 734 * be called directly, but scheduled via taskqueue. 735 */ 736 static void 737 carp_send_ad_all(void *ctx __unused, int pending __unused) 738 { 739 struct carp_softc *sc; 740 741 mtx_lock(&carp_mtx); 742 LIST_FOREACH(sc, &carp_list, sc_next) 743 if (sc->sc_state == MASTER) { 744 CARP_LOCK(sc); 745 CURVNET_SET(sc->sc_carpdev->if_vnet); 746 carp_send_ad_locked(sc); 747 CURVNET_RESTORE(); 748 CARP_UNLOCK(sc); 749 } 750 mtx_unlock(&carp_mtx); 751 } 752 753 /* Send a periodic advertisement, executed in callout context. 
*/ 754 static void 755 carp_send_ad(void *v) 756 { 757 struct carp_softc *sc = v; 758 759 CARP_LOCK_ASSERT(sc); 760 CURVNET_SET(sc->sc_carpdev->if_vnet); 761 carp_send_ad_locked(sc); 762 CURVNET_RESTORE(); 763 CARP_UNLOCK(sc); 764 } 765 766 static void 767 carp_send_ad_error(struct carp_softc *sc, int error) 768 { 769 770 if (error) { 771 if (sc->sc_sendad_errors < INT_MAX) 772 sc->sc_sendad_errors++; 773 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 774 static const char fmt[] = "send error %d on %s"; 775 char msg[sizeof(fmt) + IFNAMSIZ]; 776 777 sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); 778 carp_demote_adj(V_carp_senderr_adj, msg); 779 } 780 sc->sc_sendad_success = 0; 781 } else { 782 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS && 783 ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 784 static const char fmt[] = "send ok on %s"; 785 char msg[sizeof(fmt) + IFNAMSIZ]; 786 787 sprintf(msg, fmt, sc->sc_carpdev->if_xname); 788 carp_demote_adj(-V_carp_senderr_adj, msg); 789 sc->sc_sendad_errors = 0; 790 } else 791 sc->sc_sendad_errors = 0; 792 } 793 } 794 795 static void 796 carp_send_ad_locked(struct carp_softc *sc) 797 { 798 struct carp_header ch; 799 struct timeval tv; 800 struct sockaddr sa; 801 struct ifaddr *ifa; 802 struct carp_header *ch_ptr; 803 struct mbuf *m; 804 int len, advskew; 805 806 CARP_LOCK_ASSERT(sc); 807 808 advskew = DEMOTE_ADVSKEW(sc); 809 tv.tv_sec = sc->sc_advbase; 810 tv.tv_usec = advskew * 1000000 / 256; 811 812 ch.carp_version = CARP_VERSION; 813 ch.carp_type = CARP_ADVERTISEMENT; 814 ch.carp_vhid = sc->sc_vhid; 815 ch.carp_advbase = sc->sc_advbase; 816 ch.carp_advskew = advskew; 817 ch.carp_authlen = 7; /* XXX DEFINE */ 818 ch.carp_pad1 = 0; /* must be zero */ 819 ch.carp_cksum = 0; 820 821 /* XXXGL: OpenBSD picks first ifaddr with needed family. 
*/ 822 823 #ifdef INET 824 if (sc->sc_naddrs) { 825 struct ip *ip; 826 827 m = m_gethdr(M_NOWAIT, MT_DATA); 828 if (m == NULL) { 829 CARPSTATS_INC(carps_onomem); 830 goto resched; 831 } 832 len = sizeof(*ip) + sizeof(ch); 833 m->m_pkthdr.len = len; 834 m->m_pkthdr.rcvif = NULL; 835 m->m_len = len; 836 MH_ALIGN(m, m->m_len); 837 m->m_flags |= M_MCAST; 838 ip = mtod(m, struct ip *); 839 ip->ip_v = IPVERSION; 840 ip->ip_hl = sizeof(*ip) >> 2; 841 ip->ip_tos = IPTOS_LOWDELAY; 842 ip->ip_len = htons(len); 843 ip->ip_id = ip_newid(); 844 ip->ip_off = htons(IP_DF); 845 ip->ip_ttl = CARP_DFLTTL; 846 ip->ip_p = IPPROTO_CARP; 847 ip->ip_sum = 0; 848 849 bzero(&sa, sizeof(sa)); 850 sa.sa_family = AF_INET; 851 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 852 if (ifa != NULL) { 853 ip->ip_src.s_addr = 854 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 855 ifa_free(ifa); 856 } else 857 ip->ip_src.s_addr = 0; 858 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 859 860 ch_ptr = (struct carp_header *)(&ip[1]); 861 bcopy(&ch, ch_ptr, sizeof(ch)); 862 if (carp_prepare_ad(m, sc, ch_ptr)) 863 goto resched; 864 865 m->m_data += sizeof(*ip); 866 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 867 m->m_data -= sizeof(*ip); 868 869 CARPSTATS_INC(carps_opackets); 870 871 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 872 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 873 } 874 #endif /* INET */ 875 #ifdef INET6 876 if (sc->sc_naddrs6) { 877 struct ip6_hdr *ip6; 878 879 m = m_gethdr(M_NOWAIT, MT_DATA); 880 if (m == NULL) { 881 CARPSTATS_INC(carps_onomem); 882 goto resched; 883 } 884 len = sizeof(*ip6) + sizeof(ch); 885 m->m_pkthdr.len = len; 886 m->m_pkthdr.rcvif = NULL; 887 m->m_len = len; 888 MH_ALIGN(m, m->m_len); 889 m->m_flags |= M_MCAST; 890 ip6 = mtod(m, struct ip6_hdr *); 891 bzero(ip6, sizeof(*ip6)); 892 ip6->ip6_vfc |= IPV6_VERSION; 893 ip6->ip6_hlim = CARP_DFLTTL; 894 ip6->ip6_nxt = IPPROTO_CARP; 895 bzero(&sa, sizeof(sa)); 896 897 /* set the source address */ 898 
sa.sa_family = AF_INET6; 899 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 900 if (ifa != NULL) { 901 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 902 sizeof(struct in6_addr)); 903 ifa_free(ifa); 904 } else 905 /* This should never happen with IPv6. */ 906 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 907 908 /* Set the multicast destination. */ 909 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 910 ip6->ip6_dst.s6_addr8[15] = 0x12; 911 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 912 m_freem(m); 913 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 914 goto resched; 915 } 916 917 ch_ptr = (struct carp_header *)(&ip6[1]); 918 bcopy(&ch, ch_ptr, sizeof(ch)); 919 if (carp_prepare_ad(m, sc, ch_ptr)) 920 goto resched; 921 922 m->m_data += sizeof(*ip6); 923 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 924 m->m_data -= sizeof(*ip6); 925 926 CARPSTATS_INC(carps_opackets6); 927 928 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 929 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 930 } 931 #endif /* INET6 */ 932 933 resched: 934 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); 935 } 936 937 static void 938 carp_addroute(struct carp_softc *sc) 939 { 940 struct ifaddr *ifa; 941 942 CARP_FOREACH_IFA(sc, ifa) 943 carp_ifa_addroute(ifa); 944 } 945 946 static void 947 carp_ifa_addroute(struct ifaddr *ifa) 948 { 949 950 switch (ifa->ifa_addr->sa_family) { 951 #ifdef INET 952 case AF_INET: 953 in_addprefix(ifatoia(ifa), RTF_UP); 954 ifa_add_loopback_route(ifa, 955 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 956 break; 957 #endif 958 #ifdef INET6 959 case AF_INET6: 960 ifa_add_loopback_route(ifa, 961 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 962 nd6_add_ifa_lle(ifatoia6(ifa)); 963 break; 964 #endif 965 } 966 } 967 968 static void 969 carp_delroute(struct carp_softc *sc) 970 { 971 struct ifaddr *ifa; 972 973 CARP_FOREACH_IFA(sc, ifa) 974 carp_ifa_delroute(ifa); 975 } 976 977 static void 978 carp_ifa_delroute(struct ifaddr *ifa) 979 { 980 981 switch 
(ifa->ifa_addr->sa_family) { 982 #ifdef INET 983 case AF_INET: 984 ifa_del_loopback_route(ifa, 985 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 986 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 987 break; 988 #endif 989 #ifdef INET6 990 case AF_INET6: 991 ifa_del_loopback_route(ifa, 992 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 993 nd6_rem_ifa_lle(ifatoia6(ifa)); 994 break; 995 #endif 996 } 997 } 998 999 int 1000 carp_master(struct ifaddr *ifa) 1001 { 1002 struct carp_softc *sc = ifa->ifa_carp; 1003 1004 return (sc->sc_state == MASTER); 1005 } 1006 1007 #ifdef INET 1008 /* 1009 * Broadcast a gratuitous ARP request containing 1010 * the virtual router MAC address for each IP address 1011 * associated with the virtual router. 1012 */ 1013 static void 1014 carp_send_arp(struct carp_softc *sc) 1015 { 1016 struct ifaddr *ifa; 1017 1018 CARP_FOREACH_IFA(sc, ifa) 1019 if (ifa->ifa_addr->sa_family == AF_INET) 1020 arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr)); 1021 } 1022 1023 int 1024 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1025 { 1026 struct carp_softc *sc = ifa->ifa_carp; 1027 1028 if (sc->sc_state == MASTER) { 1029 *enaddr = LLADDR(&sc->sc_addr); 1030 return (1); 1031 } 1032 1033 return (0); 1034 } 1035 #endif 1036 1037 #ifdef INET6 1038 static void 1039 carp_send_na(struct carp_softc *sc) 1040 { 1041 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1042 struct ifaddr *ifa; 1043 struct in6_addr *in6; 1044 1045 CARP_FOREACH_IFA(sc, ifa) { 1046 if (ifa->ifa_addr->sa_family != AF_INET6) 1047 continue; 1048 1049 in6 = IFA_IN6(ifa); 1050 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1051 ND_NA_FLAG_OVERRIDE, 1, NULL); 1052 DELAY(1000); /* XXX */ 1053 } 1054 } 1055 1056 /* 1057 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1058 * matches and is not a carp address. Returns NULL otherwise. 
1059 */ 1060 struct ifaddr * 1061 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1062 { 1063 struct ifaddr *ifa; 1064 1065 ifa = NULL; 1066 IF_ADDR_RLOCK(ifp); 1067 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1068 if (ifa->ifa_addr->sa_family != AF_INET6) 1069 continue; 1070 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1071 continue; 1072 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1073 ifa = NULL; 1074 else 1075 ifa_ref(ifa); 1076 break; 1077 } 1078 IF_ADDR_RUNLOCK(ifp); 1079 1080 return (ifa); 1081 } 1082 1083 caddr_t 1084 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1085 { 1086 struct ifaddr *ifa; 1087 1088 IF_ADDR_RLOCK(ifp); 1089 IFNET_FOREACH_IFA(ifp, ifa) 1090 if (ifa->ifa_addr->sa_family == AF_INET6 && 1091 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1092 struct carp_softc *sc = ifa->ifa_carp; 1093 struct m_tag *mtag; 1094 1095 IF_ADDR_RUNLOCK(ifp); 1096 1097 mtag = m_tag_get(PACKET_TAG_CARP, 1098 sizeof(struct carp_softc *), M_NOWAIT); 1099 if (mtag == NULL) 1100 /* Better a bit than nothing. */ 1101 return (LLADDR(&sc->sc_addr)); 1102 1103 bcopy(&sc, mtag + 1, sizeof(sc)); 1104 m_tag_prepend(m, mtag); 1105 1106 return (LLADDR(&sc->sc_addr)); 1107 } 1108 IF_ADDR_RUNLOCK(ifp); 1109 1110 return (NULL); 1111 } 1112 #endif /* INET6 */ 1113 1114 int 1115 carp_forus(struct ifnet *ifp, u_char *dhost) 1116 { 1117 struct carp_softc *sc; 1118 uint8_t *ena = dhost; 1119 1120 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1121 return (0); 1122 1123 CIF_LOCK(ifp->if_carp); 1124 IFNET_FOREACH_CARP(ifp, sc) { 1125 CARP_LOCK(sc); 1126 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1127 ETHER_ADDR_LEN)) { 1128 CARP_UNLOCK(sc); 1129 CIF_UNLOCK(ifp->if_carp); 1130 return (1); 1131 } 1132 CARP_UNLOCK(sc); 1133 } 1134 CIF_UNLOCK(ifp->if_carp); 1135 1136 return (0); 1137 } 1138 1139 /* Master down timeout event, executed in callout context. 
*/ 1140 static void 1141 carp_master_down(void *v) 1142 { 1143 struct carp_softc *sc = v; 1144 1145 CARP_LOCK_ASSERT(sc); 1146 1147 CURVNET_SET(sc->sc_carpdev->if_vnet); 1148 if (sc->sc_state == BACKUP) { 1149 CARP_LOG("VHID %u@%s: BACKUP -> MASTER (master down)\n", 1150 sc->sc_vhid, 1151 sc->sc_carpdev->if_xname); 1152 carp_master_down_locked(sc); 1153 } 1154 CURVNET_RESTORE(); 1155 1156 CARP_UNLOCK(sc); 1157 } 1158 1159 static void 1160 carp_master_down_locked(struct carp_softc *sc) 1161 { 1162 1163 CARP_LOCK_ASSERT(sc); 1164 1165 switch (sc->sc_state) { 1166 case BACKUP: 1167 carp_set_state(sc, MASTER); 1168 carp_send_ad_locked(sc); 1169 #ifdef INET 1170 carp_send_arp(sc); 1171 #endif 1172 #ifdef INET6 1173 carp_send_na(sc); 1174 #endif 1175 carp_setrun(sc, 0); 1176 carp_addroute(sc); 1177 break; 1178 case INIT: 1179 case MASTER: 1180 #ifdef INVARIANTS 1181 panic("carp: VHID %u@%s: master_down event in %s state\n", 1182 sc->sc_vhid, 1183 sc->sc_carpdev->if_xname, 1184 sc->sc_state ? "MASTER" : "INIT"); 1185 #endif 1186 break; 1187 } 1188 } 1189 1190 /* 1191 * When in backup state, af indicates whether to reset the master down timer 1192 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1193 */ 1194 static void 1195 carp_setrun(struct carp_softc *sc, sa_family_t af) 1196 { 1197 struct timeval tv; 1198 1199 CARP_LOCK_ASSERT(sc); 1200 1201 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1202 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1203 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)) 1204 return; 1205 1206 switch (sc->sc_state) { 1207 case INIT: 1208 CARP_LOG("VHID %u@%s: INIT -> BACKUP\n", 1209 sc->sc_vhid, 1210 sc->sc_carpdev->if_xname); 1211 carp_set_state(sc, BACKUP); 1212 carp_setrun(sc, 0); 1213 break; 1214 case BACKUP: 1215 callout_stop(&sc->sc_ad_tmo); 1216 tv.tv_sec = 3 * sc->sc_advbase; 1217 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1218 switch (af) { 1219 #ifdef INET 1220 case AF_INET: 1221 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1222 carp_master_down, sc); 1223 break; 1224 #endif 1225 #ifdef INET6 1226 case AF_INET6: 1227 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1228 carp_master_down, sc); 1229 break; 1230 #endif 1231 default: 1232 #ifdef INET 1233 if (sc->sc_naddrs) 1234 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1235 carp_master_down, sc); 1236 #endif 1237 #ifdef INET6 1238 if (sc->sc_naddrs6) 1239 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1240 carp_master_down, sc); 1241 #endif 1242 break; 1243 } 1244 break; 1245 case MASTER: 1246 tv.tv_sec = sc->sc_advbase; 1247 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1248 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1249 carp_send_ad, sc); 1250 break; 1251 } 1252 } 1253 1254 /* 1255 * Setup multicast structures. 
1256 */ 1257 static int 1258 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1259 { 1260 struct ifnet *ifp = cif->cif_ifp; 1261 int error = 0; 1262 1263 CIF_LOCK_ASSERT(cif); 1264 1265 switch (sa) { 1266 #ifdef INET 1267 case AF_INET: 1268 { 1269 struct ip_moptions *imo = &cif->cif_imo; 1270 struct in_addr addr; 1271 1272 if (imo->imo_membership) 1273 return (0); 1274 1275 imo->imo_membership = (struct in_multi **)malloc( 1276 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 1277 M_NOWAIT); 1278 if (imo->imo_membership == NULL) 1279 return (ENOMEM); 1280 imo->imo_mfilters = NULL; 1281 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1282 imo->imo_multicast_vif = -1; 1283 1284 addr.s_addr = htonl(INADDR_CARP_GROUP); 1285 if ((error = in_joingroup(ifp, &addr, NULL, 1286 &imo->imo_membership[0])) != 0) { 1287 free(imo->imo_membership, M_CARP); 1288 break; 1289 } 1290 imo->imo_num_memberships++; 1291 imo->imo_multicast_ifp = ifp; 1292 imo->imo_multicast_ttl = CARP_DFLTTL; 1293 imo->imo_multicast_loop = 0; 1294 break; 1295 } 1296 #endif 1297 #ifdef INET6 1298 case AF_INET6: 1299 { 1300 struct ip6_moptions *im6o = &cif->cif_im6o; 1301 struct in6_addr in6; 1302 struct in6_multi *in6m; 1303 1304 if (im6o->im6o_membership) 1305 return (0); 1306 1307 im6o->im6o_membership = (struct in6_multi **)malloc( 1308 (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, 1309 M_ZERO | M_NOWAIT); 1310 if (im6o->im6o_membership == NULL) 1311 return (ENOMEM); 1312 im6o->im6o_mfilters = NULL; 1313 im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; 1314 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1315 im6o->im6o_multicast_ifp = ifp; 1316 1317 /* Join IPv6 CARP multicast group. 
*/ 1318 bzero(&in6, sizeof(in6)); 1319 in6.s6_addr16[0] = htons(0xff02); 1320 in6.s6_addr8[15] = 0x12; 1321 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1322 free(im6o->im6o_membership, M_CARP); 1323 break; 1324 } 1325 in6m = NULL; 1326 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { 1327 free(im6o->im6o_membership, M_CARP); 1328 break; 1329 } 1330 im6o->im6o_membership[0] = in6m; 1331 im6o->im6o_num_memberships++; 1332 1333 /* Join solicited multicast address. */ 1334 bzero(&in6, sizeof(in6)); 1335 in6.s6_addr16[0] = htons(0xff02); 1336 in6.s6_addr32[1] = 0; 1337 in6.s6_addr32[2] = htonl(1); 1338 in6.s6_addr32[3] = 0; 1339 in6.s6_addr8[12] = 0xff; 1340 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1341 in6_mc_leave(im6o->im6o_membership[0], NULL); 1342 free(im6o->im6o_membership, M_CARP); 1343 break; 1344 } 1345 in6m = NULL; 1346 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { 1347 in6_mc_leave(im6o->im6o_membership[0], NULL); 1348 free(im6o->im6o_membership, M_CARP); 1349 break; 1350 } 1351 im6o->im6o_membership[1] = in6m; 1352 im6o->im6o_num_memberships++; 1353 break; 1354 } 1355 #endif 1356 } 1357 1358 return (error); 1359 } 1360 1361 /* 1362 * Free multicast structures. 
1363 */ 1364 static void 1365 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1366 { 1367 1368 CIF_LOCK_ASSERT(cif); 1369 switch (sa) { 1370 #ifdef INET 1371 case AF_INET: 1372 if (cif->cif_naddrs == 0) { 1373 struct ip_moptions *imo = &cif->cif_imo; 1374 1375 in_leavegroup(imo->imo_membership[0], NULL); 1376 KASSERT(imo->imo_mfilters == NULL, 1377 ("%s: imo_mfilters != NULL", __func__)); 1378 free(imo->imo_membership, M_CARP); 1379 imo->imo_membership = NULL; 1380 1381 } 1382 break; 1383 #endif 1384 #ifdef INET6 1385 case AF_INET6: 1386 if (cif->cif_naddrs6 == 0) { 1387 struct ip6_moptions *im6o = &cif->cif_im6o; 1388 1389 in6_mc_leave(im6o->im6o_membership[0], NULL); 1390 in6_mc_leave(im6o->im6o_membership[1], NULL); 1391 KASSERT(im6o->im6o_mfilters == NULL, 1392 ("%s: im6o_mfilters != NULL", __func__)); 1393 free(im6o->im6o_membership, M_CARP); 1394 im6o->im6o_membership = NULL; 1395 } 1396 break; 1397 #endif 1398 } 1399 } 1400 1401 int 1402 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1403 { 1404 struct m_tag *mtag; 1405 struct carp_softc *sc; 1406 1407 if (!sa) 1408 return (0); 1409 1410 switch (sa->sa_family) { 1411 #ifdef INET 1412 case AF_INET: 1413 break; 1414 #endif 1415 #ifdef INET6 1416 case AF_INET6: 1417 break; 1418 #endif 1419 default: 1420 return (0); 1421 } 1422 1423 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1424 if (mtag == NULL) 1425 return (0); 1426 1427 bcopy(mtag + 1, &sc, sizeof(sc)); 1428 1429 /* Set the source MAC address to the Virtual Router MAC Address. 
*/ 1430 switch (ifp->if_type) { 1431 case IFT_ETHER: 1432 case IFT_BRIDGE: 1433 case IFT_L2VLAN: { 1434 struct ether_header *eh; 1435 1436 eh = mtod(m, struct ether_header *); 1437 eh->ether_shost[0] = 0; 1438 eh->ether_shost[1] = 0; 1439 eh->ether_shost[2] = 0x5e; 1440 eh->ether_shost[3] = 0; 1441 eh->ether_shost[4] = 1; 1442 eh->ether_shost[5] = sc->sc_vhid; 1443 } 1444 break; 1445 case IFT_FDDI: { 1446 struct fddi_header *fh; 1447 1448 fh = mtod(m, struct fddi_header *); 1449 fh->fddi_shost[0] = 0; 1450 fh->fddi_shost[1] = 0; 1451 fh->fddi_shost[2] = 0x5e; 1452 fh->fddi_shost[3] = 0; 1453 fh->fddi_shost[4] = 1; 1454 fh->fddi_shost[5] = sc->sc_vhid; 1455 } 1456 break; 1457 case IFT_ISO88025: { 1458 struct iso88025_header *th; 1459 th = mtod(m, struct iso88025_header *); 1460 th->iso88025_shost[0] = 3; 1461 th->iso88025_shost[1] = 0; 1462 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 1463 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 1464 th->iso88025_shost[4] = 0; 1465 th->iso88025_shost[5] = 0; 1466 } 1467 break; 1468 default: 1469 printf("%s: carp is not supported for the %d interface type\n", 1470 ifp->if_xname, ifp->if_type); 1471 return (EOPNOTSUPP); 1472 } 1473 1474 return (0); 1475 } 1476 1477 static struct carp_softc* 1478 carp_alloc(struct ifnet *ifp) 1479 { 1480 struct carp_softc *sc; 1481 struct carp_if *cif; 1482 1483 if ((cif = ifp->if_carp) == NULL) 1484 cif = carp_alloc_if(ifp); 1485 1486 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1487 1488 sc->sc_advbase = CARP_DFLTINTV; 1489 sc->sc_vhid = -1; /* required setting */ 1490 sc->sc_init_counter = 1; 1491 sc->sc_state = INIT; 1492 1493 sc->sc_ifasiz = sizeof(struct ifaddr *); 1494 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1495 sc->sc_carpdev = ifp; 1496 1497 CARP_LOCK_INIT(sc); 1498 #ifdef INET 1499 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1500 #endif 1501 #ifdef INET6 1502 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, 
CALLOUT_RETURNUNLOCKED); 1503 #endif 1504 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1505 1506 CIF_LOCK(cif); 1507 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1508 CIF_UNLOCK(cif); 1509 1510 mtx_lock(&carp_mtx); 1511 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1512 mtx_unlock(&carp_mtx); 1513 1514 return (sc); 1515 } 1516 1517 static int 1518 carp_grow_ifas(struct carp_softc *sc) 1519 { 1520 struct ifaddr **new; 1521 1522 CARP_LOCK_ASSERT(sc); 1523 1524 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO); 1525 if (new == NULL) 1526 return (ENOMEM); 1527 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1528 free(sc->sc_ifas, M_CARP); 1529 sc->sc_ifas = new; 1530 sc->sc_ifasiz *= 2; 1531 1532 return (0); 1533 } 1534 1535 static void 1536 carp_destroy(struct carp_softc *sc) 1537 { 1538 struct ifnet *ifp = sc->sc_carpdev; 1539 struct carp_if *cif = ifp->if_carp; 1540 1541 CIF_LOCK_ASSERT(cif); 1542 1543 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1544 1545 mtx_lock(&carp_mtx); 1546 LIST_REMOVE(sc, sc_next); 1547 mtx_unlock(&carp_mtx); 1548 1549 CARP_LOCK(sc); 1550 if (sc->sc_suppress) 1551 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 1552 callout_drain(&sc->sc_ad_tmo); 1553 #ifdef INET 1554 callout_drain(&sc->sc_md_tmo); 1555 #endif 1556 #ifdef INET6 1557 callout_drain(&sc->sc_md6_tmo); 1558 #endif 1559 CARP_LOCK_DESTROY(sc); 1560 1561 free(sc->sc_ifas, M_CARP); 1562 free(sc, M_CARP); 1563 } 1564 1565 static struct carp_if* 1566 carp_alloc_if(struct ifnet *ifp) 1567 { 1568 struct carp_if *cif; 1569 int error; 1570 1571 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1572 1573 if ((error = ifpromisc(ifp, 1)) != 0) 1574 printf("%s: ifpromisc(%s) failed: %d\n", 1575 __func__, ifp->if_xname, error); 1576 else 1577 cif->cif_flags |= CIF_PROMISC; 1578 1579 CIF_LOCK_INIT(cif); 1580 cif->cif_ifp = ifp; 1581 TAILQ_INIT(&cif->cif_vrs); 1582 1583 IF_ADDR_WLOCK(ifp); 1584 ifp->if_carp = cif; 1585 if_ref(ifp); 1586 IF_ADDR_WUNLOCK(ifp); 1587 
1588 return (cif); 1589 } 1590 1591 static void 1592 carp_free_if(struct carp_if *cif) 1593 { 1594 struct ifnet *ifp = cif->cif_ifp; 1595 1596 CIF_LOCK_ASSERT(cif); 1597 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1598 __func__)); 1599 1600 IF_ADDR_WLOCK(ifp); 1601 ifp->if_carp = NULL; 1602 IF_ADDR_WUNLOCK(ifp); 1603 1604 CIF_LOCK_DESTROY(cif); 1605 1606 if (cif->cif_flags & CIF_PROMISC) 1607 ifpromisc(ifp, 0); 1608 if_rele(ifp); 1609 1610 free(cif, M_CARP); 1611 } 1612 1613 static void 1614 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) 1615 { 1616 1617 CARP_LOCK(sc); 1618 carpr->carpr_state = sc->sc_state; 1619 carpr->carpr_vhid = sc->sc_vhid; 1620 carpr->carpr_advbase = sc->sc_advbase; 1621 carpr->carpr_advskew = sc->sc_advskew; 1622 if (priv) 1623 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1624 else 1625 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1626 CARP_UNLOCK(sc); 1627 } 1628 1629 int 1630 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1631 { 1632 struct carpreq carpr; 1633 struct ifnet *ifp; 1634 struct carp_softc *sc = NULL; 1635 int error = 0, locked = 0; 1636 1637 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1638 return (error); 1639 1640 ifp = ifunit_ref(ifr->ifr_name); 1641 if (ifp == NULL) 1642 return (ENXIO); 1643 1644 switch (ifp->if_type) { 1645 case IFT_ETHER: 1646 case IFT_L2VLAN: 1647 case IFT_BRIDGE: 1648 case IFT_FDDI: 1649 case IFT_ISO88025: 1650 break; 1651 default: 1652 error = EOPNOTSUPP; 1653 goto out; 1654 } 1655 1656 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1657 error = EADDRNOTAVAIL; 1658 goto out; 1659 } 1660 1661 switch (cmd) { 1662 case SIOCSVH: 1663 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1664 break; 1665 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || 1666 carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { 1667 error = EINVAL; 1668 break; 1669 } 1670 1671 if (ifp->if_carp) { 1672 
CIF_LOCK(ifp->if_carp); 1673 IFNET_FOREACH_CARP(ifp, sc) 1674 if (sc->sc_vhid == carpr.carpr_vhid) 1675 break; 1676 CIF_UNLOCK(ifp->if_carp); 1677 } 1678 if (sc == NULL) { 1679 sc = carp_alloc(ifp); 1680 CARP_LOCK(sc); 1681 sc->sc_vhid = carpr.carpr_vhid; 1682 LLADDR(&sc->sc_addr)[0] = 0; 1683 LLADDR(&sc->sc_addr)[1] = 0; 1684 LLADDR(&sc->sc_addr)[2] = 0x5e; 1685 LLADDR(&sc->sc_addr)[3] = 0; 1686 LLADDR(&sc->sc_addr)[4] = 1; 1687 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1688 } else 1689 CARP_LOCK(sc); 1690 locked = 1; 1691 if (carpr.carpr_advbase > 0) { 1692 if (carpr.carpr_advbase > 255 || 1693 carpr.carpr_advbase < CARP_DFLTINTV) { 1694 error = EINVAL; 1695 break; 1696 } 1697 sc->sc_advbase = carpr.carpr_advbase; 1698 } 1699 if (carpr.carpr_advskew > 0) { 1700 if (carpr.carpr_advskew >= 255) { 1701 error = EINVAL; 1702 break; 1703 } 1704 sc->sc_advskew = carpr.carpr_advskew; 1705 } 1706 if (carpr.carpr_key[0] != '\0') { 1707 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1708 carp_hmac_prepare(sc); 1709 } 1710 if (sc->sc_state != INIT && 1711 carpr.carpr_state != sc->sc_state) { 1712 switch (carpr.carpr_state) { 1713 case BACKUP: 1714 callout_stop(&sc->sc_ad_tmo); 1715 carp_set_state(sc, BACKUP); 1716 carp_setrun(sc, 0); 1717 carp_delroute(sc); 1718 break; 1719 case MASTER: 1720 carp_master_down_locked(sc); 1721 break; 1722 default: 1723 break; 1724 } 1725 } 1726 break; 1727 1728 case SIOCGVH: 1729 { 1730 int priveleged; 1731 1732 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { 1733 error = EINVAL; 1734 break; 1735 } 1736 if (carpr.carpr_count < 1) { 1737 error = EMSGSIZE; 1738 break; 1739 } 1740 if (ifp->if_carp == NULL) { 1741 error = ENOENT; 1742 break; 1743 } 1744 1745 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); 1746 if (carpr.carpr_vhid != 0) { 1747 CIF_LOCK(ifp->if_carp); 1748 IFNET_FOREACH_CARP(ifp, sc) 1749 if (sc->sc_vhid == carpr.carpr_vhid) 1750 break; 1751 CIF_UNLOCK(ifp->if_carp); 1752 if (sc == NULL) { 1753 error = 
ENOENT; 1754 break; 1755 } 1756 carp_carprcp(&carpr, sc, priveleged); 1757 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1758 } else { 1759 int i, count; 1760 1761 count = 0; 1762 CIF_LOCK(ifp->if_carp); 1763 IFNET_FOREACH_CARP(ifp, sc) 1764 count++; 1765 1766 if (count > carpr.carpr_count) { 1767 CIF_UNLOCK(ifp->if_carp); 1768 error = EMSGSIZE; 1769 break; 1770 } 1771 1772 i = 0; 1773 IFNET_FOREACH_CARP(ifp, sc) { 1774 carp_carprcp(&carpr, sc, priveleged); 1775 carpr.carpr_count = count; 1776 error = copyout(&carpr, ifr->ifr_data + 1777 (i * sizeof(carpr)), sizeof(carpr)); 1778 if (error) { 1779 CIF_UNLOCK(ifp->if_carp); 1780 break; 1781 } 1782 i++; 1783 } 1784 CIF_UNLOCK(ifp->if_carp); 1785 } 1786 break; 1787 } 1788 default: 1789 error = EINVAL; 1790 } 1791 1792 out: 1793 if (locked) 1794 CARP_UNLOCK(sc); 1795 if_rele(ifp); 1796 1797 return (error); 1798 } 1799 1800 static int 1801 carp_get_vhid(struct ifaddr *ifa) 1802 { 1803 1804 if (ifa == NULL || ifa->ifa_carp == NULL) 1805 return (0); 1806 1807 return (ifa->ifa_carp->sc_vhid); 1808 } 1809 1810 int 1811 carp_attach(struct ifaddr *ifa, int vhid) 1812 { 1813 struct ifnet *ifp = ifa->ifa_ifp; 1814 struct carp_if *cif = ifp->if_carp; 1815 struct carp_softc *sc; 1816 int index, error; 1817 1818 if (ifp->if_carp == NULL) 1819 return (ENOPROTOOPT); 1820 1821 switch (ifa->ifa_addr->sa_family) { 1822 #ifdef INET 1823 case AF_INET: 1824 #endif 1825 #ifdef INET6 1826 case AF_INET6: 1827 #endif 1828 break; 1829 default: 1830 return (EPROTOTYPE); 1831 } 1832 1833 CIF_LOCK(cif); 1834 IFNET_FOREACH_CARP(ifp, sc) 1835 if (sc->sc_vhid == vhid) 1836 break; 1837 if (sc == NULL) { 1838 CIF_UNLOCK(cif); 1839 return (ENOENT); 1840 } 1841 1842 if (ifa->ifa_carp) { 1843 if (ifa->ifa_carp->sc_vhid != vhid) 1844 carp_detach_locked(ifa); 1845 else { 1846 CIF_UNLOCK(cif); 1847 return (0); 1848 } 1849 } 1850 1851 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 1852 if (error) { 1853 CIF_FREE(cif); 1854 return 
(error); 1855 } 1856 1857 CARP_LOCK(sc); 1858 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 1859 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 1860 if ((error = carp_grow_ifas(sc)) != 0) { 1861 carp_multicast_cleanup(cif, 1862 ifa->ifa_addr->sa_family); 1863 CARP_UNLOCK(sc); 1864 CIF_FREE(cif); 1865 return (error); 1866 } 1867 1868 switch (ifa->ifa_addr->sa_family) { 1869 #ifdef INET 1870 case AF_INET: 1871 cif->cif_naddrs++; 1872 sc->sc_naddrs++; 1873 break; 1874 #endif 1875 #ifdef INET6 1876 case AF_INET6: 1877 cif->cif_naddrs6++; 1878 sc->sc_naddrs6++; 1879 break; 1880 #endif 1881 } 1882 1883 ifa_ref(ifa); 1884 sc->sc_ifas[index - 1] = ifa; 1885 ifa->ifa_carp = sc; 1886 1887 carp_hmac_prepare(sc); 1888 carp_sc_state(sc); 1889 1890 CARP_UNLOCK(sc); 1891 CIF_UNLOCK(cif); 1892 1893 return (0); 1894 } 1895 1896 void 1897 carp_detach(struct ifaddr *ifa) 1898 { 1899 struct ifnet *ifp = ifa->ifa_ifp; 1900 struct carp_if *cif = ifp->if_carp; 1901 1902 CIF_LOCK(cif); 1903 carp_detach_locked(ifa); 1904 CIF_FREE(cif); 1905 } 1906 1907 static void 1908 carp_detach_locked(struct ifaddr *ifa) 1909 { 1910 struct ifnet *ifp = ifa->ifa_ifp; 1911 struct carp_if *cif = ifp->if_carp; 1912 struct carp_softc *sc = ifa->ifa_carp; 1913 int i, index; 1914 1915 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 1916 1917 CIF_LOCK_ASSERT(cif); 1918 CARP_LOCK(sc); 1919 1920 /* Shift array. 
*/ 1921 index = sc->sc_naddrs + sc->sc_naddrs6; 1922 for (i = 0; i < index; i++) 1923 if (sc->sc_ifas[i] == ifa) 1924 break; 1925 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 1926 for (; i < index - 1; i++) 1927 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 1928 sc->sc_ifas[index - 1] = NULL; 1929 1930 switch (ifa->ifa_addr->sa_family) { 1931 #ifdef INET 1932 case AF_INET: 1933 cif->cif_naddrs--; 1934 sc->sc_naddrs--; 1935 break; 1936 #endif 1937 #ifdef INET6 1938 case AF_INET6: 1939 cif->cif_naddrs6--; 1940 sc->sc_naddrs6--; 1941 break; 1942 #endif 1943 } 1944 1945 carp_ifa_delroute(ifa); 1946 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 1947 1948 ifa->ifa_carp = NULL; 1949 ifa_free(ifa); 1950 1951 carp_hmac_prepare(sc); 1952 carp_sc_state(sc); 1953 1954 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1955 CARP_UNLOCK(sc); 1956 carp_destroy(sc); 1957 } else 1958 CARP_UNLOCK(sc); 1959 } 1960 1961 static void 1962 carp_set_state(struct carp_softc *sc, int state) 1963 { 1964 1965 CARP_LOCK_ASSERT(sc); 1966 1967 if (sc->sc_state != state) { 1968 const char *carp_states[] = { CARP_STATES }; 1969 char subsys[IFNAMSIZ+5]; 1970 1971 sc->sc_state = state; 1972 1973 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 1974 sc->sc_carpdev->if_xname); 1975 devctl_notify("CARP", subsys, carp_states[state], NULL); 1976 } 1977 } 1978 1979 static void 1980 carp_linkstate(struct ifnet *ifp) 1981 { 1982 struct carp_softc *sc; 1983 1984 CIF_LOCK(ifp->if_carp); 1985 IFNET_FOREACH_CARP(ifp, sc) { 1986 CARP_LOCK(sc); 1987 carp_sc_state(sc); 1988 CARP_UNLOCK(sc); 1989 } 1990 CIF_UNLOCK(ifp->if_carp); 1991 } 1992 1993 static void 1994 carp_sc_state(struct carp_softc *sc) 1995 { 1996 1997 CARP_LOCK_ASSERT(sc); 1998 1999 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2000 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2001 callout_stop(&sc->sc_ad_tmo); 2002 #ifdef INET 2003 callout_stop(&sc->sc_md_tmo); 2004 #endif 2005 #ifdef INET6 2006 callout_stop(&sc->sc_md6_tmo); 2007 
#endif 2008 carp_set_state(sc, INIT); 2009 carp_setrun(sc, 0); 2010 if (!sc->sc_suppress) 2011 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2012 sc->sc_suppress = 1; 2013 } else { 2014 carp_set_state(sc, INIT); 2015 carp_setrun(sc, 0); 2016 if (sc->sc_suppress) 2017 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2018 sc->sc_suppress = 0; 2019 } 2020 } 2021 2022 static void 2023 carp_demote_adj(int adj, char *reason) 2024 { 2025 atomic_add_int(&V_carp_demotion, adj); 2026 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2027 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2028 } 2029 2030 static int 2031 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2032 { 2033 int new, error; 2034 2035 new = V_carp_demotion; 2036 error = sysctl_handle_int(oidp, &new, 0, req); 2037 if (error || !req->newptr) 2038 return (error); 2039 2040 carp_demote_adj(new, "sysctl"); 2041 2042 return (0); 2043 } 2044 2045 #ifdef INET 2046 extern struct domain inetdomain; 2047 static struct protosw in_carp_protosw = { 2048 .pr_type = SOCK_RAW, 2049 .pr_domain = &inetdomain, 2050 .pr_protocol = IPPROTO_CARP, 2051 .pr_flags = PR_ATOMIC|PR_ADDR, 2052 .pr_input = carp_input, 2053 .pr_output = (pr_output_t *)rip_output, 2054 .pr_ctloutput = rip_ctloutput, 2055 .pr_usrreqs = &rip_usrreqs 2056 }; 2057 #endif 2058 2059 #ifdef INET6 2060 extern struct domain inet6domain; 2061 static struct ip6protosw in6_carp_protosw = { 2062 .pr_type = SOCK_RAW, 2063 .pr_domain = &inet6domain, 2064 .pr_protocol = IPPROTO_CARP, 2065 .pr_flags = PR_ATOMIC|PR_ADDR, 2066 .pr_input = carp6_input, 2067 .pr_output = rip6_output, 2068 .pr_ctloutput = rip6_ctloutput, 2069 .pr_usrreqs = &rip6_usrreqs 2070 }; 2071 #endif 2072 2073 static void 2074 carp_mod_cleanup(void) 2075 { 2076 2077 #ifdef INET 2078 if (proto_reg[CARP_INET] == 0) { 2079 (void)ipproto_unregister(IPPROTO_CARP); 2080 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2081 proto_reg[CARP_INET] = -1; 2082 } 2083 
carp_iamatch_p = NULL; 2084 #endif 2085 #ifdef INET6 2086 if (proto_reg[CARP_INET6] == 0) { 2087 (void)ip6proto_unregister(IPPROTO_CARP); 2088 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2089 proto_reg[CARP_INET6] = -1; 2090 } 2091 carp_iamatch6_p = NULL; 2092 carp_macmatch6_p = NULL; 2093 #endif 2094 carp_ioctl_p = NULL; 2095 carp_attach_p = NULL; 2096 carp_detach_p = NULL; 2097 carp_get_vhid_p = NULL; 2098 carp_linkstate_p = NULL; 2099 carp_forus_p = NULL; 2100 carp_output_p = NULL; 2101 carp_demote_adj_p = NULL; 2102 carp_master_p = NULL; 2103 mtx_unlock(&carp_mtx); 2104 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2105 mtx_destroy(&carp_mtx); 2106 } 2107 2108 static int 2109 carp_mod_load(void) 2110 { 2111 int err; 2112 2113 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2114 LIST_INIT(&carp_list); 2115 carp_get_vhid_p = carp_get_vhid; 2116 carp_forus_p = carp_forus; 2117 carp_output_p = carp_output; 2118 carp_linkstate_p = carp_linkstate; 2119 carp_ioctl_p = carp_ioctl; 2120 carp_attach_p = carp_attach; 2121 carp_detach_p = carp_detach; 2122 carp_demote_adj_p = carp_demote_adj; 2123 carp_master_p = carp_master; 2124 #ifdef INET6 2125 carp_iamatch6_p = carp_iamatch6; 2126 carp_macmatch6_p = carp_macmatch6; 2127 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2128 (struct protosw *)&in6_carp_protosw); 2129 if (proto_reg[CARP_INET6]) { 2130 printf("carp: error %d attaching to PF_INET6\n", 2131 proto_reg[CARP_INET6]); 2132 carp_mod_cleanup(); 2133 return (proto_reg[CARP_INET6]); 2134 } 2135 err = ip6proto_register(IPPROTO_CARP); 2136 if (err) { 2137 printf("carp: error %d registering with INET6\n", err); 2138 carp_mod_cleanup(); 2139 return (err); 2140 } 2141 #endif 2142 #ifdef INET 2143 carp_iamatch_p = carp_iamatch; 2144 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2145 if (proto_reg[CARP_INET]) { 2146 printf("carp: error %d attaching to PF_INET\n", 2147 proto_reg[CARP_INET]); 2148 carp_mod_cleanup(); 2149 return 
(proto_reg[CARP_INET]); 2150 } 2151 err = ipproto_register(IPPROTO_CARP); 2152 if (err) { 2153 printf("carp: error %d registering with INET\n", err); 2154 carp_mod_cleanup(); 2155 return (err); 2156 } 2157 #endif 2158 return (0); 2159 } 2160 2161 static int 2162 carp_modevent(module_t mod, int type, void *data) 2163 { 2164 switch (type) { 2165 case MOD_LOAD: 2166 return carp_mod_load(); 2167 /* NOTREACHED */ 2168 case MOD_UNLOAD: 2169 mtx_lock(&carp_mtx); 2170 if (LIST_EMPTY(&carp_list)) 2171 carp_mod_cleanup(); 2172 else { 2173 mtx_unlock(&carp_mtx); 2174 return (EBUSY); 2175 } 2176 break; 2177 2178 default: 2179 return (EINVAL); 2180 } 2181 2182 return (0); 2183 } 2184 2185 static moduledata_t carp_mod = { 2186 "carp", 2187 carp_modevent, 2188 0 2189 }; 2190 2191 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2192