1 /*- 2 * Copyright (c) 2002 Michael Shalayeff. 3 * Copyright (c) 2003 Ryan McBride. 4 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/counter.h>

#include <net/ethernet.h>
#include <net/fddi.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/iso88025.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip.h>
#include <machine/in_cksum.h>
#endif
#ifdef INET
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <crypto/sha1.h>

static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");

/*
 * Per-vhid state.  One softc exists for each virtual host id configured
 * on a parent interface; it is linked both on that interface's carp_if
 * (sc_list) and on the global carp_list (sc_next).
 */
struct carp_softc {
	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
	struct sockaddr_dl	sc_addr;	/* Our link level address. */
	struct callout		sc_ad_tmo;	/* Advertising timeout. */
#ifdef INET
	struct callout		sc_md_tmo;	/* Master down timeout. */
#endif
#ifdef INET6
	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
#endif
	/* Serialises carp_input_c(), callout-driven events and ioctl()s. */
	struct mtx		sc_mtx;

	int			sc_vhid;	/* Virtual host id. */
	/* Advertisement interval: sc_advbase seconds + advskew/256 s. */
	int			sc_advskew;
	int			sc_advbase;

	/*
	 * Address counts; their sum bounds the walk over sc_ifas in
	 * CARP_FOREACH_IFA().  A non-zero count enables sending
	 * advertisements for that family.
	 */
	int			sc_naddrs;
	int			sc_naddrs6;
	int			sc_ifasiz;	/* NOTE(review): presumably the
						   allocated size of sc_ifas;
						   confirm against the code
						   that grows it. */
	enum { INIT = 0, BACKUP, MASTER }	sc_state;
	int			sc_suppress;
	/* Consecutive send failures/successes, see carp_send_ad_error(). */
	int			sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	int			sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS	3

	/* Non-zero: carp_prepare_ad() seeds sc_counter with random data. */
	int			sc_init_counter;
	/* 64-bit counter carried in (and authenticated with) each ad. */
	uint64_t		sc_counter;

	/* authentication */
#define	CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];
	/* HMAC pad derived from sc_key; left as the opad by carp_hmac_prepare(). */
	unsigned char sc_pad[CARP_HMAC_PAD];
	/* Precomputed prefix of the inner hash, see carp_hmac_prepare(). */
	SHA1_CTX sc_sha1;

	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
};

/*
 * Per-ifnet CARP state, reached through ifp->if_carp.  Holds the list of
 * softcs (one per vhid) configured on the interface.
 */
struct carp_if {
#ifdef INET
	int	cif_naddrs;
#endif
#ifdef INET6
	int	cif_naddrs6;
#endif
	TAILQ_HEAD(, carp_softc) cif_vrs;	/* Softcs on this ifnet. */
#ifdef INET
	/* Multicast options handed to ip_output() when sending ads. */
	struct ip_moptions 	 cif_imo;
#endif
#ifdef INET6
	/* Multicast options handed to ip6_output() when sending ads. */
	struct ip6_moptions 	 cif_im6o;
#endif
	struct ifnet	*cif_ifp;
	struct mtx	cif_mtx;		/* Protects cif_vrs traversal. */
	uint32_t	cif_flags;
#define	CIF_PROMISC	0x00000001
};

/*
 * NOTE(review): presumably per-family protocol registration results,
 * indexed by CARP_INET/CARP_INET6, with -1 meaning "not registered";
 * the users are outside this part of the file -- confirm.
 */
#define	CARP_INET	0
#define	CARP_INET6	1
static int proto_reg[] = {-1, -1};

/*
 * Brief design of carp(4).
 *
 * Any carp-capable ifnet may have a list of carp softcs hanging off
 * its ifp->if_carp pointer. Each softc represents one unique virtual
 * host id, or vhid. The softc has a back pointer to the ifnet. All
 * softcs are joined in a global list, which has quite limited use.
 *
 * Any interface address that takes part in CARP negotiation has a
 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
 * AF_INET or AF_INET6 address.
 *
 * Although, one can get the softc's backpointer to ifnet and traverse
 * through its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers.
* This allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
 * do calls into the network stack, thus avoiding LORs.
 *
 * Locking:
 *
 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
 * callout-driven events and ioctl()s.
 *
 * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
 * traverse the global list we use the mutex carp_mtx.
 *
 * Known issues with locking:
 *
 * - Sending ad, we put the pointer to the softc in an mtag, and no reference
 *   counting is done on the softc.
 * - On module unload we may race (?) with packet processing thread
 *   dereferencing our function pointers.
 */

/* Accept incoming CARP packets; when zero the input paths drop them. */
static VNET_DEFINE(int, carp_allow) = 1;
#define	V_carp_allow	VNET(carp_allow)

/* Preempt slower nodes. */
static VNET_DEFINE(int, carp_preempt) = 0;
#define	V_carp_preempt	VNET(carp_preempt)

/* Log level: > 0 enables CARP_LOG(), > 1 also enables CARP_DEBUG(). */
static VNET_DEFINE(int, carp_log) = 1;
#define	V_carp_log	VNET(carp_log)

/* Global advskew demotion, added to sc_advskew by DEMOTE_ADVSKEW(). */
static VNET_DEFINE(int, carp_demotion) = 0;
#define	V_carp_demotion	VNET(carp_demotion)

/* Send error demotion factor, applied by carp_send_ad_error(). */
static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
#define	V_carp_senderr_adj	VNET(carp_senderr_adj)

/* Iface down demotion factor.
*/ 210 static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW; 211 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 212 213 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 214 215 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); 216 SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLFLAG_RW, 217 &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets"); 218 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 219 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 220 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 221 &VNET_NAME(carp_log), 0, "CARP log level"); 222 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 223 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 224 0, 0, carp_demote_adj_sysctl, "I", 225 "Adjust demotion factor (skew of advskew)"); 226 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 227 CTLFLAG_VNET | CTLFLAG_RW, 228 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 229 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 230 CTLFLAG_VNET | CTLFLAG_RW, 231 &VNET_NAME(carp_ifdown_adj), 0, 232 "Interface down demotion factor adjustment"); 233 234 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 235 VNET_PCPUSTAT_SYSINIT(carpstats); 236 VNET_PCPUSTAT_SYSUNINIT(carpstats); 237 238 #define CARPSTATS_ADD(name, val) \ 239 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 240 sizeof(uint64_t)], (val)) 241 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 242 243 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 244 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 245 246 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 247 NULL, MTX_DEF) 248 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 249 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 250 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 251 #define 
CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 252 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 253 NULL, MTX_DEF) 254 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 255 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 256 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 257 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 258 #define CIF_FREE(cif) do { \ 259 CIF_LOCK(cif); \ 260 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 261 carp_free_if(cif); \ 262 else \ 263 CIF_UNLOCK(cif); \ 264 } while (0) 265 266 #define CARP_LOG(...) do { \ 267 if (V_carp_log > 0) \ 268 log(LOG_INFO, "carp: " __VA_ARGS__); \ 269 } while (0) 270 271 #define CARP_DEBUG(...) do { \ 272 if (V_carp_log > 1) \ 273 log(LOG_DEBUG, __VA_ARGS__); \ 274 } while (0) 275 276 #define IFNET_FOREACH_IFA(ifp, ifa) \ 277 IF_ADDR_LOCK_ASSERT(ifp); \ 278 TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 279 if ((ifa)->ifa_carp != NULL) 280 281 #define CARP_FOREACH_IFA(sc, ifa) \ 282 CARP_LOCK_ASSERT(sc); \ 283 for (int _i = 0; \ 284 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 285 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 286 ++_i) 287 288 #define IFNET_FOREACH_CARP(ifp, sc) \ 289 CIF_LOCK_ASSERT(ifp->if_carp); \ 290 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 291 292 #define DEMOTE_ADVSKEW(sc) \ 293 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? 
\ 294 CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion)) 295 296 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 297 static struct carp_softc 298 *carp_alloc(struct ifnet *); 299 static void carp_destroy(struct carp_softc *); 300 static struct carp_if 301 *carp_alloc_if(struct ifnet *); 302 static void carp_free_if(struct carp_if *); 303 static void carp_set_state(struct carp_softc *, int, const char* reason); 304 static void carp_sc_state(struct carp_softc *); 305 static void carp_setrun(struct carp_softc *, sa_family_t); 306 static void carp_master_down(void *); 307 static void carp_master_down_locked(struct carp_softc *, 308 const char* reason); 309 static void carp_send_ad(void *); 310 static void carp_send_ad_locked(struct carp_softc *); 311 static void carp_addroute(struct carp_softc *); 312 static void carp_ifa_addroute(struct ifaddr *); 313 static void carp_delroute(struct carp_softc *); 314 static void carp_ifa_delroute(struct ifaddr *); 315 static void carp_send_ad_all(void *, int); 316 static void carp_demote_adj(int, char *); 317 318 static LIST_HEAD(, carp_softc) carp_list; 319 static struct mtx carp_mtx; 320 static struct sx carp_sx; 321 static struct task carp_sendall_task = 322 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 323 324 static void 325 carp_hmac_prepare(struct carp_softc *sc) 326 { 327 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 328 uint8_t vhid = sc->sc_vhid & 0xff; 329 struct ifaddr *ifa; 330 int i, found; 331 #ifdef INET 332 struct in_addr last, cur, in; 333 #endif 334 #ifdef INET6 335 struct in6_addr last6, cur6, in6; 336 #endif 337 338 CARP_LOCK_ASSERT(sc); 339 340 /* Compute ipad from key. */ 341 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 342 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 343 for (i = 0; i < sizeof(sc->sc_pad); i++) 344 sc->sc_pad[i] ^= 0x36; 345 346 /* Precompute first part of inner hash. 
*/ 347 SHA1Init(&sc->sc_sha1); 348 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 349 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 350 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 351 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 352 #ifdef INET 353 cur.s_addr = 0; 354 do { 355 found = 0; 356 last = cur; 357 cur.s_addr = 0xffffffff; 358 CARP_FOREACH_IFA(sc, ifa) { 359 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 360 if (ifa->ifa_addr->sa_family == AF_INET && 361 ntohl(in.s_addr) > ntohl(last.s_addr) && 362 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 363 cur.s_addr = in.s_addr; 364 found++; 365 } 366 } 367 if (found) 368 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 369 } while (found); 370 #endif /* INET */ 371 #ifdef INET6 372 memset(&cur6, 0, sizeof(cur6)); 373 do { 374 found = 0; 375 last6 = cur6; 376 memset(&cur6, 0xff, sizeof(cur6)); 377 CARP_FOREACH_IFA(sc, ifa) { 378 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 379 if (IN6_IS_SCOPE_EMBED(&in6)) 380 in6.s6_addr16[1] = 0; 381 if (ifa->ifa_addr->sa_family == AF_INET6 && 382 memcmp(&in6, &last6, sizeof(in6)) > 0 && 383 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 384 cur6 = in6; 385 found++; 386 } 387 } 388 if (found) 389 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 390 } while (found); 391 #endif /* INET6 */ 392 393 /* convert ipad to opad */ 394 for (i = 0; i < sizeof(sc->sc_pad); i++) 395 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 396 } 397 398 static void 399 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 400 unsigned char md[20]) 401 { 402 SHA1_CTX sha1ctx; 403 404 CARP_LOCK_ASSERT(sc); 405 406 /* fetch first half of inner hash */ 407 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 408 409 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 410 SHA1Final(md, &sha1ctx); 411 412 /* outer hash */ 413 SHA1Init(&sha1ctx); 414 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 415 SHA1Update(&sha1ctx, md, 20); 416 SHA1Final(md, &sha1ctx); 417 
} 418 419 static int 420 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 421 unsigned char md[20]) 422 { 423 unsigned char md2[20]; 424 425 CARP_LOCK_ASSERT(sc); 426 427 carp_hmac_generate(sc, counter, md2); 428 429 return (bcmp(md, md2, sizeof(md2))); 430 } 431 432 /* 433 * process input packet. 434 * we have rearranged checks order compared to the rfc, 435 * but it seems more efficient this way or not possible otherwise. 436 */ 437 #ifdef INET 438 int 439 carp_input(struct mbuf **mp, int *offp, int proto) 440 { 441 struct mbuf *m = *mp; 442 struct ip *ip = mtod(m, struct ip *); 443 struct carp_header *ch; 444 int iplen, len; 445 446 iplen = *offp; 447 *mp = NULL; 448 449 CARPSTATS_INC(carps_ipackets); 450 451 if (!V_carp_allow) { 452 m_freem(m); 453 return (IPPROTO_DONE); 454 } 455 456 /* verify that the IP TTL is 255. */ 457 if (ip->ip_ttl != CARP_DFLTTL) { 458 CARPSTATS_INC(carps_badttl); 459 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 460 ip->ip_ttl, 461 m->m_pkthdr.rcvif->if_xname); 462 m_freem(m); 463 return (IPPROTO_DONE); 464 } 465 466 iplen = ip->ip_hl << 2; 467 468 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 469 CARPSTATS_INC(carps_badlen); 470 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 471 "on %s\n", __func__, m->m_len - sizeof(struct ip), 472 m->m_pkthdr.rcvif->if_xname); 473 m_freem(m); 474 return (IPPROTO_DONE); 475 } 476 477 if (iplen + sizeof(*ch) < m->m_len) { 478 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 479 CARPSTATS_INC(carps_hdrops); 480 CARP_DEBUG("%s: pullup failed\n", __func__); 481 return (IPPROTO_DONE); 482 } 483 ip = mtod(m, struct ip *); 484 } 485 ch = (struct carp_header *)((char *)ip + iplen); 486 487 /* 488 * verify that the received packet length is 489 * equal to the CARP header 490 */ 491 len = iplen + sizeof(*ch); 492 if (len > m->m_pkthdr.len) { 493 CARPSTATS_INC(carps_badlen); 494 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 495 m->m_pkthdr.len, 496 
m->m_pkthdr.rcvif->if_xname); 497 m_freem(m); 498 return (IPPROTO_DONE); 499 } 500 501 if ((m = m_pullup(m, len)) == NULL) { 502 CARPSTATS_INC(carps_hdrops); 503 return (IPPROTO_DONE); 504 } 505 ip = mtod(m, struct ip *); 506 ch = (struct carp_header *)((char *)ip + iplen); 507 508 /* verify the CARP checksum */ 509 m->m_data += iplen; 510 if (in_cksum(m, len - iplen)) { 511 CARPSTATS_INC(carps_badsum); 512 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 513 m->m_pkthdr.rcvif->if_xname); 514 m_freem(m); 515 return (IPPROTO_DONE); 516 } 517 m->m_data -= iplen; 518 519 carp_input_c(m, ch, AF_INET); 520 return (IPPROTO_DONE); 521 } 522 #endif 523 524 #ifdef INET6 525 int 526 carp6_input(struct mbuf **mp, int *offp, int proto) 527 { 528 struct mbuf *m = *mp; 529 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 530 struct carp_header *ch; 531 u_int len; 532 533 CARPSTATS_INC(carps_ipackets6); 534 535 if (!V_carp_allow) { 536 m_freem(m); 537 return (IPPROTO_DONE); 538 } 539 540 /* check if received on a valid carp interface */ 541 if (m->m_pkthdr.rcvif->if_carp == NULL) { 542 CARPSTATS_INC(carps_badif); 543 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 544 __func__, m->m_pkthdr.rcvif->if_xname); 545 m_freem(m); 546 return (IPPROTO_DONE); 547 } 548 549 /* verify that the IP TTL is 255 */ 550 if (ip6->ip6_hlim != CARP_DFLTTL) { 551 CARPSTATS_INC(carps_badttl); 552 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 553 ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); 554 m_freem(m); 555 return (IPPROTO_DONE); 556 } 557 558 /* verify that we have a complete carp packet */ 559 len = m->m_len; 560 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 561 if (ch == NULL) { 562 CARPSTATS_INC(carps_badlen); 563 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 564 return (IPPROTO_DONE); 565 } 566 567 568 /* verify the CARP checksum */ 569 m->m_data += *offp; 570 if (in_cksum(m, sizeof(*ch))) { 571 CARPSTATS_INC(carps_badsum); 572 
CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 573 m->m_pkthdr.rcvif->if_xname); 574 m_freem(m); 575 return (IPPROTO_DONE); 576 } 577 m->m_data -= *offp; 578 579 carp_input_c(m, ch, AF_INET6); 580 return (IPPROTO_DONE); 581 } 582 #endif /* INET6 */ 583 584 /* 585 * This routine should not be necessary at all, but some switches 586 * (VMWare ESX vswitches) can echo our own packets back at us, 587 * and we must ignore them or they will cause us to drop out of 588 * MASTER mode. 589 * 590 * We cannot catch all cases of network loops. Instead, what we 591 * do here is catch any packet that arrives with a carp header 592 * with a VHID of 0, that comes from an address that is our own. 593 * These packets are by definition "from us" (even if they are from 594 * a misconfigured host that is pretending to be us). 595 * 596 * The VHID test is outside this mini-function. 597 */ 598 static int 599 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 600 { 601 #ifdef INET 602 struct ip *ip4; 603 struct in_addr in4; 604 #endif 605 #ifdef INET6 606 struct ip6_hdr *ip6; 607 struct in6_addr in6; 608 #endif 609 610 switch (af) { 611 #ifdef INET 612 case AF_INET: 613 ip4 = mtod(m, struct ip *); 614 in4 = ifatoia(ifa)->ia_addr.sin_addr; 615 return (in4.s_addr == ip4->ip_src.s_addr); 616 #endif 617 #ifdef INET6 618 case AF_INET6: 619 ip6 = mtod(m, struct ip6_hdr *); 620 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 621 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 622 #endif 623 default: 624 break; 625 } 626 return (0); 627 } 628 629 static void 630 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 631 { 632 struct ifnet *ifp = m->m_pkthdr.rcvif; 633 struct ifaddr *ifa, *match; 634 struct carp_softc *sc; 635 uint64_t tmp_counter; 636 struct timeval sc_tv, ch_tv; 637 int error; 638 639 /* 640 * Verify that the VHID is valid on the receiving interface. 641 * 642 * There should be just one match. 
If there are none 643 * the VHID is not valid and we drop the packet. If 644 * there are multiple VHID matches, take just the first 645 * one, for compatibility with previous code. While we're 646 * scanning, check for obvious loops in the network topology 647 * (these should never happen, and as noted above, we may 648 * miss real loops; this is just a double-check). 649 */ 650 IF_ADDR_RLOCK(ifp); 651 error = 0; 652 match = NULL; 653 IFNET_FOREACH_IFA(ifp, ifa) { 654 if (match == NULL && ifa->ifa_carp != NULL && 655 ifa->ifa_addr->sa_family == af && 656 ifa->ifa_carp->sc_vhid == ch->carp_vhid) 657 match = ifa; 658 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) 659 error = ELOOP; 660 } 661 ifa = error ? NULL : match; 662 if (ifa != NULL) 663 ifa_ref(ifa); 664 IF_ADDR_RUNLOCK(ifp); 665 666 if (ifa == NULL) { 667 if (error == ELOOP) { 668 CARP_DEBUG("dropping looped packet on interface %s\n", 669 ifp->if_xname); 670 CARPSTATS_INC(carps_badif); /* ??? */ 671 } else { 672 CARPSTATS_INC(carps_badvhid); 673 } 674 m_freem(m); 675 return; 676 } 677 678 /* verify the CARP version. 
*/ 679 if (ch->carp_version != CARP_VERSION) { 680 CARPSTATS_INC(carps_badver); 681 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname, 682 ch->carp_version); 683 ifa_free(ifa); 684 m_freem(m); 685 return; 686 } 687 688 sc = ifa->ifa_carp; 689 CARP_LOCK(sc); 690 ifa_free(ifa); 691 692 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 693 CARPSTATS_INC(carps_badauth); 694 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 695 sc->sc_vhid, ifp->if_xname); 696 goto out; 697 } 698 699 tmp_counter = ntohl(ch->carp_counter[0]); 700 tmp_counter = tmp_counter<<32; 701 tmp_counter += ntohl(ch->carp_counter[1]); 702 703 /* XXX Replay protection goes here */ 704 705 sc->sc_init_counter = 0; 706 sc->sc_counter = tmp_counter; 707 708 sc_tv.tv_sec = sc->sc_advbase; 709 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 710 ch_tv.tv_sec = ch->carp_advbase; 711 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 712 713 switch (sc->sc_state) { 714 case INIT: 715 break; 716 case MASTER: 717 /* 718 * If we receive an advertisement from a master who's going to 719 * be more frequent than us, go into BACKUP state. 720 */ 721 if (timevalcmp(&sc_tv, &ch_tv, >) || 722 timevalcmp(&sc_tv, &ch_tv, ==)) { 723 callout_stop(&sc->sc_ad_tmo); 724 carp_set_state(sc, BACKUP, 725 "more frequent advertisement received"); 726 carp_setrun(sc, 0); 727 carp_delroute(sc); 728 } 729 break; 730 case BACKUP: 731 /* 732 * If we're pre-empting masters who advertise slower than us, 733 * and this one claims to be slower, treat him as down. 734 */ 735 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 736 carp_master_down_locked(sc, 737 "preempting a slower master"); 738 break; 739 } 740 741 /* 742 * If the master is going to advertise at such a low frequency 743 * that he's guaranteed to time out, we'd might as well just 744 * treat him as timed out now. 
745 */ 746 sc_tv.tv_sec = sc->sc_advbase * 3; 747 if (timevalcmp(&sc_tv, &ch_tv, <)) { 748 carp_master_down_locked(sc, "master will time out"); 749 break; 750 } 751 752 /* 753 * Otherwise, we reset the counter and wait for the next 754 * advertisement. 755 */ 756 carp_setrun(sc, af); 757 break; 758 } 759 760 out: 761 CARP_UNLOCK(sc); 762 m_freem(m); 763 } 764 765 static int 766 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 767 { 768 struct m_tag *mtag; 769 770 if (sc->sc_init_counter) { 771 /* this could also be seconds since unix epoch */ 772 sc->sc_counter = arc4random(); 773 sc->sc_counter = sc->sc_counter << 32; 774 sc->sc_counter += arc4random(); 775 } else 776 sc->sc_counter++; 777 778 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 779 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 780 781 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 782 783 /* Tag packet for carp_output */ 784 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), 785 M_NOWAIT)) == NULL) { 786 m_freem(m); 787 CARPSTATS_INC(carps_onomem); 788 return (ENOMEM); 789 } 790 bcopy(&sc, mtag + 1, sizeof(sc)); 791 m_tag_prepend(m, mtag); 792 793 return (0); 794 } 795 796 /* 797 * To avoid LORs and possible recursions this function shouldn't 798 * be called directly, but scheduled via taskqueue. 799 */ 800 static void 801 carp_send_ad_all(void *ctx __unused, int pending __unused) 802 { 803 struct carp_softc *sc; 804 805 mtx_lock(&carp_mtx); 806 LIST_FOREACH(sc, &carp_list, sc_next) 807 if (sc->sc_state == MASTER) { 808 CARP_LOCK(sc); 809 CURVNET_SET(sc->sc_carpdev->if_vnet); 810 carp_send_ad_locked(sc); 811 CURVNET_RESTORE(); 812 CARP_UNLOCK(sc); 813 } 814 mtx_unlock(&carp_mtx); 815 } 816 817 /* Send a periodic advertisement, executed in callout context. 
*/ 818 static void 819 carp_send_ad(void *v) 820 { 821 struct carp_softc *sc = v; 822 823 CARP_LOCK_ASSERT(sc); 824 CURVNET_SET(sc->sc_carpdev->if_vnet); 825 carp_send_ad_locked(sc); 826 CURVNET_RESTORE(); 827 CARP_UNLOCK(sc); 828 } 829 830 static void 831 carp_send_ad_error(struct carp_softc *sc, int error) 832 { 833 834 if (error) { 835 if (sc->sc_sendad_errors < INT_MAX) 836 sc->sc_sendad_errors++; 837 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 838 static const char fmt[] = "send error %d on %s"; 839 char msg[sizeof(fmt) + IFNAMSIZ]; 840 841 sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); 842 carp_demote_adj(V_carp_senderr_adj, msg); 843 } 844 sc->sc_sendad_success = 0; 845 } else { 846 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS && 847 ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 848 static const char fmt[] = "send ok on %s"; 849 char msg[sizeof(fmt) + IFNAMSIZ]; 850 851 sprintf(msg, fmt, sc->sc_carpdev->if_xname); 852 carp_demote_adj(-V_carp_senderr_adj, msg); 853 sc->sc_sendad_errors = 0; 854 } else 855 sc->sc_sendad_errors = 0; 856 } 857 } 858 859 /* 860 * Pick the best ifaddr on the given ifp for sending CARP 861 * advertisements. 862 * 863 * "Best" here is defined by ifa_preferred(). This function is much 864 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 865 * 866 * (This could be simplified to return the actual address, except that 867 * it has a different format in AF_INET and AF_INET6.) 
868 */ 869 static struct ifaddr * 870 carp_best_ifa(int af, struct ifnet *ifp) 871 { 872 struct ifaddr *ifa, *best; 873 874 if (af >= AF_MAX) 875 return (NULL); 876 best = NULL; 877 IF_ADDR_RLOCK(ifp); 878 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 879 if (ifa->ifa_addr->sa_family == af && 880 (best == NULL || ifa_preferred(best, ifa))) 881 best = ifa; 882 } 883 IF_ADDR_RUNLOCK(ifp); 884 if (best != NULL) 885 ifa_ref(best); 886 return (best); 887 } 888 889 static void 890 carp_send_ad_locked(struct carp_softc *sc) 891 { 892 struct carp_header ch; 893 struct timeval tv; 894 struct ifaddr *ifa; 895 struct carp_header *ch_ptr; 896 struct mbuf *m; 897 int len, advskew; 898 899 CARP_LOCK_ASSERT(sc); 900 901 advskew = DEMOTE_ADVSKEW(sc); 902 tv.tv_sec = sc->sc_advbase; 903 tv.tv_usec = advskew * 1000000 / 256; 904 905 ch.carp_version = CARP_VERSION; 906 ch.carp_type = CARP_ADVERTISEMENT; 907 ch.carp_vhid = sc->sc_vhid; 908 ch.carp_advbase = sc->sc_advbase; 909 ch.carp_advskew = advskew; 910 ch.carp_authlen = 7; /* XXX DEFINE */ 911 ch.carp_pad1 = 0; /* must be zero */ 912 ch.carp_cksum = 0; 913 914 /* XXXGL: OpenBSD picks first ifaddr with needed family. 
*/ 915 916 #ifdef INET 917 if (sc->sc_naddrs) { 918 struct ip *ip; 919 920 m = m_gethdr(M_NOWAIT, MT_DATA); 921 if (m == NULL) { 922 CARPSTATS_INC(carps_onomem); 923 goto resched; 924 } 925 len = sizeof(*ip) + sizeof(ch); 926 m->m_pkthdr.len = len; 927 m->m_pkthdr.rcvif = NULL; 928 m->m_len = len; 929 M_ALIGN(m, m->m_len); 930 m->m_flags |= M_MCAST; 931 ip = mtod(m, struct ip *); 932 ip->ip_v = IPVERSION; 933 ip->ip_hl = sizeof(*ip) >> 2; 934 ip->ip_tos = IPTOS_LOWDELAY; 935 ip->ip_len = htons(len); 936 ip->ip_off = htons(IP_DF); 937 ip->ip_ttl = CARP_DFLTTL; 938 ip->ip_p = IPPROTO_CARP; 939 ip->ip_sum = 0; 940 ip_fillid(ip); 941 942 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 943 if (ifa != NULL) { 944 ip->ip_src.s_addr = 945 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 946 ifa_free(ifa); 947 } else 948 ip->ip_src.s_addr = 0; 949 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 950 951 ch_ptr = (struct carp_header *)(&ip[1]); 952 bcopy(&ch, ch_ptr, sizeof(ch)); 953 if (carp_prepare_ad(m, sc, ch_ptr)) 954 goto resched; 955 956 m->m_data += sizeof(*ip); 957 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 958 m->m_data -= sizeof(*ip); 959 960 CARPSTATS_INC(carps_opackets); 961 962 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 963 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 964 } 965 #endif /* INET */ 966 #ifdef INET6 967 if (sc->sc_naddrs6) { 968 struct ip6_hdr *ip6; 969 970 m = m_gethdr(M_NOWAIT, MT_DATA); 971 if (m == NULL) { 972 CARPSTATS_INC(carps_onomem); 973 goto resched; 974 } 975 len = sizeof(*ip6) + sizeof(ch); 976 m->m_pkthdr.len = len; 977 m->m_pkthdr.rcvif = NULL; 978 m->m_len = len; 979 M_ALIGN(m, m->m_len); 980 m->m_flags |= M_MCAST; 981 ip6 = mtod(m, struct ip6_hdr *); 982 bzero(ip6, sizeof(*ip6)); 983 ip6->ip6_vfc |= IPV6_VERSION; 984 ip6->ip6_hlim = CARP_DFLTTL; 985 ip6->ip6_nxt = IPPROTO_CARP; 986 987 /* set the source address */ 988 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 989 if (ifa != NULL) { 990 bcopy(IFA_IN6(ifa), 
&ip6->ip6_src, 991 sizeof(struct in6_addr)); 992 ifa_free(ifa); 993 } else 994 /* This should never happen with IPv6. */ 995 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 996 997 /* Set the multicast destination. */ 998 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 999 ip6->ip6_dst.s6_addr8[15] = 0x12; 1000 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1001 m_freem(m); 1002 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1003 goto resched; 1004 } 1005 1006 ch_ptr = (struct carp_header *)(&ip6[1]); 1007 bcopy(&ch, ch_ptr, sizeof(ch)); 1008 if (carp_prepare_ad(m, sc, ch_ptr)) 1009 goto resched; 1010 1011 m->m_data += sizeof(*ip6); 1012 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1013 m->m_data -= sizeof(*ip6); 1014 1015 CARPSTATS_INC(carps_opackets6); 1016 1017 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1018 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1019 } 1020 #endif /* INET6 */ 1021 1022 resched: 1023 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); 1024 } 1025 1026 static void 1027 carp_addroute(struct carp_softc *sc) 1028 { 1029 struct ifaddr *ifa; 1030 1031 CARP_FOREACH_IFA(sc, ifa) 1032 carp_ifa_addroute(ifa); 1033 } 1034 1035 static void 1036 carp_ifa_addroute(struct ifaddr *ifa) 1037 { 1038 1039 switch (ifa->ifa_addr->sa_family) { 1040 #ifdef INET 1041 case AF_INET: 1042 in_addprefix(ifatoia(ifa), RTF_UP); 1043 ifa_add_loopback_route(ifa, 1044 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1045 break; 1046 #endif 1047 #ifdef INET6 1048 case AF_INET6: 1049 ifa_add_loopback_route(ifa, 1050 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1051 nd6_add_ifa_lle(ifatoia6(ifa)); 1052 break; 1053 #endif 1054 } 1055 } 1056 1057 static void 1058 carp_delroute(struct carp_softc *sc) 1059 { 1060 struct ifaddr *ifa; 1061 1062 CARP_FOREACH_IFA(sc, ifa) 1063 carp_ifa_delroute(ifa); 1064 } 1065 1066 static void 1067 carp_ifa_delroute(struct ifaddr *ifa) 1068 { 1069 1070 switch (ifa->ifa_addr->sa_family) { 1071 #ifdef INET 
1072 case AF_INET: 1073 ifa_del_loopback_route(ifa, 1074 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1075 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1076 break; 1077 #endif 1078 #ifdef INET6 1079 case AF_INET6: 1080 ifa_del_loopback_route(ifa, 1081 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1082 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1083 break; 1084 #endif 1085 } 1086 } 1087 1088 int 1089 carp_master(struct ifaddr *ifa) 1090 { 1091 struct carp_softc *sc = ifa->ifa_carp; 1092 1093 return (sc->sc_state == MASTER); 1094 } 1095 1096 #ifdef INET 1097 /* 1098 * Broadcast a gratuitous ARP request containing 1099 * the virtual router MAC address for each IP address 1100 * associated with the virtual router. 1101 */ 1102 static void 1103 carp_send_arp(struct carp_softc *sc) 1104 { 1105 struct ifaddr *ifa; 1106 struct in_addr addr; 1107 1108 CARP_FOREACH_IFA(sc, ifa) { 1109 if (ifa->ifa_addr->sa_family != AF_INET) 1110 continue; 1111 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1112 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); 1113 } 1114 } 1115 1116 int 1117 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1118 { 1119 struct carp_softc *sc = ifa->ifa_carp; 1120 1121 if (sc->sc_state == MASTER) { 1122 *enaddr = LLADDR(&sc->sc_addr); 1123 return (1); 1124 } 1125 1126 return (0); 1127 } 1128 #endif 1129 1130 #ifdef INET6 1131 static void 1132 carp_send_na(struct carp_softc *sc) 1133 { 1134 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1135 struct ifaddr *ifa; 1136 struct in6_addr *in6; 1137 1138 CARP_FOREACH_IFA(sc, ifa) { 1139 if (ifa->ifa_addr->sa_family != AF_INET6) 1140 continue; 1141 1142 in6 = IFA_IN6(ifa); 1143 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1144 ND_NA_FLAG_OVERRIDE, 1, NULL); 1145 DELAY(1000); /* XXX */ 1146 } 1147 } 1148 1149 /* 1150 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1151 * matches and is not a carp address. Returns NULL otherwise. 
1152 */ 1153 struct ifaddr * 1154 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1155 { 1156 struct ifaddr *ifa; 1157 1158 ifa = NULL; 1159 IF_ADDR_RLOCK(ifp); 1160 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1161 if (ifa->ifa_addr->sa_family != AF_INET6) 1162 continue; 1163 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1164 continue; 1165 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1166 ifa = NULL; 1167 else 1168 ifa_ref(ifa); 1169 break; 1170 } 1171 IF_ADDR_RUNLOCK(ifp); 1172 1173 return (ifa); 1174 } 1175 1176 caddr_t 1177 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1178 { 1179 struct ifaddr *ifa; 1180 1181 IF_ADDR_RLOCK(ifp); 1182 IFNET_FOREACH_IFA(ifp, ifa) 1183 if (ifa->ifa_addr->sa_family == AF_INET6 && 1184 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1185 struct carp_softc *sc = ifa->ifa_carp; 1186 struct m_tag *mtag; 1187 1188 IF_ADDR_RUNLOCK(ifp); 1189 1190 mtag = m_tag_get(PACKET_TAG_CARP, 1191 sizeof(struct carp_softc *), M_NOWAIT); 1192 if (mtag == NULL) 1193 /* Better a bit than nothing. */ 1194 return (LLADDR(&sc->sc_addr)); 1195 1196 bcopy(&sc, mtag + 1, sizeof(sc)); 1197 m_tag_prepend(m, mtag); 1198 1199 return (LLADDR(&sc->sc_addr)); 1200 } 1201 IF_ADDR_RUNLOCK(ifp); 1202 1203 return (NULL); 1204 } 1205 #endif /* INET6 */ 1206 1207 int 1208 carp_forus(struct ifnet *ifp, u_char *dhost) 1209 { 1210 struct carp_softc *sc; 1211 uint8_t *ena = dhost; 1212 1213 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1214 return (0); 1215 1216 CIF_LOCK(ifp->if_carp); 1217 IFNET_FOREACH_CARP(ifp, sc) { 1218 CARP_LOCK(sc); 1219 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1220 ETHER_ADDR_LEN)) { 1221 CARP_UNLOCK(sc); 1222 CIF_UNLOCK(ifp->if_carp); 1223 return (1); 1224 } 1225 CARP_UNLOCK(sc); 1226 } 1227 CIF_UNLOCK(ifp->if_carp); 1228 1229 return (0); 1230 } 1231 1232 /* Master down timeout event, executed in callout context. 
*/ 1233 static void 1234 carp_master_down(void *v) 1235 { 1236 struct carp_softc *sc = v; 1237 1238 CARP_LOCK_ASSERT(sc); 1239 1240 CURVNET_SET(sc->sc_carpdev->if_vnet); 1241 if (sc->sc_state == BACKUP) { 1242 carp_master_down_locked(sc, "master timed out"); 1243 } 1244 CURVNET_RESTORE(); 1245 1246 CARP_UNLOCK(sc); 1247 } 1248 1249 static void 1250 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1251 { 1252 1253 CARP_LOCK_ASSERT(sc); 1254 1255 switch (sc->sc_state) { 1256 case BACKUP: 1257 carp_set_state(sc, MASTER, reason); 1258 carp_send_ad_locked(sc); 1259 #ifdef INET 1260 carp_send_arp(sc); 1261 #endif 1262 #ifdef INET6 1263 carp_send_na(sc); 1264 #endif 1265 carp_setrun(sc, 0); 1266 carp_addroute(sc); 1267 break; 1268 case INIT: 1269 case MASTER: 1270 #ifdef INVARIANTS 1271 panic("carp: VHID %u@%s: master_down event in %s state\n", 1272 sc->sc_vhid, 1273 sc->sc_carpdev->if_xname, 1274 sc->sc_state ? "MASTER" : "INIT"); 1275 #endif 1276 break; 1277 } 1278 } 1279 1280 /* 1281 * When in backup state, af indicates whether to reset the master down timer 1282 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1283 */ 1284 static void 1285 carp_setrun(struct carp_softc *sc, sa_family_t af) 1286 { 1287 struct timeval tv; 1288 1289 CARP_LOCK_ASSERT(sc); 1290 1291 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1292 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1293 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)) 1294 return; 1295 1296 switch (sc->sc_state) { 1297 case INIT: 1298 carp_set_state(sc, BACKUP, "initialization complete"); 1299 carp_setrun(sc, 0); 1300 break; 1301 case BACKUP: 1302 callout_stop(&sc->sc_ad_tmo); 1303 tv.tv_sec = 3 * sc->sc_advbase; 1304 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1305 switch (af) { 1306 #ifdef INET 1307 case AF_INET: 1308 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1309 carp_master_down, sc); 1310 break; 1311 #endif 1312 #ifdef INET6 1313 case AF_INET6: 1314 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1315 carp_master_down, sc); 1316 break; 1317 #endif 1318 default: 1319 #ifdef INET 1320 if (sc->sc_naddrs) 1321 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1322 carp_master_down, sc); 1323 #endif 1324 #ifdef INET6 1325 if (sc->sc_naddrs6) 1326 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1327 carp_master_down, sc); 1328 #endif 1329 break; 1330 } 1331 break; 1332 case MASTER: 1333 tv.tv_sec = sc->sc_advbase; 1334 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1335 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1336 carp_send_ad, sc); 1337 break; 1338 } 1339 } 1340 1341 /* 1342 * Setup multicast structures. 
1343 */ 1344 static int 1345 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1346 { 1347 struct ifnet *ifp = cif->cif_ifp; 1348 int error = 0; 1349 1350 switch (sa) { 1351 #ifdef INET 1352 case AF_INET: 1353 { 1354 struct ip_moptions *imo = &cif->cif_imo; 1355 struct in_addr addr; 1356 1357 if (imo->imo_membership) 1358 return (0); 1359 1360 imo->imo_membership = (struct in_multi **)malloc( 1361 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, 1362 M_WAITOK); 1363 imo->imo_mfilters = NULL; 1364 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1365 imo->imo_multicast_vif = -1; 1366 1367 addr.s_addr = htonl(INADDR_CARP_GROUP); 1368 if ((error = in_joingroup(ifp, &addr, NULL, 1369 &imo->imo_membership[0])) != 0) { 1370 free(imo->imo_membership, M_CARP); 1371 break; 1372 } 1373 imo->imo_num_memberships++; 1374 imo->imo_multicast_ifp = ifp; 1375 imo->imo_multicast_ttl = CARP_DFLTTL; 1376 imo->imo_multicast_loop = 0; 1377 break; 1378 } 1379 #endif 1380 #ifdef INET6 1381 case AF_INET6: 1382 { 1383 struct ip6_moptions *im6o = &cif->cif_im6o; 1384 struct in6_addr in6; 1385 struct in6_multi *in6m; 1386 1387 if (im6o->im6o_membership) 1388 return (0); 1389 1390 im6o->im6o_membership = (struct in6_multi **)malloc( 1391 (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, 1392 M_ZERO | M_WAITOK); 1393 im6o->im6o_mfilters = NULL; 1394 im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; 1395 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1396 im6o->im6o_multicast_ifp = ifp; 1397 1398 /* Join IPv6 CARP multicast group. 
*/ 1399 bzero(&in6, sizeof(in6)); 1400 in6.s6_addr16[0] = htons(0xff02); 1401 in6.s6_addr8[15] = 0x12; 1402 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1403 free(im6o->im6o_membership, M_CARP); 1404 break; 1405 } 1406 in6m = NULL; 1407 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { 1408 free(im6o->im6o_membership, M_CARP); 1409 break; 1410 } 1411 im6o->im6o_membership[0] = in6m; 1412 im6o->im6o_num_memberships++; 1413 1414 /* Join solicited multicast address. */ 1415 bzero(&in6, sizeof(in6)); 1416 in6.s6_addr16[0] = htons(0xff02); 1417 in6.s6_addr32[1] = 0; 1418 in6.s6_addr32[2] = htonl(1); 1419 in6.s6_addr32[3] = 0; 1420 in6.s6_addr8[12] = 0xff; 1421 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1422 in6_mc_leave(im6o->im6o_membership[0], NULL); 1423 free(im6o->im6o_membership, M_CARP); 1424 break; 1425 } 1426 in6m = NULL; 1427 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { 1428 in6_mc_leave(im6o->im6o_membership[0], NULL); 1429 free(im6o->im6o_membership, M_CARP); 1430 break; 1431 } 1432 im6o->im6o_membership[1] = in6m; 1433 im6o->im6o_num_memberships++; 1434 break; 1435 } 1436 #endif 1437 } 1438 1439 return (error); 1440 } 1441 1442 /* 1443 * Free multicast structures. 
1444 */ 1445 static void 1446 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1447 { 1448 1449 sx_assert(&carp_sx, SA_XLOCKED); 1450 1451 switch (sa) { 1452 #ifdef INET 1453 case AF_INET: 1454 if (cif->cif_naddrs == 0) { 1455 struct ip_moptions *imo = &cif->cif_imo; 1456 1457 in_leavegroup(imo->imo_membership[0], NULL); 1458 KASSERT(imo->imo_mfilters == NULL, 1459 ("%s: imo_mfilters != NULL", __func__)); 1460 free(imo->imo_membership, M_CARP); 1461 imo->imo_membership = NULL; 1462 1463 } 1464 break; 1465 #endif 1466 #ifdef INET6 1467 case AF_INET6: 1468 if (cif->cif_naddrs6 == 0) { 1469 struct ip6_moptions *im6o = &cif->cif_im6o; 1470 1471 in6_mc_leave(im6o->im6o_membership[0], NULL); 1472 in6_mc_leave(im6o->im6o_membership[1], NULL); 1473 KASSERT(im6o->im6o_mfilters == NULL, 1474 ("%s: im6o_mfilters != NULL", __func__)); 1475 free(im6o->im6o_membership, M_CARP); 1476 im6o->im6o_membership = NULL; 1477 } 1478 break; 1479 #endif 1480 } 1481 } 1482 1483 int 1484 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1485 { 1486 struct m_tag *mtag; 1487 struct carp_softc *sc; 1488 1489 if (!sa) 1490 return (0); 1491 1492 switch (sa->sa_family) { 1493 #ifdef INET 1494 case AF_INET: 1495 break; 1496 #endif 1497 #ifdef INET6 1498 case AF_INET6: 1499 break; 1500 #endif 1501 default: 1502 return (0); 1503 } 1504 1505 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1506 if (mtag == NULL) 1507 return (0); 1508 1509 bcopy(mtag + 1, &sc, sizeof(sc)); 1510 1511 /* Set the source MAC address to the Virtual Router MAC Address. 
*/ 1512 switch (ifp->if_type) { 1513 case IFT_ETHER: 1514 case IFT_BRIDGE: 1515 case IFT_L2VLAN: { 1516 struct ether_header *eh; 1517 1518 eh = mtod(m, struct ether_header *); 1519 eh->ether_shost[0] = 0; 1520 eh->ether_shost[1] = 0; 1521 eh->ether_shost[2] = 0x5e; 1522 eh->ether_shost[3] = 0; 1523 eh->ether_shost[4] = 1; 1524 eh->ether_shost[5] = sc->sc_vhid; 1525 } 1526 break; 1527 case IFT_FDDI: { 1528 struct fddi_header *fh; 1529 1530 fh = mtod(m, struct fddi_header *); 1531 fh->fddi_shost[0] = 0; 1532 fh->fddi_shost[1] = 0; 1533 fh->fddi_shost[2] = 0x5e; 1534 fh->fddi_shost[3] = 0; 1535 fh->fddi_shost[4] = 1; 1536 fh->fddi_shost[5] = sc->sc_vhid; 1537 } 1538 break; 1539 case IFT_ISO88025: { 1540 struct iso88025_header *th; 1541 th = mtod(m, struct iso88025_header *); 1542 th->iso88025_shost[0] = 3; 1543 th->iso88025_shost[1] = 0; 1544 th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); 1545 th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); 1546 th->iso88025_shost[4] = 0; 1547 th->iso88025_shost[5] = 0; 1548 } 1549 break; 1550 default: 1551 printf("%s: carp is not supported for the %d interface type\n", 1552 ifp->if_xname, ifp->if_type); 1553 return (EOPNOTSUPP); 1554 } 1555 1556 return (0); 1557 } 1558 1559 static struct carp_softc* 1560 carp_alloc(struct ifnet *ifp) 1561 { 1562 struct carp_softc *sc; 1563 struct carp_if *cif; 1564 1565 if ((cif = ifp->if_carp) == NULL) 1566 cif = carp_alloc_if(ifp); 1567 1568 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1569 1570 sc->sc_advbase = CARP_DFLTINTV; 1571 sc->sc_vhid = -1; /* required setting */ 1572 sc->sc_init_counter = 1; 1573 sc->sc_state = INIT; 1574 1575 sc->sc_ifasiz = sizeof(struct ifaddr *); 1576 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1577 sc->sc_carpdev = ifp; 1578 1579 CARP_LOCK_INIT(sc); 1580 #ifdef INET 1581 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1582 #endif 1583 #ifdef INET6 1584 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, 
CALLOUT_RETURNUNLOCKED); 1585 #endif 1586 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1587 1588 CIF_LOCK(cif); 1589 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1590 CIF_UNLOCK(cif); 1591 1592 mtx_lock(&carp_mtx); 1593 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1594 mtx_unlock(&carp_mtx); 1595 1596 return (sc); 1597 } 1598 1599 static void 1600 carp_grow_ifas(struct carp_softc *sc) 1601 { 1602 struct ifaddr **new; 1603 1604 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 1605 CARP_LOCK(sc); 1606 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1607 free(sc->sc_ifas, M_CARP); 1608 sc->sc_ifas = new; 1609 sc->sc_ifasiz *= 2; 1610 CARP_UNLOCK(sc); 1611 } 1612 1613 static void 1614 carp_destroy(struct carp_softc *sc) 1615 { 1616 struct ifnet *ifp = sc->sc_carpdev; 1617 struct carp_if *cif = ifp->if_carp; 1618 1619 sx_assert(&carp_sx, SA_XLOCKED); 1620 1621 if (sc->sc_suppress) 1622 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 1623 CARP_UNLOCK(sc); 1624 1625 CIF_LOCK(cif); 1626 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1627 CIF_UNLOCK(cif); 1628 1629 mtx_lock(&carp_mtx); 1630 LIST_REMOVE(sc, sc_next); 1631 mtx_unlock(&carp_mtx); 1632 1633 callout_drain(&sc->sc_ad_tmo); 1634 #ifdef INET 1635 callout_drain(&sc->sc_md_tmo); 1636 #endif 1637 #ifdef INET6 1638 callout_drain(&sc->sc_md6_tmo); 1639 #endif 1640 CARP_LOCK_DESTROY(sc); 1641 1642 free(sc->sc_ifas, M_CARP); 1643 free(sc, M_CARP); 1644 } 1645 1646 static struct carp_if* 1647 carp_alloc_if(struct ifnet *ifp) 1648 { 1649 struct carp_if *cif; 1650 int error; 1651 1652 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1653 1654 if ((error = ifpromisc(ifp, 1)) != 0) 1655 printf("%s: ifpromisc(%s) failed: %d\n", 1656 __func__, ifp->if_xname, error); 1657 else 1658 cif->cif_flags |= CIF_PROMISC; 1659 1660 CIF_LOCK_INIT(cif); 1661 cif->cif_ifp = ifp; 1662 TAILQ_INIT(&cif->cif_vrs); 1663 1664 IF_ADDR_WLOCK(ifp); 1665 ifp->if_carp = cif; 1666 if_ref(ifp); 1667 IF_ADDR_WUNLOCK(ifp); 
1668 1669 return (cif); 1670 } 1671 1672 static void 1673 carp_free_if(struct carp_if *cif) 1674 { 1675 struct ifnet *ifp = cif->cif_ifp; 1676 1677 CIF_LOCK_ASSERT(cif); 1678 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1679 __func__)); 1680 1681 IF_ADDR_WLOCK(ifp); 1682 ifp->if_carp = NULL; 1683 IF_ADDR_WUNLOCK(ifp); 1684 1685 CIF_LOCK_DESTROY(cif); 1686 1687 if (cif->cif_flags & CIF_PROMISC) 1688 ifpromisc(ifp, 0); 1689 if_rele(ifp); 1690 1691 free(cif, M_CARP); 1692 } 1693 1694 static void 1695 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) 1696 { 1697 1698 CARP_LOCK(sc); 1699 carpr->carpr_state = sc->sc_state; 1700 carpr->carpr_vhid = sc->sc_vhid; 1701 carpr->carpr_advbase = sc->sc_advbase; 1702 carpr->carpr_advskew = sc->sc_advskew; 1703 if (priv) 1704 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1705 else 1706 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1707 CARP_UNLOCK(sc); 1708 } 1709 1710 int 1711 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1712 { 1713 struct carpreq carpr; 1714 struct ifnet *ifp; 1715 struct carp_softc *sc = NULL; 1716 int error = 0, locked = 0; 1717 1718 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 1719 return (error); 1720 1721 ifp = ifunit_ref(ifr->ifr_name); 1722 if (ifp == NULL) 1723 return (ENXIO); 1724 1725 switch (ifp->if_type) { 1726 case IFT_ETHER: 1727 case IFT_L2VLAN: 1728 case IFT_BRIDGE: 1729 case IFT_FDDI: 1730 case IFT_ISO88025: 1731 break; 1732 default: 1733 error = EOPNOTSUPP; 1734 goto out; 1735 } 1736 1737 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1738 error = EADDRNOTAVAIL; 1739 goto out; 1740 } 1741 1742 sx_xlock(&carp_sx); 1743 switch (cmd) { 1744 case SIOCSVH: 1745 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1746 break; 1747 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || 1748 carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { 1749 error = EINVAL; 1750 break; 1751 } 1752 1753 if (ifp->if_carp) 
{ 1754 CIF_LOCK(ifp->if_carp); 1755 IFNET_FOREACH_CARP(ifp, sc) 1756 if (sc->sc_vhid == carpr.carpr_vhid) 1757 break; 1758 CIF_UNLOCK(ifp->if_carp); 1759 } 1760 if (sc == NULL) { 1761 sc = carp_alloc(ifp); 1762 CARP_LOCK(sc); 1763 sc->sc_vhid = carpr.carpr_vhid; 1764 LLADDR(&sc->sc_addr)[0] = 0; 1765 LLADDR(&sc->sc_addr)[1] = 0; 1766 LLADDR(&sc->sc_addr)[2] = 0x5e; 1767 LLADDR(&sc->sc_addr)[3] = 0; 1768 LLADDR(&sc->sc_addr)[4] = 1; 1769 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1770 } else 1771 CARP_LOCK(sc); 1772 locked = 1; 1773 if (carpr.carpr_advbase > 0) { 1774 if (carpr.carpr_advbase > 255 || 1775 carpr.carpr_advbase < CARP_DFLTINTV) { 1776 error = EINVAL; 1777 break; 1778 } 1779 sc->sc_advbase = carpr.carpr_advbase; 1780 } 1781 if (carpr.carpr_advskew >= 255) { 1782 error = EINVAL; 1783 break; 1784 } 1785 sc->sc_advskew = carpr.carpr_advskew; 1786 if (carpr.carpr_key[0] != '\0') { 1787 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1788 carp_hmac_prepare(sc); 1789 } 1790 if (sc->sc_state != INIT && 1791 carpr.carpr_state != sc->sc_state) { 1792 switch (carpr.carpr_state) { 1793 case BACKUP: 1794 callout_stop(&sc->sc_ad_tmo); 1795 carp_set_state(sc, BACKUP, 1796 "user requested via ifconfig"); 1797 carp_setrun(sc, 0); 1798 carp_delroute(sc); 1799 break; 1800 case MASTER: 1801 carp_master_down_locked(sc, 1802 "user requested via ifconfig"); 1803 break; 1804 default: 1805 break; 1806 } 1807 } 1808 break; 1809 1810 case SIOCGVH: 1811 { 1812 int priveleged; 1813 1814 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { 1815 error = EINVAL; 1816 break; 1817 } 1818 if (carpr.carpr_count < 1) { 1819 error = EMSGSIZE; 1820 break; 1821 } 1822 if (ifp->if_carp == NULL) { 1823 error = ENOENT; 1824 break; 1825 } 1826 1827 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); 1828 if (carpr.carpr_vhid != 0) { 1829 CIF_LOCK(ifp->if_carp); 1830 IFNET_FOREACH_CARP(ifp, sc) 1831 if (sc->sc_vhid == carpr.carpr_vhid) 1832 break; 1833 CIF_UNLOCK(ifp->if_carp); 
1834 if (sc == NULL) { 1835 error = ENOENT; 1836 break; 1837 } 1838 carp_carprcp(&carpr, sc, priveleged); 1839 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1840 } else { 1841 int i, count; 1842 1843 count = 0; 1844 CIF_LOCK(ifp->if_carp); 1845 IFNET_FOREACH_CARP(ifp, sc) 1846 count++; 1847 1848 if (count > carpr.carpr_count) { 1849 CIF_UNLOCK(ifp->if_carp); 1850 error = EMSGSIZE; 1851 break; 1852 } 1853 1854 i = 0; 1855 IFNET_FOREACH_CARP(ifp, sc) { 1856 carp_carprcp(&carpr, sc, priveleged); 1857 carpr.carpr_count = count; 1858 error = copyout(&carpr, ifr->ifr_data + 1859 (i * sizeof(carpr)), sizeof(carpr)); 1860 if (error) { 1861 CIF_UNLOCK(ifp->if_carp); 1862 break; 1863 } 1864 i++; 1865 } 1866 CIF_UNLOCK(ifp->if_carp); 1867 } 1868 break; 1869 } 1870 default: 1871 error = EINVAL; 1872 } 1873 sx_xunlock(&carp_sx); 1874 1875 out: 1876 if (locked) 1877 CARP_UNLOCK(sc); 1878 if_rele(ifp); 1879 1880 return (error); 1881 } 1882 1883 static int 1884 carp_get_vhid(struct ifaddr *ifa) 1885 { 1886 1887 if (ifa == NULL || ifa->ifa_carp == NULL) 1888 return (0); 1889 1890 return (ifa->ifa_carp->sc_vhid); 1891 } 1892 1893 int 1894 carp_attach(struct ifaddr *ifa, int vhid) 1895 { 1896 struct ifnet *ifp = ifa->ifa_ifp; 1897 struct carp_if *cif = ifp->if_carp; 1898 struct carp_softc *sc; 1899 int index, error; 1900 1901 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 1902 1903 switch (ifa->ifa_addr->sa_family) { 1904 #ifdef INET 1905 case AF_INET: 1906 #endif 1907 #ifdef INET6 1908 case AF_INET6: 1909 #endif 1910 break; 1911 default: 1912 return (EPROTOTYPE); 1913 } 1914 1915 sx_xlock(&carp_sx); 1916 if (ifp->if_carp == NULL) { 1917 sx_xunlock(&carp_sx); 1918 return (ENOPROTOOPT); 1919 } 1920 1921 CIF_LOCK(cif); 1922 IFNET_FOREACH_CARP(ifp, sc) 1923 if (sc->sc_vhid == vhid) 1924 break; 1925 CIF_UNLOCK(cif); 1926 if (sc == NULL) { 1927 sx_xunlock(&carp_sx); 1928 return (ENOENT); 1929 } 1930 1931 error = carp_multicast_setup(cif, 
ifa->ifa_addr->sa_family); 1932 if (error) { 1933 CIF_FREE(cif); 1934 sx_xunlock(&carp_sx); 1935 return (error); 1936 } 1937 1938 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 1939 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 1940 carp_grow_ifas(sc); 1941 1942 switch (ifa->ifa_addr->sa_family) { 1943 #ifdef INET 1944 case AF_INET: 1945 cif->cif_naddrs++; 1946 sc->sc_naddrs++; 1947 break; 1948 #endif 1949 #ifdef INET6 1950 case AF_INET6: 1951 cif->cif_naddrs6++; 1952 sc->sc_naddrs6++; 1953 break; 1954 #endif 1955 } 1956 1957 ifa_ref(ifa); 1958 1959 CARP_LOCK(sc); 1960 sc->sc_ifas[index - 1] = ifa; 1961 ifa->ifa_carp = sc; 1962 carp_hmac_prepare(sc); 1963 carp_sc_state(sc); 1964 CARP_UNLOCK(sc); 1965 1966 sx_xunlock(&carp_sx); 1967 1968 return (0); 1969 } 1970 1971 void 1972 carp_detach(struct ifaddr *ifa) 1973 { 1974 struct ifnet *ifp = ifa->ifa_ifp; 1975 struct carp_if *cif = ifp->if_carp; 1976 struct carp_softc *sc = ifa->ifa_carp; 1977 int i, index; 1978 1979 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 1980 1981 sx_xlock(&carp_sx); 1982 1983 CARP_LOCK(sc); 1984 /* Shift array. 
*/ 1985 index = sc->sc_naddrs + sc->sc_naddrs6; 1986 for (i = 0; i < index; i++) 1987 if (sc->sc_ifas[i] == ifa) 1988 break; 1989 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 1990 for (; i < index - 1; i++) 1991 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 1992 sc->sc_ifas[index - 1] = NULL; 1993 1994 switch (ifa->ifa_addr->sa_family) { 1995 #ifdef INET 1996 case AF_INET: 1997 cif->cif_naddrs--; 1998 sc->sc_naddrs--; 1999 break; 2000 #endif 2001 #ifdef INET6 2002 case AF_INET6: 2003 cif->cif_naddrs6--; 2004 sc->sc_naddrs6--; 2005 break; 2006 #endif 2007 } 2008 2009 carp_ifa_delroute(ifa); 2010 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2011 2012 ifa->ifa_carp = NULL; 2013 ifa_free(ifa); 2014 2015 carp_hmac_prepare(sc); 2016 carp_sc_state(sc); 2017 2018 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2019 carp_destroy(sc); 2020 else 2021 CARP_UNLOCK(sc); 2022 2023 CIF_FREE(cif); 2024 2025 sx_xunlock(&carp_sx); 2026 } 2027 2028 static void 2029 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2030 { 2031 2032 CARP_LOCK_ASSERT(sc); 2033 2034 if (sc->sc_state != state) { 2035 const char *carp_states[] = { CARP_STATES }; 2036 char subsys[IFNAMSIZ+5]; 2037 2038 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2039 sc->sc_carpdev->if_xname); 2040 2041 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2042 carp_states[sc->sc_state], carp_states[state], reason); 2043 2044 sc->sc_state = state; 2045 2046 devctl_notify("CARP", subsys, carp_states[state], NULL); 2047 } 2048 } 2049 2050 static void 2051 carp_linkstate(struct ifnet *ifp) 2052 { 2053 struct carp_softc *sc; 2054 2055 CIF_LOCK(ifp->if_carp); 2056 IFNET_FOREACH_CARP(ifp, sc) { 2057 CARP_LOCK(sc); 2058 carp_sc_state(sc); 2059 CARP_UNLOCK(sc); 2060 } 2061 CIF_UNLOCK(ifp->if_carp); 2062 } 2063 2064 static void 2065 carp_sc_state(struct carp_softc *sc) 2066 { 2067 2068 CARP_LOCK_ASSERT(sc); 2069 2070 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2071 !(sc->sc_carpdev->if_flags & 
IFF_UP)) { 2072 callout_stop(&sc->sc_ad_tmo); 2073 #ifdef INET 2074 callout_stop(&sc->sc_md_tmo); 2075 #endif 2076 #ifdef INET6 2077 callout_stop(&sc->sc_md6_tmo); 2078 #endif 2079 carp_set_state(sc, INIT, "hardware interface down"); 2080 carp_setrun(sc, 0); 2081 if (!sc->sc_suppress) 2082 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2083 sc->sc_suppress = 1; 2084 } else { 2085 carp_set_state(sc, INIT, "hardware interface up"); 2086 carp_setrun(sc, 0); 2087 if (sc->sc_suppress) 2088 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2089 sc->sc_suppress = 0; 2090 } 2091 } 2092 2093 static void 2094 carp_demote_adj(int adj, char *reason) 2095 { 2096 atomic_add_int(&V_carp_demotion, adj); 2097 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2098 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2099 } 2100 2101 static int 2102 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2103 { 2104 int new, error; 2105 2106 new = V_carp_demotion; 2107 error = sysctl_handle_int(oidp, &new, 0, req); 2108 if (error || !req->newptr) 2109 return (error); 2110 2111 carp_demote_adj(new, "sysctl"); 2112 2113 return (0); 2114 } 2115 2116 #ifdef INET 2117 extern struct domain inetdomain; 2118 static struct protosw in_carp_protosw = { 2119 .pr_type = SOCK_RAW, 2120 .pr_domain = &inetdomain, 2121 .pr_protocol = IPPROTO_CARP, 2122 .pr_flags = PR_ATOMIC|PR_ADDR, 2123 .pr_input = carp_input, 2124 .pr_output = rip_output, 2125 .pr_ctloutput = rip_ctloutput, 2126 .pr_usrreqs = &rip_usrreqs 2127 }; 2128 #endif 2129 2130 #ifdef INET6 2131 extern struct domain inet6domain; 2132 static struct protosw in6_carp_protosw = { 2133 .pr_type = SOCK_RAW, 2134 .pr_domain = &inet6domain, 2135 .pr_protocol = IPPROTO_CARP, 2136 .pr_flags = PR_ATOMIC|PR_ADDR, 2137 .pr_input = carp6_input, 2138 .pr_output = rip6_output, 2139 .pr_ctloutput = rip6_ctloutput, 2140 .pr_usrreqs = &rip6_usrreqs 2141 }; 2142 #endif 2143 2144 static void 2145 carp_mod_cleanup(void) 2146 { 2147 2148 #ifdef 
INET 2149 if (proto_reg[CARP_INET] == 0) { 2150 (void)ipproto_unregister(IPPROTO_CARP); 2151 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2152 proto_reg[CARP_INET] = -1; 2153 } 2154 carp_iamatch_p = NULL; 2155 #endif 2156 #ifdef INET6 2157 if (proto_reg[CARP_INET6] == 0) { 2158 (void)ip6proto_unregister(IPPROTO_CARP); 2159 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2160 proto_reg[CARP_INET6] = -1; 2161 } 2162 carp_iamatch6_p = NULL; 2163 carp_macmatch6_p = NULL; 2164 #endif 2165 carp_ioctl_p = NULL; 2166 carp_attach_p = NULL; 2167 carp_detach_p = NULL; 2168 carp_get_vhid_p = NULL; 2169 carp_linkstate_p = NULL; 2170 carp_forus_p = NULL; 2171 carp_output_p = NULL; 2172 carp_demote_adj_p = NULL; 2173 carp_master_p = NULL; 2174 mtx_unlock(&carp_mtx); 2175 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2176 mtx_destroy(&carp_mtx); 2177 sx_destroy(&carp_sx); 2178 } 2179 2180 static int 2181 carp_mod_load(void) 2182 { 2183 int err; 2184 2185 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2186 sx_init(&carp_sx, "carp_sx"); 2187 LIST_INIT(&carp_list); 2188 carp_get_vhid_p = carp_get_vhid; 2189 carp_forus_p = carp_forus; 2190 carp_output_p = carp_output; 2191 carp_linkstate_p = carp_linkstate; 2192 carp_ioctl_p = carp_ioctl; 2193 carp_attach_p = carp_attach; 2194 carp_detach_p = carp_detach; 2195 carp_demote_adj_p = carp_demote_adj; 2196 carp_master_p = carp_master; 2197 #ifdef INET6 2198 carp_iamatch6_p = carp_iamatch6; 2199 carp_macmatch6_p = carp_macmatch6; 2200 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2201 (struct protosw *)&in6_carp_protosw); 2202 if (proto_reg[CARP_INET6]) { 2203 printf("carp: error %d attaching to PF_INET6\n", 2204 proto_reg[CARP_INET6]); 2205 carp_mod_cleanup(); 2206 return (proto_reg[CARP_INET6]); 2207 } 2208 err = ip6proto_register(IPPROTO_CARP); 2209 if (err) { 2210 printf("carp: error %d registering with INET6\n", err); 2211 carp_mod_cleanup(); 2212 return (err); 2213 } 2214 #endif 2215 #ifdef INET 2216 
carp_iamatch_p = carp_iamatch; 2217 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2218 if (proto_reg[CARP_INET]) { 2219 printf("carp: error %d attaching to PF_INET\n", 2220 proto_reg[CARP_INET]); 2221 carp_mod_cleanup(); 2222 return (proto_reg[CARP_INET]); 2223 } 2224 err = ipproto_register(IPPROTO_CARP); 2225 if (err) { 2226 printf("carp: error %d registering with INET\n", err); 2227 carp_mod_cleanup(); 2228 return (err); 2229 } 2230 #endif 2231 return (0); 2232 } 2233 2234 static int 2235 carp_modevent(module_t mod, int type, void *data) 2236 { 2237 switch (type) { 2238 case MOD_LOAD: 2239 return carp_mod_load(); 2240 /* NOTREACHED */ 2241 case MOD_UNLOAD: 2242 mtx_lock(&carp_mtx); 2243 if (LIST_EMPTY(&carp_list)) 2244 carp_mod_cleanup(); 2245 else { 2246 mtx_unlock(&carp_mtx); 2247 return (EBUSY); 2248 } 2249 break; 2250 2251 default: 2252 return (EINVAL); 2253 } 2254 2255 return (0); 2256 } 2257 2258 static moduledata_t carp_mod = { 2259 "carp", 2260 carp_modevent, 2261 0 2262 }; 2263 2264 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2265