1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_bpf.h" 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/devctl.h> 41 #include <sys/jail.h> 42 #include <sys/kernel.h> 43 #include <sys/limits.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/module.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/sockio.h> 52 #include <sys/sysctl.h> 53 #include <sys/syslog.h> 54 #include <sys/taskqueue.h> 55 #include <sys/counter.h> 56 57 #include <net/ethernet.h> 58 #include <net/if.h> 59 #include <net/if_var.h> 60 #include <net/if_dl.h> 61 #include <net/if_llatbl.h> 62 #include <net/if_types.h> 63 #include <net/route.h> 64 #include <net/vnet.h> 65 66 #if defined(INET) || defined(INET6) 67 #include <netinet/in.h> 68 #include <netinet/in_var.h> 69 #include <netinet/ip_carp.h> 70 #include <netinet/ip.h> 71 #include <machine/in_cksum.h> 72 #endif 73 #ifdef INET 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #endif 77 78 #ifdef INET6 79 #include <netinet/icmp6.h> 80 #include <netinet/ip6.h> 81 #include <netinet6/in6_var.h> 82 #include <netinet6/ip6_var.h> 83 #include <netinet6/scope6_var.h> 84 #include <netinet6/nd6.h> 85 #endif 86 87 #include <crypto/sha1.h> 88 89 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); 90 91 struct carp_softc { 92 struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ 93 struct ifaddr **sc_ifas; /* Our ifaddrs. */ 94 struct sockaddr_dl sc_addr; /* Our link level address. */ 95 struct callout sc_ad_tmo; /* Advertising timeout. */ 96 #ifdef INET 97 struct callout sc_md_tmo; /* Master down timeout. */ 98 #endif 99 #ifdef INET6 100 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ 101 #endif 102 struct mtx sc_mtx; 103 104 int sc_vhid; 105 int sc_advskew; 106 int sc_advbase; 107 108 int sc_naddrs; 109 int sc_naddrs6; 110 int sc_ifasiz; 111 enum { INIT = 0, BACKUP, MASTER } sc_state; 112 int sc_suppress; 113 int sc_sendad_errors; 114 #define CARP_SENDAD_MAX_ERRORS 3 115 int sc_sendad_success; 116 #define CARP_SENDAD_MIN_SUCCESS 3 117 118 int sc_init_counter; 119 uint64_t sc_counter; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char sc_key[CARP_KEY_LEN]; 124 unsigned char sc_pad[CARP_HMAC_PAD]; 125 SHA1_CTX sc_sha1; 126 127 TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ 128 LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ 129 }; 130 131 struct carp_if { 132 #ifdef INET 133 int cif_naddrs; 134 #endif 135 #ifdef INET6 136 int cif_naddrs6; 137 #endif 138 TAILQ_HEAD(, carp_softc) cif_vrs; 139 #ifdef INET 140 struct ip_moptions cif_imo; 141 #endif 142 #ifdef INET6 143 struct ip6_moptions cif_im6o; 144 #endif 145 struct ifnet *cif_ifp; 146 struct mtx cif_mtx; 147 uint32_t cif_flags; 148 #define CIF_PROMISC 0x00000001 149 }; 150 151 #define CARP_INET 0 152 #define CARP_INET6 1 153 static int proto_reg[] = {-1, -1}; 154 155 /* 156 * Brief design of carp(4). 157 * 158 * Any carp-capable ifnet may have a list of carp softcs hanging off 159 * its ifp->if_carp pointer. Each softc represents one unique virtual 160 * host id, or vhid. The softc has a back pointer to the ifnet. All 161 * softcs are joined in a global list, which has quite limited use. 162 * 163 * Any interface address that takes part in CARP negotiation has a 164 * pointer to the softc of its vhid, ifa->ifa_carp. That could be either 165 * AF_INET or AF_INET6 address. 166 * 167 * Although, one can get the softc's backpointer to ifnet and traverse 168 * through its ifp->if_addrhead queue to find all interface addresses 169 * involved in CARP, we keep a growable array of ifaddr pointers. This 170 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that 171 * do calls into the network stack, thus avoiding LORs. 172 * 173 * Locking: 174 * 175 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), 176 * callout-driven events and ioctl()s. 177 * 178 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. 179 * To traverse the global list we use the mutex carp_mtx. 180 * 181 * Known issues with locking: 182 * 183 * - Sending ad, we put the pointer to the softc in an mtag, and no reference 184 * counting is done on the softc. 185 * - On module unload we may race (?) with packet processing thread 186 * dereferencing our function pointers. 187 */ 188 189 /* Accept incoming CARP packets. */ 190 VNET_DEFINE_STATIC(int, carp_allow) = 1; 191 #define V_carp_allow VNET(carp_allow) 192 193 /* Set DSCP in outgoing CARP packets. */ 194 VNET_DEFINE_STATIC(int, carp_dscp) = 56; 195 #define V_carp_dscp VNET(carp_dscp) 196 197 /* Preempt slower nodes. */ 198 VNET_DEFINE_STATIC(int, carp_preempt) = 0; 199 #define V_carp_preempt VNET(carp_preempt) 200 201 /* Log level. */ 202 VNET_DEFINE_STATIC(int, carp_log) = 1; 203 #define V_carp_log VNET(carp_log) 204 205 /* Global advskew demotion. */ 206 VNET_DEFINE_STATIC(int, carp_demotion) = 0; 207 #define V_carp_demotion VNET(carp_demotion) 208 209 /* Send error demotion factor. */ 210 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; 211 #define V_carp_senderr_adj VNET(carp_senderr_adj) 212 213 /* Iface down demotion factor. */ 214 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 215 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 216 217 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 218 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 219 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 220 221 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 222 "CARP"); 223 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 224 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 225 0, 0, carp_allow_sysctl, "I", 226 "Accept incoming CARP packets"); 227 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 228 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 229 0, 0, carp_dscp_sysctl, "I", 230 "DSCP value for carp packets"); 231 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 232 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 233 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 234 &VNET_NAME(carp_log), 0, "CARP log level"); 235 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 236 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 237 0, 0, carp_demote_adj_sysctl, "I", 238 "Adjust demotion factor (skew of advskew)"); 239 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 240 CTLFLAG_VNET | CTLFLAG_RW, 241 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 242 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 243 CTLFLAG_VNET | CTLFLAG_RW, 244 &VNET_NAME(carp_ifdown_adj), 0, 245 "Interface down demotion factor adjustment"); 246 247 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 248 VNET_PCPUSTAT_SYSINIT(carpstats); 249 VNET_PCPUSTAT_SYSUNINIT(carpstats); 250 251 #define CARPSTATS_ADD(name, val) \ 252 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 253 sizeof(uint64_t)], (val)) 254 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 255 256 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 257 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 258 259 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 260 NULL, MTX_DEF) 261 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 262 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 263 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 264 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 265 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 266 NULL, MTX_DEF) 267 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 268 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 269 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 270 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 271 #define CIF_FREE(cif) do { \ 272 CIF_LOCK(cif); \ 273 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 274 carp_free_if(cif); \ 275 else \ 276 CIF_UNLOCK(cif); \ 277 } while (0) 278 279 #define CARP_LOG(...) do { \ 280 if (V_carp_log > 0) \ 281 log(LOG_INFO, "carp: " __VA_ARGS__); \ 282 } while (0) 283 284 #define CARP_DEBUG(...) do { \ 285 if (V_carp_log > 1) \ 286 log(LOG_DEBUG, __VA_ARGS__); \ 287 } while (0) 288 289 #define IFNET_FOREACH_IFA(ifp, ifa) \ 290 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 291 if ((ifa)->ifa_carp != NULL) 292 293 #define CARP_FOREACH_IFA(sc, ifa) \ 294 CARP_LOCK_ASSERT(sc); \ 295 for (int _i = 0; \ 296 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 297 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 298 ++_i) 299 300 #define IFNET_FOREACH_CARP(ifp, sc) \ 301 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 302 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 303 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 304 305 #define DEMOTE_ADVSKEW(sc) \ 306 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 307 CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion)) 308 309 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 310 static struct carp_softc 311 *carp_alloc(struct ifnet *); 312 static void carp_destroy(struct carp_softc *); 313 static struct carp_if 314 *carp_alloc_if(struct ifnet *); 315 static void carp_free_if(struct carp_if *); 316 static void carp_set_state(struct carp_softc *, int, const char* reason); 317 static void carp_sc_state(struct carp_softc *); 318 static void carp_setrun(struct carp_softc *, sa_family_t); 319 static void carp_master_down(void *); 320 static void carp_master_down_locked(struct carp_softc *, 321 const char* reason); 322 static void carp_send_ad(void *); 323 static void carp_send_ad_locked(struct carp_softc *); 324 static void carp_addroute(struct carp_softc *); 325 static void carp_ifa_addroute(struct ifaddr *); 326 static void carp_delroute(struct carp_softc *); 327 static void carp_ifa_delroute(struct ifaddr *); 328 static void carp_send_ad_all(void *, int); 329 static void carp_demote_adj(int, char *); 330 331 static LIST_HEAD(, carp_softc) carp_list; 332 static struct mtx carp_mtx; 333 static struct sx carp_sx; 334 static struct task carp_sendall_task = 335 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 336 337 static void 338 carp_hmac_prepare(struct carp_softc *sc) 339 { 340 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 341 uint8_t vhid = sc->sc_vhid & 0xff; 342 struct ifaddr *ifa; 343 int i, found; 344 #ifdef INET 345 struct in_addr last, cur, in; 346 #endif 347 #ifdef INET6 348 struct in6_addr last6, cur6, in6; 349 #endif 350 351 CARP_LOCK_ASSERT(sc); 352 353 /* Compute ipad from key. */ 354 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 355 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 356 for (i = 0; i < sizeof(sc->sc_pad); i++) 357 sc->sc_pad[i] ^= 0x36; 358 359 /* Precompute first part of inner hash. */ 360 SHA1Init(&sc->sc_sha1); 361 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 362 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 363 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 364 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 365 #ifdef INET 366 cur.s_addr = 0; 367 do { 368 found = 0; 369 last = cur; 370 cur.s_addr = 0xffffffff; 371 CARP_FOREACH_IFA(sc, ifa) { 372 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 373 if (ifa->ifa_addr->sa_family == AF_INET && 374 ntohl(in.s_addr) > ntohl(last.s_addr) && 375 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 376 cur.s_addr = in.s_addr; 377 found++; 378 } 379 } 380 if (found) 381 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 382 } while (found); 383 #endif /* INET */ 384 #ifdef INET6 385 memset(&cur6, 0, sizeof(cur6)); 386 do { 387 found = 0; 388 last6 = cur6; 389 memset(&cur6, 0xff, sizeof(cur6)); 390 CARP_FOREACH_IFA(sc, ifa) { 391 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 392 if (IN6_IS_SCOPE_EMBED(&in6)) 393 in6.s6_addr16[1] = 0; 394 if (ifa->ifa_addr->sa_family == AF_INET6 && 395 memcmp(&in6, &last6, sizeof(in6)) > 0 && 396 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 397 cur6 = in6; 398 found++; 399 } 400 } 401 if (found) 402 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 403 } while (found); 404 #endif /* INET6 */ 405 406 /* convert ipad to opad */ 407 for (i = 0; i < sizeof(sc->sc_pad); i++) 408 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 409 } 410 411 static void 412 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 413 unsigned char md[20]) 414 { 415 SHA1_CTX sha1ctx; 416 417 CARP_LOCK_ASSERT(sc); 418 419 /* fetch first half of inner hash */ 420 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 421 422 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 423 SHA1Final(md, &sha1ctx); 424 425 /* outer hash */ 426 SHA1Init(&sha1ctx); 427 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 428 SHA1Update(&sha1ctx, md, 20); 429 SHA1Final(md, &sha1ctx); 430 } 431 432 static int 433 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 434 unsigned char md[20]) 435 { 436 unsigned char md2[20]; 437 438 CARP_LOCK_ASSERT(sc); 439 440 carp_hmac_generate(sc, counter, md2); 441 442 return (bcmp(md, md2, sizeof(md2))); 443 } 444 445 /* 446 * process input packet. 447 * we have rearranged checks order compared to the rfc, 448 * but it seems more efficient this way or not possible otherwise. 449 */ 450 #ifdef INET 451 int 452 carp_input(struct mbuf **mp, int *offp, int proto) 453 { 454 struct mbuf *m = *mp; 455 struct ip *ip = mtod(m, struct ip *); 456 struct carp_header *ch; 457 int iplen, len; 458 459 iplen = *offp; 460 *mp = NULL; 461 462 CARPSTATS_INC(carps_ipackets); 463 464 if (!V_carp_allow) { 465 m_freem(m); 466 return (IPPROTO_DONE); 467 } 468 469 /* verify that the IP TTL is 255. */ 470 if (ip->ip_ttl != CARP_DFLTTL) { 471 CARPSTATS_INC(carps_badttl); 472 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 473 ip->ip_ttl, 474 m->m_pkthdr.rcvif->if_xname); 475 m_freem(m); 476 return (IPPROTO_DONE); 477 } 478 479 iplen = ip->ip_hl << 2; 480 481 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 482 CARPSTATS_INC(carps_badlen); 483 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 484 "on %s\n", __func__, m->m_len - sizeof(struct ip), 485 m->m_pkthdr.rcvif->if_xname); 486 m_freem(m); 487 return (IPPROTO_DONE); 488 } 489 490 if (iplen + sizeof(*ch) < m->m_len) { 491 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 492 CARPSTATS_INC(carps_hdrops); 493 CARP_DEBUG("%s: pullup failed\n", __func__); 494 return (IPPROTO_DONE); 495 } 496 ip = mtod(m, struct ip *); 497 } 498 ch = (struct carp_header *)((char *)ip + iplen); 499 500 /* 501 * verify that the received packet length is 502 * equal to the CARP header 503 */ 504 len = iplen + sizeof(*ch); 505 if (len > m->m_pkthdr.len) { 506 CARPSTATS_INC(carps_badlen); 507 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 508 m->m_pkthdr.len, 509 m->m_pkthdr.rcvif->if_xname); 510 m_freem(m); 511 return (IPPROTO_DONE); 512 } 513 514 if ((m = m_pullup(m, len)) == NULL) { 515 CARPSTATS_INC(carps_hdrops); 516 return (IPPROTO_DONE); 517 } 518 ip = mtod(m, struct ip *); 519 ch = (struct carp_header *)((char *)ip + iplen); 520 521 /* verify the CARP checksum */ 522 m->m_data += iplen; 523 if (in_cksum(m, len - iplen)) { 524 CARPSTATS_INC(carps_badsum); 525 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 526 m->m_pkthdr.rcvif->if_xname); 527 m_freem(m); 528 return (IPPROTO_DONE); 529 } 530 m->m_data -= iplen; 531 532 carp_input_c(m, ch, AF_INET); 533 return (IPPROTO_DONE); 534 } 535 #endif 536 537 #ifdef INET6 538 int 539 carp6_input(struct mbuf **mp, int *offp, int proto) 540 { 541 struct mbuf *m = *mp; 542 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 543 struct carp_header *ch; 544 u_int len; 545 546 CARPSTATS_INC(carps_ipackets6); 547 548 if (!V_carp_allow) { 549 m_freem(m); 550 return (IPPROTO_DONE); 551 } 552 553 /* check if received on a valid carp interface */ 554 if (m->m_pkthdr.rcvif->if_carp == NULL) { 555 CARPSTATS_INC(carps_badif); 556 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 557 __func__, m->m_pkthdr.rcvif->if_xname); 558 m_freem(m); 559 return (IPPROTO_DONE); 560 } 561 562 /* verify that the IP TTL is 255 */ 563 if (ip6->ip6_hlim != CARP_DFLTTL) { 564 CARPSTATS_INC(carps_badttl); 565 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 566 ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); 567 m_freem(m); 568 return (IPPROTO_DONE); 569 } 570 571 /* verify that we have a complete carp packet */ 572 if (m->m_len < *offp + sizeof(*ch)) { 573 len = m->m_len; 574 m = m_pullup(m, *offp + sizeof(*ch)); 575 if (m == NULL) { 576 CARPSTATS_INC(carps_badlen); 577 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 578 return (IPPROTO_DONE); 579 } 580 } 581 ch = (struct carp_header *)(mtod(m, char *) + *offp); 582 583 /* verify the CARP checksum */ 584 m->m_data += *offp; 585 if (in_cksum(m, sizeof(*ch))) { 586 CARPSTATS_INC(carps_badsum); 587 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 588 m->m_pkthdr.rcvif->if_xname); 589 m_freem(m); 590 return (IPPROTO_DONE); 591 } 592 m->m_data -= *offp; 593 594 carp_input_c(m, ch, AF_INET6); 595 return (IPPROTO_DONE); 596 } 597 #endif /* INET6 */ 598 599 /* 600 * This routine should not be necessary at all, but some switches 601 * (VMWare ESX vswitches) can echo our own packets back at us, 602 * and we must ignore them or they will cause us to drop out of 603 * MASTER mode. 604 * 605 * We cannot catch all cases of network loops. Instead, what we 606 * do here is catch any packet that arrives with a carp header 607 * with a VHID of 0, that comes from an address that is our own. 608 * These packets are by definition "from us" (even if they are from 609 * a misconfigured host that is pretending to be us). 610 * 611 * The VHID test is outside this mini-function. 612 */ 613 static int 614 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 615 { 616 #ifdef INET 617 struct ip *ip4; 618 struct in_addr in4; 619 #endif 620 #ifdef INET6 621 struct ip6_hdr *ip6; 622 struct in6_addr in6; 623 #endif 624 625 switch (af) { 626 #ifdef INET 627 case AF_INET: 628 ip4 = mtod(m, struct ip *); 629 in4 = ifatoia(ifa)->ia_addr.sin_addr; 630 return (in4.s_addr == ip4->ip_src.s_addr); 631 #endif 632 #ifdef INET6 633 case AF_INET6: 634 ip6 = mtod(m, struct ip6_hdr *); 635 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 636 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 637 #endif 638 default: 639 break; 640 } 641 return (0); 642 } 643 644 static void 645 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 646 { 647 struct ifnet *ifp = m->m_pkthdr.rcvif; 648 struct ifaddr *ifa, *match; 649 struct carp_softc *sc; 650 uint64_t tmp_counter; 651 struct timeval sc_tv, ch_tv; 652 int error; 653 654 NET_EPOCH_ASSERT(); 655 656 /* 657 * Verify that the VHID is valid on the receiving interface. 658 * 659 * There should be just one match. If there are none 660 * the VHID is not valid and we drop the packet. If 661 * there are multiple VHID matches, take just the first 662 * one, for compatibility with previous code. While we're 663 * scanning, check for obvious loops in the network topology 664 * (these should never happen, and as noted above, we may 665 * miss real loops; this is just a double-check). 666 */ 667 error = 0; 668 match = NULL; 669 IFNET_FOREACH_IFA(ifp, ifa) { 670 if (match == NULL && ifa->ifa_carp != NULL && 671 ifa->ifa_addr->sa_family == af && 672 ifa->ifa_carp->sc_vhid == ch->carp_vhid) 673 match = ifa; 674 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) 675 error = ELOOP; 676 } 677 ifa = error ? NULL : match; 678 if (ifa != NULL) 679 ifa_ref(ifa); 680 681 if (ifa == NULL) { 682 if (error == ELOOP) { 683 CARP_DEBUG("dropping looped packet on interface %s\n", 684 ifp->if_xname); 685 CARPSTATS_INC(carps_badif); /* ??? */ 686 } else { 687 CARPSTATS_INC(carps_badvhid); 688 } 689 m_freem(m); 690 return; 691 } 692 693 /* verify the CARP version. */ 694 if (ch->carp_version != CARP_VERSION) { 695 CARPSTATS_INC(carps_badver); 696 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname, 697 ch->carp_version); 698 ifa_free(ifa); 699 m_freem(m); 700 return; 701 } 702 703 sc = ifa->ifa_carp; 704 CARP_LOCK(sc); 705 ifa_free(ifa); 706 707 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 708 CARPSTATS_INC(carps_badauth); 709 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 710 sc->sc_vhid, ifp->if_xname); 711 goto out; 712 } 713 714 tmp_counter = ntohl(ch->carp_counter[0]); 715 tmp_counter = tmp_counter<<32; 716 tmp_counter += ntohl(ch->carp_counter[1]); 717 718 /* XXX Replay protection goes here */ 719 720 sc->sc_init_counter = 0; 721 sc->sc_counter = tmp_counter; 722 723 sc_tv.tv_sec = sc->sc_advbase; 724 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 725 ch_tv.tv_sec = ch->carp_advbase; 726 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 727 728 switch (sc->sc_state) { 729 case INIT: 730 break; 731 case MASTER: 732 /* 733 * If we receive an advertisement from a master who's going to 734 * be more frequent than us, go into BACKUP state. 735 */ 736 if (timevalcmp(&sc_tv, &ch_tv, >) || 737 timevalcmp(&sc_tv, &ch_tv, ==)) { 738 callout_stop(&sc->sc_ad_tmo); 739 carp_set_state(sc, BACKUP, 740 "more frequent advertisement received"); 741 carp_setrun(sc, 0); 742 carp_delroute(sc); 743 } 744 break; 745 case BACKUP: 746 /* 747 * If we're pre-empting masters who advertise slower than us, 748 * and this one claims to be slower, treat him as down. 749 */ 750 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 751 carp_master_down_locked(sc, 752 "preempting a slower master"); 753 break; 754 } 755 756 /* 757 * If the master is going to advertise at such a low frequency 758 * that he's guaranteed to time out, we'd might as well just 759 * treat him as timed out now. 760 */ 761 sc_tv.tv_sec = sc->sc_advbase * 3; 762 if (timevalcmp(&sc_tv, &ch_tv, <)) { 763 carp_master_down_locked(sc, "master will time out"); 764 break; 765 } 766 767 /* 768 * Otherwise, we reset the counter and wait for the next 769 * advertisement. 770 */ 771 carp_setrun(sc, af); 772 break; 773 } 774 775 out: 776 CARP_UNLOCK(sc); 777 m_freem(m); 778 } 779 780 static int 781 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 782 { 783 struct m_tag *mtag; 784 785 if (sc->sc_init_counter) { 786 /* this could also be seconds since unix epoch */ 787 sc->sc_counter = arc4random(); 788 sc->sc_counter = sc->sc_counter << 32; 789 sc->sc_counter += arc4random(); 790 } else 791 sc->sc_counter++; 792 793 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 794 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 795 796 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 797 798 /* Tag packet for carp_output */ 799 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), 800 M_NOWAIT)) == NULL) { 801 m_freem(m); 802 CARPSTATS_INC(carps_onomem); 803 return (ENOMEM); 804 } 805 bcopy(&sc, mtag + 1, sizeof(sc)); 806 m_tag_prepend(m, mtag); 807 808 return (0); 809 } 810 811 /* 812 * To avoid LORs and possible recursions this function shouldn't 813 * be called directly, but scheduled via taskqueue. 814 */ 815 static void 816 carp_send_ad_all(void *ctx __unused, int pending __unused) 817 { 818 struct carp_softc *sc; 819 struct epoch_tracker et; 820 821 NET_EPOCH_ENTER(et); 822 mtx_lock(&carp_mtx); 823 LIST_FOREACH(sc, &carp_list, sc_next) 824 if (sc->sc_state == MASTER) { 825 CARP_LOCK(sc); 826 CURVNET_SET(sc->sc_carpdev->if_vnet); 827 carp_send_ad_locked(sc); 828 CURVNET_RESTORE(); 829 CARP_UNLOCK(sc); 830 } 831 mtx_unlock(&carp_mtx); 832 NET_EPOCH_EXIT(et); 833 } 834 835 /* Send a periodic advertisement, executed in callout context. */ 836 static void 837 carp_send_ad(void *v) 838 { 839 struct carp_softc *sc = v; 840 struct epoch_tracker et; 841 842 NET_EPOCH_ENTER(et); 843 CARP_LOCK_ASSERT(sc); 844 CURVNET_SET(sc->sc_carpdev->if_vnet); 845 carp_send_ad_locked(sc); 846 CURVNET_RESTORE(); 847 CARP_UNLOCK(sc); 848 NET_EPOCH_EXIT(et); 849 } 850 851 static void 852 carp_send_ad_error(struct carp_softc *sc, int error) 853 { 854 855 if (error) { 856 if (sc->sc_sendad_errors < INT_MAX) 857 sc->sc_sendad_errors++; 858 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 859 static const char fmt[] = "send error %d on %s"; 860 char msg[sizeof(fmt) + IFNAMSIZ]; 861 862 sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); 863 carp_demote_adj(V_carp_senderr_adj, msg); 864 } 865 sc->sc_sendad_success = 0; 866 } else { 867 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS && 868 ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 869 static const char fmt[] = "send ok on %s"; 870 char msg[sizeof(fmt) + IFNAMSIZ]; 871 872 sprintf(msg, fmt, sc->sc_carpdev->if_xname); 873 carp_demote_adj(-V_carp_senderr_adj, msg); 874 sc->sc_sendad_errors = 0; 875 } else 876 sc->sc_sendad_errors = 0; 877 } 878 } 879 880 /* 881 * Pick the best ifaddr on the given ifp for sending CARP 882 * advertisements. 883 * 884 * "Best" here is defined by ifa_preferred(). This function is much 885 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 886 * 887 * (This could be simplified to return the actual address, except that 888 * it has a different format in AF_INET and AF_INET6.) 889 */ 890 static struct ifaddr * 891 carp_best_ifa(int af, struct ifnet *ifp) 892 { 893 struct ifaddr *ifa, *best; 894 895 NET_EPOCH_ASSERT(); 896 897 if (af >= AF_MAX) 898 return (NULL); 899 best = NULL; 900 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 901 if (ifa->ifa_addr->sa_family == af && 902 (best == NULL || ifa_preferred(best, ifa))) 903 best = ifa; 904 } 905 if (best != NULL) 906 ifa_ref(best); 907 return (best); 908 } 909 910 static void 911 carp_send_ad_locked(struct carp_softc *sc) 912 { 913 struct carp_header ch; 914 struct timeval tv; 915 struct ifaddr *ifa; 916 struct carp_header *ch_ptr; 917 struct mbuf *m; 918 int len, advskew; 919 920 NET_EPOCH_ASSERT(); 921 CARP_LOCK_ASSERT(sc); 922 923 advskew = DEMOTE_ADVSKEW(sc); 924 tv.tv_sec = sc->sc_advbase; 925 tv.tv_usec = advskew * 1000000 / 256; 926 927 ch.carp_version = CARP_VERSION; 928 ch.carp_type = CARP_ADVERTISEMENT; 929 ch.carp_vhid = sc->sc_vhid; 930 ch.carp_advbase = sc->sc_advbase; 931 ch.carp_advskew = advskew; 932 ch.carp_authlen = 7; /* XXX DEFINE */ 933 ch.carp_pad1 = 0; /* must be zero */ 934 ch.carp_cksum = 0; 935 936 /* XXXGL: OpenBSD picks first ifaddr with needed family. */ 937 938 #ifdef INET 939 if (sc->sc_naddrs) { 940 struct ip *ip; 941 942 m = m_gethdr(M_NOWAIT, MT_DATA); 943 if (m == NULL) { 944 CARPSTATS_INC(carps_onomem); 945 goto resched; 946 } 947 len = sizeof(*ip) + sizeof(ch); 948 m->m_pkthdr.len = len; 949 m->m_pkthdr.rcvif = NULL; 950 m->m_len = len; 951 M_ALIGN(m, m->m_len); 952 m->m_flags |= M_MCAST; 953 ip = mtod(m, struct ip *); 954 ip->ip_v = IPVERSION; 955 ip->ip_hl = sizeof(*ip) >> 2; 956 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 957 ip->ip_len = htons(len); 958 ip->ip_off = htons(IP_DF); 959 ip->ip_ttl = CARP_DFLTTL; 960 ip->ip_p = IPPROTO_CARP; 961 ip->ip_sum = 0; 962 ip_fillid(ip); 963 964 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 965 if (ifa != NULL) { 966 ip->ip_src.s_addr = 967 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 968 ifa_free(ifa); 969 } else 970 ip->ip_src.s_addr = 0; 971 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 972 973 ch_ptr = (struct carp_header *)(&ip[1]); 974 bcopy(&ch, ch_ptr, sizeof(ch)); 975 if (carp_prepare_ad(m, sc, ch_ptr)) 976 goto resched; 977 978 m->m_data += sizeof(*ip); 979 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 980 m->m_data -= sizeof(*ip); 981 982 CARPSTATS_INC(carps_opackets); 983 984 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 985 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 986 } 987 #endif /* INET */ 988 #ifdef INET6 989 if (sc->sc_naddrs6) { 990 struct ip6_hdr *ip6; 991 992 m = m_gethdr(M_NOWAIT, MT_DATA); 993 if (m == NULL) { 994 CARPSTATS_INC(carps_onomem); 995 goto resched; 996 } 997 len = sizeof(*ip6) + sizeof(ch); 998 m->m_pkthdr.len = len; 999 m->m_pkthdr.rcvif = NULL; 1000 m->m_len = len; 1001 M_ALIGN(m, m->m_len); 1002 m->m_flags |= M_MCAST; 1003 ip6 = mtod(m, struct ip6_hdr *); 1004 bzero(ip6, sizeof(*ip6)); 1005 ip6->ip6_vfc |= IPV6_VERSION; 1006 /* Traffic class isn't defined in ip6 struct instead 1007 * it gets offset into flowid field */ 1008 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + 1009 IPTOS_DSCP_OFFSET)); 1010 ip6->ip6_hlim = CARP_DFLTTL; 1011 ip6->ip6_nxt = IPPROTO_CARP; 1012 1013 /* set the source address */ 1014 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1015 if (ifa != NULL) { 1016 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1017 sizeof(struct in6_addr)); 1018 ifa_free(ifa); 1019 } else 1020 /* This should never happen with IPv6. */ 1021 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1022 1023 /* Set the multicast destination. */ 1024 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1025 ip6->ip6_dst.s6_addr8[15] = 0x12; 1026 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1027 m_freem(m); 1028 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1029 goto resched; 1030 } 1031 1032 ch_ptr = (struct carp_header *)(&ip6[1]); 1033 bcopy(&ch, ch_ptr, sizeof(ch)); 1034 if (carp_prepare_ad(m, sc, ch_ptr)) 1035 goto resched; 1036 1037 m->m_data += sizeof(*ip6); 1038 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1039 m->m_data -= sizeof(*ip6); 1040 1041 CARPSTATS_INC(carps_opackets6); 1042 1043 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1044 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1045 } 1046 #endif /* INET6 */ 1047 1048 resched: 1049 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); 1050 } 1051 1052 static void 1053 carp_addroute(struct carp_softc *sc) 1054 { 1055 struct ifaddr *ifa; 1056 1057 CARP_FOREACH_IFA(sc, ifa) 1058 carp_ifa_addroute(ifa); 1059 } 1060 1061 static void 1062 carp_ifa_addroute(struct ifaddr *ifa) 1063 { 1064 1065 switch (ifa->ifa_addr->sa_family) { 1066 #ifdef INET 1067 case AF_INET: 1068 in_addprefix(ifatoia(ifa), RTF_UP); 1069 ifa_add_loopback_route(ifa, 1070 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1071 break; 1072 #endif 1073 #ifdef INET6 1074 case AF_INET6: 1075 ifa_add_loopback_route(ifa, 1076 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1077 nd6_add_ifa_lle(ifatoia6(ifa)); 1078 break; 1079 #endif 1080 } 1081 } 1082 1083 static void 1084 carp_delroute(struct carp_softc *sc) 1085 { 1086 struct ifaddr *ifa; 1087 1088 CARP_FOREACH_IFA(sc, ifa) 1089 carp_ifa_delroute(ifa); 1090 } 1091 1092 static void 1093 carp_ifa_delroute(struct ifaddr *ifa) 1094 { 1095 1096 switch (ifa->ifa_addr->sa_family) { 1097 #ifdef INET 1098 case AF_INET: 1099 ifa_del_loopback_route(ifa, 1100 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1101 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1102 break; 1103 #endif 1104 #ifdef INET6 1105 case AF_INET6: 1106 ifa_del_loopback_route(ifa, 1107 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1108 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1109 break; 1110 #endif 1111 } 1112 } 1113 1114 int 1115 carp_master(struct ifaddr *ifa) 1116 { 1117 struct carp_softc *sc = ifa->ifa_carp; 1118 1119 return (sc->sc_state == MASTER); 1120 } 1121 1122 #ifdef INET 1123 /* 1124 * Broadcast a gratuitous ARP request containing 1125 * the virtual router MAC address for each IP address 1126 * associated with the virtual router. 1127 */ 1128 static void 1129 carp_send_arp(struct carp_softc *sc) 1130 { 1131 struct ifaddr *ifa; 1132 struct in_addr addr; 1133 1134 NET_EPOCH_ASSERT(); 1135 1136 CARP_FOREACH_IFA(sc, ifa) { 1137 if (ifa->ifa_addr->sa_family != AF_INET) 1138 continue; 1139 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1140 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); 1141 } 1142 } 1143 1144 int 1145 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1146 { 1147 struct carp_softc *sc = ifa->ifa_carp; 1148 1149 if (sc->sc_state == MASTER) { 1150 *enaddr = LLADDR(&sc->sc_addr); 1151 return (1); 1152 } 1153 1154 return (0); 1155 } 1156 #endif 1157 1158 #ifdef INET6 1159 static void 1160 carp_send_na(struct carp_softc *sc) 1161 { 1162 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1163 struct ifaddr *ifa; 1164 struct in6_addr *in6; 1165 1166 CARP_FOREACH_IFA(sc, ifa) { 1167 if (ifa->ifa_addr->sa_family != AF_INET6) 1168 continue; 1169 1170 in6 = IFA_IN6(ifa); 1171 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1172 ND_NA_FLAG_OVERRIDE, 1, NULL); 1173 DELAY(1000); /* XXX */ 1174 } 1175 } 1176 1177 /* 1178 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1179 * matches and is not a carp address. Returns NULL otherwise. 1180 */ 1181 struct ifaddr * 1182 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1183 { 1184 struct ifaddr *ifa; 1185 1186 NET_EPOCH_ASSERT(); 1187 1188 ifa = NULL; 1189 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1190 if (ifa->ifa_addr->sa_family != AF_INET6) 1191 continue; 1192 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1193 continue; 1194 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1195 ifa = NULL; 1196 else 1197 ifa_ref(ifa); 1198 break; 1199 } 1200 1201 return (ifa); 1202 } 1203 1204 char * 1205 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1206 { 1207 struct ifaddr *ifa; 1208 1209 NET_EPOCH_ASSERT(); 1210 1211 IFNET_FOREACH_IFA(ifp, ifa) 1212 if (ifa->ifa_addr->sa_family == AF_INET6 && 1213 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1214 struct carp_softc *sc = ifa->ifa_carp; 1215 struct m_tag *mtag; 1216 1217 mtag = m_tag_get(PACKET_TAG_CARP, 1218 sizeof(struct carp_softc *), M_NOWAIT); 1219 if (mtag == NULL) 1220 /* Better a bit than nothing. */ 1221 return (LLADDR(&sc->sc_addr)); 1222 1223 bcopy(&sc, mtag + 1, sizeof(sc)); 1224 m_tag_prepend(m, mtag); 1225 1226 return (LLADDR(&sc->sc_addr)); 1227 } 1228 1229 return (NULL); 1230 } 1231 #endif /* INET6 */ 1232 1233 int 1234 carp_forus(struct ifnet *ifp, u_char *dhost) 1235 { 1236 struct carp_softc *sc; 1237 uint8_t *ena = dhost; 1238 1239 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1240 return (0); 1241 1242 CIF_LOCK(ifp->if_carp); 1243 IFNET_FOREACH_CARP(ifp, sc) { 1244 /* 1245 * CARP_LOCK() is not here, since would protect nothing, but 1246 * cause deadlock with if_bridge, calling this under its lock. 1247 */ 1248 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1249 ETHER_ADDR_LEN)) { 1250 CIF_UNLOCK(ifp->if_carp); 1251 return (1); 1252 } 1253 } 1254 CIF_UNLOCK(ifp->if_carp); 1255 1256 return (0); 1257 } 1258 1259 /* Master down timeout event, executed in callout context. */ 1260 static void 1261 carp_master_down(void *v) 1262 { 1263 struct carp_softc *sc = v; 1264 struct epoch_tracker et; 1265 1266 NET_EPOCH_ENTER(et); 1267 CARP_LOCK_ASSERT(sc); 1268 1269 CURVNET_SET(sc->sc_carpdev->if_vnet); 1270 if (sc->sc_state == BACKUP) { 1271 carp_master_down_locked(sc, "master timed out"); 1272 } 1273 CURVNET_RESTORE(); 1274 1275 CARP_UNLOCK(sc); 1276 NET_EPOCH_EXIT(et); 1277 } 1278 1279 static void 1280 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1281 { 1282 1283 NET_EPOCH_ASSERT(); 1284 CARP_LOCK_ASSERT(sc); 1285 1286 switch (sc->sc_state) { 1287 case BACKUP: 1288 carp_set_state(sc, MASTER, reason); 1289 carp_send_ad_locked(sc); 1290 #ifdef INET 1291 carp_send_arp(sc); 1292 #endif 1293 #ifdef INET6 1294 carp_send_na(sc); 1295 #endif 1296 carp_setrun(sc, 0); 1297 carp_addroute(sc); 1298 break; 1299 case INIT: 1300 case MASTER: 1301 #ifdef INVARIANTS 1302 panic("carp: VHID %u@%s: master_down event in %s state\n", 1303 sc->sc_vhid, 1304 sc->sc_carpdev->if_xname, 1305 sc->sc_state ? "MASTER" : "INIT"); 1306 #endif 1307 break; 1308 } 1309 } 1310 1311 /* 1312 * When in backup state, af indicates whether to reset the master down timer 1313 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1314 */ 1315 static void 1316 carp_setrun(struct carp_softc *sc, sa_family_t af) 1317 { 1318 struct timeval tv; 1319 1320 CARP_LOCK_ASSERT(sc); 1321 1322 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1323 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1324 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1325 !V_carp_allow) 1326 return; 1327 1328 switch (sc->sc_state) { 1329 case INIT: 1330 carp_set_state(sc, BACKUP, "initialization complete"); 1331 carp_setrun(sc, 0); 1332 break; 1333 case BACKUP: 1334 callout_stop(&sc->sc_ad_tmo); 1335 tv.tv_sec = 3 * sc->sc_advbase; 1336 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1337 switch (af) { 1338 #ifdef INET 1339 case AF_INET: 1340 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1341 carp_master_down, sc); 1342 break; 1343 #endif 1344 #ifdef INET6 1345 case AF_INET6: 1346 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1347 carp_master_down, sc); 1348 break; 1349 #endif 1350 default: 1351 #ifdef INET 1352 if (sc->sc_naddrs) 1353 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1354 carp_master_down, sc); 1355 #endif 1356 #ifdef INET6 1357 if (sc->sc_naddrs6) 1358 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1359 carp_master_down, sc); 1360 #endif 1361 break; 1362 } 1363 break; 1364 case MASTER: 1365 tv.tv_sec = sc->sc_advbase; 1366 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1367 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1368 carp_send_ad, sc); 1369 break; 1370 } 1371 } 1372 1373 /* 1374 * Setup multicast structures. 1375 */ 1376 static int 1377 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1378 { 1379 struct ifnet *ifp = cif->cif_ifp; 1380 int error = 0; 1381 1382 switch (sa) { 1383 #ifdef INET 1384 case AF_INET: 1385 { 1386 struct ip_moptions *imo = &cif->cif_imo; 1387 struct in_mfilter *imf; 1388 struct in_addr addr; 1389 1390 if (ip_mfilter_first(&imo->imo_head) != NULL) 1391 return (0); 1392 1393 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1394 ip_mfilter_init(&imo->imo_head); 1395 imo->imo_multicast_vif = -1; 1396 1397 addr.s_addr = htonl(INADDR_CARP_GROUP); 1398 if ((error = in_joingroup(ifp, &addr, NULL, 1399 &imf->imf_inm)) != 0) { 1400 ip_mfilter_free(imf); 1401 break; 1402 } 1403 1404 ip_mfilter_insert(&imo->imo_head, imf); 1405 imo->imo_multicast_ifp = ifp; 1406 imo->imo_multicast_ttl = CARP_DFLTTL; 1407 imo->imo_multicast_loop = 0; 1408 break; 1409 } 1410 #endif 1411 #ifdef INET6 1412 case AF_INET6: 1413 { 1414 struct ip6_moptions *im6o = &cif->cif_im6o; 1415 struct in6_mfilter *im6f[2]; 1416 struct in6_addr in6; 1417 1418 if (ip6_mfilter_first(&im6o->im6o_head)) 1419 return (0); 1420 1421 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1422 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1423 1424 ip6_mfilter_init(&im6o->im6o_head); 1425 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1426 im6o->im6o_multicast_ifp = ifp; 1427 1428 /* Join IPv6 CARP multicast group. */ 1429 bzero(&in6, sizeof(in6)); 1430 in6.s6_addr16[0] = htons(0xff02); 1431 in6.s6_addr8[15] = 0x12; 1432 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1433 ip6_mfilter_free(im6f[0]); 1434 ip6_mfilter_free(im6f[1]); 1435 break; 1436 } 1437 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1438 ip6_mfilter_free(im6f[0]); 1439 ip6_mfilter_free(im6f[1]); 1440 break; 1441 } 1442 1443 /* Join solicited multicast address. */ 1444 bzero(&in6, sizeof(in6)); 1445 in6.s6_addr16[0] = htons(0xff02); 1446 in6.s6_addr32[1] = 0; 1447 in6.s6_addr32[2] = htonl(1); 1448 in6.s6_addr32[3] = 0; 1449 in6.s6_addr8[12] = 0xff; 1450 1451 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1452 ip6_mfilter_free(im6f[0]); 1453 ip6_mfilter_free(im6f[1]); 1454 break; 1455 } 1456 1457 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1458 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1459 ip6_mfilter_free(im6f[0]); 1460 ip6_mfilter_free(im6f[1]); 1461 break; 1462 } 1463 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1464 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1465 break; 1466 } 1467 #endif 1468 } 1469 1470 return (error); 1471 } 1472 1473 /* 1474 * Free multicast structures. 1475 */ 1476 static void 1477 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1478 { 1479 #ifdef INET 1480 struct ip_moptions *imo = &cif->cif_imo; 1481 struct in_mfilter *imf; 1482 #endif 1483 #ifdef INET6 1484 struct ip6_moptions *im6o = &cif->cif_im6o; 1485 struct in6_mfilter *im6f; 1486 #endif 1487 sx_assert(&carp_sx, SA_XLOCKED); 1488 1489 switch (sa) { 1490 #ifdef INET 1491 case AF_INET: 1492 if (cif->cif_naddrs != 0) 1493 break; 1494 1495 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 1496 ip_mfilter_remove(&imo->imo_head, imf); 1497 in_leavegroup(imf->imf_inm, NULL); 1498 ip_mfilter_free(imf); 1499 } 1500 break; 1501 #endif 1502 #ifdef INET6 1503 case AF_INET6: 1504 if (cif->cif_naddrs6 != 0) 1505 break; 1506 1507 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 1508 ip6_mfilter_remove(&im6o->im6o_head, im6f); 1509 in6_leavegroup(im6f->im6f_in6m, NULL); 1510 ip6_mfilter_free(im6f); 1511 } 1512 break; 1513 #endif 1514 } 1515 } 1516 1517 int 1518 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1519 { 1520 struct m_tag *mtag; 1521 struct carp_softc *sc; 1522 1523 if (!sa) 1524 return (0); 1525 1526 switch (sa->sa_family) { 1527 #ifdef INET 1528 case AF_INET: 1529 break; 1530 #endif 1531 #ifdef INET6 1532 case AF_INET6: 1533 break; 1534 #endif 1535 default: 1536 return (0); 1537 } 1538 1539 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1540 if (mtag == NULL) 1541 return (0); 1542 1543 bcopy(mtag + 1, &sc, sizeof(sc)); 1544 1545 /* Set the source MAC address to the Virtual Router MAC Address. */ 1546 switch (ifp->if_type) { 1547 case IFT_ETHER: 1548 case IFT_BRIDGE: 1549 case IFT_L2VLAN: { 1550 struct ether_header *eh; 1551 1552 eh = mtod(m, struct ether_header *); 1553 eh->ether_shost[0] = 0; 1554 eh->ether_shost[1] = 0; 1555 eh->ether_shost[2] = 0x5e; 1556 eh->ether_shost[3] = 0; 1557 eh->ether_shost[4] = 1; 1558 eh->ether_shost[5] = sc->sc_vhid; 1559 } 1560 break; 1561 default: 1562 printf("%s: carp is not supported for the %d interface type\n", 1563 ifp->if_xname, ifp->if_type); 1564 return (EOPNOTSUPP); 1565 } 1566 1567 return (0); 1568 } 1569 1570 static struct carp_softc* 1571 carp_alloc(struct ifnet *ifp) 1572 { 1573 struct carp_softc *sc; 1574 struct carp_if *cif; 1575 1576 sx_assert(&carp_sx, SA_XLOCKED); 1577 1578 if ((cif = ifp->if_carp) == NULL) 1579 cif = carp_alloc_if(ifp); 1580 1581 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1582 1583 sc->sc_advbase = CARP_DFLTINTV; 1584 sc->sc_vhid = -1; /* required setting */ 1585 sc->sc_init_counter = 1; 1586 sc->sc_state = INIT; 1587 1588 sc->sc_ifasiz = sizeof(struct ifaddr *); 1589 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1590 sc->sc_carpdev = ifp; 1591 1592 CARP_LOCK_INIT(sc); 1593 #ifdef INET 1594 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1595 #endif 1596 #ifdef INET6 1597 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1598 #endif 1599 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1600 1601 CIF_LOCK(cif); 1602 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1603 CIF_UNLOCK(cif); 1604 1605 mtx_lock(&carp_mtx); 1606 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1607 mtx_unlock(&carp_mtx); 1608 1609 return (sc); 1610 } 1611 1612 static void 1613 carp_grow_ifas(struct carp_softc *sc) 1614 { 1615 struct ifaddr **new; 1616 1617 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 1618 CARP_LOCK(sc); 1619 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1620 free(sc->sc_ifas, M_CARP); 1621 sc->sc_ifas = new; 1622 sc->sc_ifasiz *= 2; 1623 CARP_UNLOCK(sc); 1624 } 1625 1626 static void 1627 carp_destroy(struct carp_softc *sc) 1628 { 1629 struct ifnet *ifp = sc->sc_carpdev; 1630 struct carp_if *cif = ifp->if_carp; 1631 1632 sx_assert(&carp_sx, SA_XLOCKED); 1633 1634 if (sc->sc_suppress) 1635 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 1636 CARP_UNLOCK(sc); 1637 1638 CIF_LOCK(cif); 1639 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1640 CIF_UNLOCK(cif); 1641 1642 mtx_lock(&carp_mtx); 1643 LIST_REMOVE(sc, sc_next); 1644 mtx_unlock(&carp_mtx); 1645 1646 callout_drain(&sc->sc_ad_tmo); 1647 #ifdef INET 1648 callout_drain(&sc->sc_md_tmo); 1649 #endif 1650 #ifdef INET6 1651 callout_drain(&sc->sc_md6_tmo); 1652 #endif 1653 CARP_LOCK_DESTROY(sc); 1654 1655 free(sc->sc_ifas, M_CARP); 1656 free(sc, M_CARP); 1657 } 1658 1659 static struct carp_if* 1660 carp_alloc_if(struct ifnet *ifp) 1661 { 1662 struct carp_if *cif; 1663 int error; 1664 1665 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1666 1667 if ((error = ifpromisc(ifp, 1)) != 0) 1668 printf("%s: ifpromisc(%s) failed: %d\n", 1669 __func__, ifp->if_xname, error); 1670 else 1671 cif->cif_flags |= CIF_PROMISC; 1672 1673 CIF_LOCK_INIT(cif); 1674 cif->cif_ifp = ifp; 1675 TAILQ_INIT(&cif->cif_vrs); 1676 1677 IF_ADDR_WLOCK(ifp); 1678 ifp->if_carp = cif; 1679 if_ref(ifp); 1680 IF_ADDR_WUNLOCK(ifp); 1681 1682 return (cif); 1683 } 1684 1685 static void 1686 carp_free_if(struct carp_if *cif) 1687 { 1688 struct ifnet *ifp = cif->cif_ifp; 1689 1690 CIF_LOCK_ASSERT(cif); 1691 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1692 __func__)); 1693 1694 IF_ADDR_WLOCK(ifp); 1695 ifp->if_carp = NULL; 1696 IF_ADDR_WUNLOCK(ifp); 1697 1698 CIF_LOCK_DESTROY(cif); 1699 1700 if (cif->cif_flags & CIF_PROMISC) 1701 ifpromisc(ifp, 0); 1702 if_rele(ifp); 1703 1704 free(cif, M_CARP); 1705 } 1706 1707 static void 1708 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) 1709 { 1710 1711 CARP_LOCK(sc); 1712 carpr->carpr_state = sc->sc_state; 1713 carpr->carpr_vhid = sc->sc_vhid; 1714 carpr->carpr_advbase = sc->sc_advbase; 1715 carpr->carpr_advskew = sc->sc_advskew; 1716 if (priv) 1717 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1718 else 1719 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1720 CARP_UNLOCK(sc); 1721 } 1722 1723 int 1724 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1725 { 1726 struct carpreq carpr; 1727 struct ifnet *ifp; 1728 struct carp_softc *sc = NULL; 1729 int error = 0, locked = 0; 1730 1731 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 1732 return (error); 1733 1734 ifp = ifunit_ref(ifr->ifr_name); 1735 if (ifp == NULL) 1736 return (ENXIO); 1737 1738 switch (ifp->if_type) { 1739 case IFT_ETHER: 1740 case IFT_L2VLAN: 1741 case IFT_BRIDGE: 1742 break; 1743 default: 1744 error = EOPNOTSUPP; 1745 goto out; 1746 } 1747 1748 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1749 error = EADDRNOTAVAIL; 1750 goto out; 1751 } 1752 1753 sx_xlock(&carp_sx); 1754 switch (cmd) { 1755 case SIOCSVH: 1756 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1757 break; 1758 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || 1759 carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { 1760 error = EINVAL; 1761 break; 1762 } 1763 1764 if (ifp->if_carp) { 1765 IFNET_FOREACH_CARP(ifp, sc) 1766 if (sc->sc_vhid == carpr.carpr_vhid) 1767 break; 1768 } 1769 if (sc == NULL) { 1770 sc = carp_alloc(ifp); 1771 CARP_LOCK(sc); 1772 sc->sc_vhid = carpr.carpr_vhid; 1773 LLADDR(&sc->sc_addr)[0] = 0; 1774 LLADDR(&sc->sc_addr)[1] = 0; 1775 LLADDR(&sc->sc_addr)[2] = 0x5e; 1776 LLADDR(&sc->sc_addr)[3] = 0; 1777 LLADDR(&sc->sc_addr)[4] = 1; 1778 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1779 } else 1780 CARP_LOCK(sc); 1781 locked = 1; 1782 if (carpr.carpr_advbase > 0) { 1783 if (carpr.carpr_advbase > 255 || 1784 carpr.carpr_advbase < CARP_DFLTINTV) { 1785 error = EINVAL; 1786 break; 1787 } 1788 sc->sc_advbase = carpr.carpr_advbase; 1789 } 1790 if (carpr.carpr_advskew >= 255) { 1791 error = EINVAL; 1792 break; 1793 } 1794 sc->sc_advskew = carpr.carpr_advskew; 1795 if (carpr.carpr_key[0] != '\0') { 1796 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1797 carp_hmac_prepare(sc); 1798 } 1799 if (sc->sc_state != INIT && 1800 carpr.carpr_state != sc->sc_state) { 1801 switch (carpr.carpr_state) { 1802 case BACKUP: 1803 callout_stop(&sc->sc_ad_tmo); 1804 carp_set_state(sc, BACKUP, 1805 "user requested via ifconfig"); 1806 carp_setrun(sc, 0); 1807 carp_delroute(sc); 1808 break; 1809 case MASTER: 1810 carp_master_down_locked(sc, 1811 "user requested via ifconfig"); 1812 break; 1813 default: 1814 break; 1815 } 1816 } 1817 break; 1818 1819 case SIOCGVH: 1820 { 1821 int priveleged; 1822 1823 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { 1824 error = EINVAL; 1825 break; 1826 } 1827 if (carpr.carpr_count < 1) { 1828 error = EMSGSIZE; 1829 break; 1830 } 1831 if (ifp->if_carp == NULL) { 1832 error = ENOENT; 1833 break; 1834 } 1835 1836 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); 1837 if (carpr.carpr_vhid != 0) { 1838 IFNET_FOREACH_CARP(ifp, sc) 1839 if (sc->sc_vhid == carpr.carpr_vhid) 1840 break; 1841 if (sc == NULL) { 1842 error = ENOENT; 1843 break; 1844 } 1845 carp_carprcp(&carpr, sc, priveleged); 1846 error = copyout(&carpr, ifr_data_get_ptr(ifr), 1847 sizeof(carpr)); 1848 } else { 1849 int i, count; 1850 1851 count = 0; 1852 IFNET_FOREACH_CARP(ifp, sc) 1853 count++; 1854 1855 if (count > carpr.carpr_count) { 1856 CIF_UNLOCK(ifp->if_carp); 1857 error = EMSGSIZE; 1858 break; 1859 } 1860 1861 i = 0; 1862 IFNET_FOREACH_CARP(ifp, sc) { 1863 carp_carprcp(&carpr, sc, priveleged); 1864 carpr.carpr_count = count; 1865 error = copyout(&carpr, 1866 (char *)ifr_data_get_ptr(ifr) + 1867 (i * sizeof(carpr)), sizeof(carpr)); 1868 if (error) { 1869 CIF_UNLOCK(ifp->if_carp); 1870 break; 1871 } 1872 i++; 1873 } 1874 } 1875 break; 1876 } 1877 default: 1878 error = EINVAL; 1879 } 1880 sx_xunlock(&carp_sx); 1881 1882 out: 1883 if (locked) 1884 CARP_UNLOCK(sc); 1885 if_rele(ifp); 1886 1887 return (error); 1888 } 1889 1890 static int 1891 carp_get_vhid(struct ifaddr *ifa) 1892 { 1893 1894 if (ifa == NULL || ifa->ifa_carp == NULL) 1895 return (0); 1896 1897 return (ifa->ifa_carp->sc_vhid); 1898 } 1899 1900 int 1901 carp_attach(struct ifaddr *ifa, int vhid) 1902 { 1903 struct ifnet *ifp = ifa->ifa_ifp; 1904 struct carp_if *cif = ifp->if_carp; 1905 struct carp_softc *sc; 1906 int index, error; 1907 1908 KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); 1909 1910 switch (ifa->ifa_addr->sa_family) { 1911 #ifdef INET 1912 case AF_INET: 1913 #endif 1914 #ifdef INET6 1915 case AF_INET6: 1916 #endif 1917 break; 1918 default: 1919 return (EPROTOTYPE); 1920 } 1921 1922 sx_xlock(&carp_sx); 1923 if (ifp->if_carp == NULL) { 1924 sx_xunlock(&carp_sx); 1925 return (ENOPROTOOPT); 1926 } 1927 1928 IFNET_FOREACH_CARP(ifp, sc) 1929 if (sc->sc_vhid == vhid) 1930 break; 1931 if (sc == NULL) { 1932 sx_xunlock(&carp_sx); 1933 return (ENOENT); 1934 } 1935 1936 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 1937 if (error) { 1938 CIF_FREE(cif); 1939 sx_xunlock(&carp_sx); 1940 return (error); 1941 } 1942 1943 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 1944 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 1945 carp_grow_ifas(sc); 1946 1947 switch (ifa->ifa_addr->sa_family) { 1948 #ifdef INET 1949 case AF_INET: 1950 cif->cif_naddrs++; 1951 sc->sc_naddrs++; 1952 break; 1953 #endif 1954 #ifdef INET6 1955 case AF_INET6: 1956 cif->cif_naddrs6++; 1957 sc->sc_naddrs6++; 1958 break; 1959 #endif 1960 } 1961 1962 ifa_ref(ifa); 1963 1964 CARP_LOCK(sc); 1965 sc->sc_ifas[index - 1] = ifa; 1966 ifa->ifa_carp = sc; 1967 carp_hmac_prepare(sc); 1968 carp_sc_state(sc); 1969 CARP_UNLOCK(sc); 1970 1971 sx_xunlock(&carp_sx); 1972 1973 return (0); 1974 } 1975 1976 void 1977 carp_detach(struct ifaddr *ifa, bool keep_cif) 1978 { 1979 struct ifnet *ifp = ifa->ifa_ifp; 1980 struct carp_if *cif = ifp->if_carp; 1981 struct carp_softc *sc = ifa->ifa_carp; 1982 int i, index; 1983 1984 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 1985 1986 sx_xlock(&carp_sx); 1987 1988 CARP_LOCK(sc); 1989 /* Shift array. */ 1990 index = sc->sc_naddrs + sc->sc_naddrs6; 1991 for (i = 0; i < index; i++) 1992 if (sc->sc_ifas[i] == ifa) 1993 break; 1994 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 1995 for (; i < index - 1; i++) 1996 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 1997 sc->sc_ifas[index - 1] = NULL; 1998 1999 switch (ifa->ifa_addr->sa_family) { 2000 #ifdef INET 2001 case AF_INET: 2002 cif->cif_naddrs--; 2003 sc->sc_naddrs--; 2004 break; 2005 #endif 2006 #ifdef INET6 2007 case AF_INET6: 2008 cif->cif_naddrs6--; 2009 sc->sc_naddrs6--; 2010 break; 2011 #endif 2012 } 2013 2014 carp_ifa_delroute(ifa); 2015 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2016 2017 ifa->ifa_carp = NULL; 2018 ifa_free(ifa); 2019 2020 carp_hmac_prepare(sc); 2021 carp_sc_state(sc); 2022 2023 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2024 carp_destroy(sc); 2025 else 2026 CARP_UNLOCK(sc); 2027 2028 if (!keep_cif) 2029 CIF_FREE(cif); 2030 2031 sx_xunlock(&carp_sx); 2032 } 2033 2034 static void 2035 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2036 { 2037 2038 CARP_LOCK_ASSERT(sc); 2039 2040 if (sc->sc_state != state) { 2041 const char *carp_states[] = { CARP_STATES }; 2042 char subsys[IFNAMSIZ+5]; 2043 2044 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2045 sc->sc_carpdev->if_xname); 2046 2047 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2048 carp_states[sc->sc_state], carp_states[state], reason); 2049 2050 sc->sc_state = state; 2051 2052 devctl_notify("CARP", subsys, carp_states[state], NULL); 2053 } 2054 } 2055 2056 static void 2057 carp_linkstate(struct ifnet *ifp) 2058 { 2059 struct carp_softc *sc; 2060 2061 CIF_LOCK(ifp->if_carp); 2062 IFNET_FOREACH_CARP(ifp, sc) { 2063 CARP_LOCK(sc); 2064 carp_sc_state(sc); 2065 CARP_UNLOCK(sc); 2066 } 2067 CIF_UNLOCK(ifp->if_carp); 2068 } 2069 2070 static void 2071 carp_sc_state(struct carp_softc *sc) 2072 { 2073 2074 CARP_LOCK_ASSERT(sc); 2075 2076 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2077 !(sc->sc_carpdev->if_flags & IFF_UP) || 2078 !V_carp_allow) { 2079 callout_stop(&sc->sc_ad_tmo); 2080 #ifdef INET 2081 callout_stop(&sc->sc_md_tmo); 2082 #endif 2083 #ifdef INET6 2084 callout_stop(&sc->sc_md6_tmo); 2085 #endif 2086 carp_set_state(sc, INIT, "hardware interface down"); 2087 carp_setrun(sc, 0); 2088 if (!sc->sc_suppress) 2089 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2090 sc->sc_suppress = 1; 2091 } else { 2092 carp_set_state(sc, INIT, "hardware interface up"); 2093 carp_setrun(sc, 0); 2094 if (sc->sc_suppress) 2095 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2096 sc->sc_suppress = 0; 2097 } 2098 } 2099 2100 static void 2101 carp_demote_adj(int adj, char *reason) 2102 { 2103 atomic_add_int(&V_carp_demotion, adj); 2104 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2105 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2106 } 2107 2108 static int 2109 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2110 { 2111 int new, error; 2112 struct carp_softc *sc; 2113 2114 new = V_carp_allow; 2115 error = sysctl_handle_int(oidp, &new, 0, req); 2116 if (error || !req->newptr) 2117 return (error); 2118 2119 if (V_carp_allow != new) { 2120 V_carp_allow = new; 2121 2122 mtx_lock(&carp_mtx); 2123 LIST_FOREACH(sc, &carp_list, sc_next) { 2124 CARP_LOCK(sc); 2125 if (curvnet == sc->sc_carpdev->if_vnet) 2126 carp_sc_state(sc); 2127 CARP_UNLOCK(sc); 2128 } 2129 mtx_unlock(&carp_mtx); 2130 } 2131 2132 return (0); 2133 } 2134 2135 static int 2136 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2137 { 2138 int new, error; 2139 2140 new = V_carp_dscp; 2141 error = sysctl_handle_int(oidp, &new, 0, req); 2142 if (error || !req->newptr) 2143 return (error); 2144 2145 if (new < 0 || new > 63) 2146 return (EINVAL); 2147 2148 V_carp_dscp = new; 2149 2150 return (0); 2151 } 2152 2153 static int 2154 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2155 { 2156 int new, error; 2157 2158 new = V_carp_demotion; 2159 error = sysctl_handle_int(oidp, &new, 0, req); 2160 if (error || !req->newptr) 2161 return (error); 2162 2163 carp_demote_adj(new, "sysctl"); 2164 2165 return (0); 2166 } 2167 2168 #ifdef INET 2169 extern struct domain inetdomain; 2170 static struct protosw in_carp_protosw = { 2171 .pr_type = SOCK_RAW, 2172 .pr_domain = &inetdomain, 2173 .pr_protocol = IPPROTO_CARP, 2174 .pr_flags = PR_ATOMIC|PR_ADDR, 2175 .pr_input = carp_input, 2176 .pr_output = rip_output, 2177 .pr_ctloutput = rip_ctloutput, 2178 .pr_usrreqs = &rip_usrreqs 2179 }; 2180 #endif 2181 2182 #ifdef INET6 2183 extern struct domain inet6domain; 2184 static struct protosw in6_carp_protosw = { 2185 .pr_type = SOCK_RAW, 2186 .pr_domain = &inet6domain, 2187 .pr_protocol = IPPROTO_CARP, 2188 .pr_flags = PR_ATOMIC|PR_ADDR, 2189 .pr_input = carp6_input, 2190 .pr_output = rip6_output, 2191 .pr_ctloutput = rip6_ctloutput, 2192 .pr_usrreqs = &rip6_usrreqs 2193 }; 2194 #endif 2195 2196 static void 2197 carp_mod_cleanup(void) 2198 { 2199 2200 #ifdef INET 2201 if (proto_reg[CARP_INET] == 0) { 2202 (void)ipproto_unregister(IPPROTO_CARP); 2203 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); 2204 proto_reg[CARP_INET] = -1; 2205 } 2206 carp_iamatch_p = NULL; 2207 #endif 2208 #ifdef INET6 2209 if (proto_reg[CARP_INET6] == 0) { 2210 (void)ip6proto_unregister(IPPROTO_CARP); 2211 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); 2212 proto_reg[CARP_INET6] = -1; 2213 } 2214 carp_iamatch6_p = NULL; 2215 carp_macmatch6_p = NULL; 2216 #endif 2217 carp_ioctl_p = NULL; 2218 carp_attach_p = NULL; 2219 carp_detach_p = NULL; 2220 carp_get_vhid_p = NULL; 2221 carp_linkstate_p = NULL; 2222 carp_forus_p = NULL; 2223 carp_output_p = NULL; 2224 carp_demote_adj_p = NULL; 2225 carp_master_p = NULL; 2226 mtx_unlock(&carp_mtx); 2227 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2228 mtx_destroy(&carp_mtx); 2229 sx_destroy(&carp_sx); 2230 } 2231 2232 static int 2233 carp_mod_load(void) 2234 { 2235 int err; 2236 2237 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); 2238 sx_init(&carp_sx, "carp_sx"); 2239 LIST_INIT(&carp_list); 2240 carp_get_vhid_p = carp_get_vhid; 2241 carp_forus_p = carp_forus; 2242 carp_output_p = carp_output; 2243 carp_linkstate_p = carp_linkstate; 2244 carp_ioctl_p = carp_ioctl; 2245 carp_attach_p = carp_attach; 2246 carp_detach_p = carp_detach; 2247 carp_demote_adj_p = carp_demote_adj; 2248 carp_master_p = carp_master; 2249 #ifdef INET6 2250 carp_iamatch6_p = carp_iamatch6; 2251 carp_macmatch6_p = carp_macmatch6; 2252 proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, 2253 (struct protosw *)&in6_carp_protosw); 2254 if (proto_reg[CARP_INET6]) { 2255 printf("carp: error %d attaching to PF_INET6\n", 2256 proto_reg[CARP_INET6]); 2257 carp_mod_cleanup(); 2258 return (proto_reg[CARP_INET6]); 2259 } 2260 err = ip6proto_register(IPPROTO_CARP); 2261 if (err) { 2262 printf("carp: error %d registering with INET6\n", err); 2263 carp_mod_cleanup(); 2264 return (err); 2265 } 2266 #endif 2267 #ifdef INET 2268 carp_iamatch_p = carp_iamatch; 2269 proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); 2270 if (proto_reg[CARP_INET]) { 2271 printf("carp: error %d attaching to PF_INET\n", 2272 proto_reg[CARP_INET]); 2273 carp_mod_cleanup(); 2274 return (proto_reg[CARP_INET]); 2275 } 2276 err = ipproto_register(IPPROTO_CARP); 2277 if (err) { 2278 printf("carp: error %d registering with INET\n", err); 2279 carp_mod_cleanup(); 2280 return (err); 2281 } 2282 #endif 2283 return (0); 2284 } 2285 2286 static int 2287 carp_modevent(module_t mod, int type, void *data) 2288 { 2289 switch (type) { 2290 case MOD_LOAD: 2291 return carp_mod_load(); 2292 /* NOTREACHED */ 2293 case MOD_UNLOAD: 2294 mtx_lock(&carp_mtx); 2295 if (LIST_EMPTY(&carp_list)) 2296 carp_mod_cleanup(); 2297 else { 2298 mtx_unlock(&carp_mtx); 2299 return (EBUSY); 2300 } 2301 break; 2302 2303 default: 2304 return (EINVAL); 2305 } 2306 2307 return (0); 2308 } 2309 2310 static moduledata_t carp_mod = { 2311 "carp", 2312 carp_modevent, 2313 0 2314 }; 2315 2316 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2317