1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "opt_netlink.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/devctl.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/counter.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_carp_nl.h>
#include <netinet/ip.h>
#include <machine/in_cksum.h>
#endif
#ifdef INET
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_generic.h>
#include <netlink/netlink_message_parser.h>

#include <crypto/sha1.h>

static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");

/*
 * Per-vhid state.  One softc exists for every virtual host id configured
 * on a parent interface.
 */
struct carp_softc {
	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
	struct sockaddr_dl	sc_addr;	/* Our link level address. */
	struct callout		sc_ad_tmo;	/* Advertising timeout. */
#ifdef INET
	struct callout		sc_md_tmo;	/* Master down timeout. */
#endif
#ifdef INET6
	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
#endif
	struct mtx		sc_mtx;		/* Softc lock, see CARP_LOCK(). */

	int			sc_vhid;	/* Virtual host id. */
	/*
	 * Advertisement interval is sc_advbase seconds plus
	 * (demoted) sc_advskew / 256 of a second.
	 */
	int			sc_advskew;
	int			sc_advbase;
	/* Destination addresses used for outgoing advertisements. */
	struct in_addr		sc_carpaddr;
	struct in6_addr		sc_carpaddr6;

	/*
	 * IPv4 and IPv6 address counts; their sum bounds iteration over
	 * sc_ifas in CARP_FOREACH_IFA().
	 */
	int			sc_naddrs;
	int			sc_naddrs6;
	int			sc_ifasiz;	/* presumably the allocated size of sc_ifas -- TODO confirm */
	enum { INIT = 0, BACKUP, MASTER }	sc_state;
	int			sc_suppress;
	/* Send error/success accounting, see carp_send_ad_error(). */
	int			sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	int			sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS	3

	/* Non-zero: the next advertisement picks a fresh random counter. */
	int			sc_init_counter;
	uint64_t		sc_counter;	/* 64-bit advertisement counter. */

	/* authentication */
#define	CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];
	/* HMAC pad block derived from sc_key, see carp_hmac_prepare(). */
	unsigned char sc_pad[CARP_HMAC_PAD];
	/* Precomputed first part of the inner hash. */
	SHA1_CTX sc_sha1;

	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
};

/*
 * Per-interface CARP state, reached through ifp->if_carp.
 */
struct carp_if {
#ifdef INET
	int	cif_naddrs;
#endif
#ifdef INET6
	int	cif_naddrs6;
#endif
	TAILQ_HEAD(, carp_softc) cif_vrs;	/* Softcs on this interface. */
#ifdef INET
	struct ip_moptions 	 cif_imo;	/* Passed to ip_output(). */
#endif
#ifdef INET6
	struct ip6_moptions 	 cif_im6o;	/* Passed to ip6_output(). */
#endif
	struct ifnet	*cif_ifp;
	struct mtx	cif_mtx;		/* See CIF_LOCK(). */
	uint32_t	cif_flags;
#define	CIF_PROMISC	0x00000001
};

/* Kernel equivalent of struct carpreq, but with more fields for new features.
 * */
struct carpkreq {
	int		carpr_count;
	int		carpr_vhid;
	int		carpr_state;
	int		carpr_advskew;
	int		carpr_advbase;
	unsigned char	carpr_key[CARP_KEY_LEN];
	/* Everything above this is identical to carpreq */
	struct in_addr	carpr_addr;
	struct in6_addr	carpr_addr6;
};

/*
 * Brief design of carp(4).
 *
 * Any carp-capable ifnet may have a list of carp softcs hanging off
 * its ifp->if_carp pointer. Each softc represents one unique virtual
 * host id, or vhid. The softc has a back pointer to the ifnet. All
 * softcs are joined in a global list, which has quite limited use.
 *
 * Any interface address that takes part in CARP negotiation has a
 * pointer to the softc of its vhid, ifa->ifa_carp. That can be either
 * an AF_INET or an AF_INET6 address.
 *
 * Although one can get the softc's backpointer to the ifnet and traverse
 * its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers. This
 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
 * do calls into the network stack, thus avoiding LORs.
 *
 * Locking:
 *
 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
 * callout-driven events and ioctl()s.
 *
 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
 * To traverse the global list we use the mutex carp_mtx.
 *
 * Known issues with locking:
 *
 * - When sending an ad, we put the pointer to the softc in an mtag, and no
 *   reference counting is done on the softc.
 * - On module unload we may race (?) with packet processing thread
 *   dereferencing our function pointers.
 */

/* Accept incoming CARP packets. */
VNET_DEFINE_STATIC(int, carp_allow) = 1;
#define	V_carp_allow	VNET(carp_allow)

/* Set DSCP in outgoing CARP packets. */
VNET_DEFINE_STATIC(int, carp_dscp) = 56;
#define	V_carp_dscp	VNET(carp_dscp)

/* Preempt slower nodes. */
VNET_DEFINE_STATIC(int, carp_preempt) = 0;
#define	V_carp_preempt	VNET(carp_preempt)

/* Log level: > 0 enables CARP_LOG(), > 1 also enables CARP_DEBUG(). */
VNET_DEFINE_STATIC(int, carp_log) = 1;
#define	V_carp_log	VNET(carp_log)

/* Global advskew demotion, added to sc_advskew by DEMOTE_ADVSKEW(). */
VNET_DEFINE_STATIC(int, carp_demotion) = 0;
#define	V_carp_demotion	VNET(carp_demotion)

/* Send error demotion factor. */
VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
#define	V_carp_senderr_adj	VNET(carp_senderr_adj)

/* Iface down demotion factor.
*/ 234 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 235 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 236 237 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 238 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 239 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 240 241 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 242 "CARP"); 243 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 244 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 245 &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", 246 "Accept incoming CARP packets"); 247 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 248 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 249 0, 0, carp_dscp_sysctl, "I", 250 "DSCP value for carp packets"); 251 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 252 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 253 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 254 &VNET_NAME(carp_log), 0, "CARP log level"); 255 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 256 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 257 0, 0, carp_demote_adj_sysctl, "I", 258 "Adjust demotion factor (skew of advskew)"); 259 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 260 CTLFLAG_VNET | CTLFLAG_RW, 261 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 262 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 263 CTLFLAG_VNET | CTLFLAG_RW, 264 &VNET_NAME(carp_ifdown_adj), 0, 265 "Interface down demotion factor adjustment"); 266 267 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 268 VNET_PCPUSTAT_SYSINIT(carpstats); 269 VNET_PCPUSTAT_SYSUNINIT(carpstats); 270 271 #define CARPSTATS_ADD(name, val) \ 272 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 273 sizeof(uint64_t)], (val)) 274 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 275 276 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, 
stats, struct carpstats, 277 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 278 279 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 280 NULL, MTX_DEF) 281 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 282 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 283 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 284 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 285 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 286 NULL, MTX_DEF) 287 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 288 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 289 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 290 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 291 #define CIF_FREE(cif) do { \ 292 CIF_LOCK(cif); \ 293 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 294 carp_free_if(cif); \ 295 else \ 296 CIF_UNLOCK(cif); \ 297 } while (0) 298 299 #define CARP_LOG(...) do { \ 300 if (V_carp_log > 0) \ 301 log(LOG_INFO, "carp: " __VA_ARGS__); \ 302 } while (0) 303 304 #define CARP_DEBUG(...) do { \ 305 if (V_carp_log > 1) \ 306 log(LOG_DEBUG, __VA_ARGS__); \ 307 } while (0) 308 309 #define IFNET_FOREACH_IFA(ifp, ifa) \ 310 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 311 if ((ifa)->ifa_carp != NULL) 312 313 #define CARP_FOREACH_IFA(sc, ifa) \ 314 CARP_LOCK_ASSERT(sc); \ 315 for (int _i = 0; \ 316 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 317 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 318 ++_i) 319 320 #define IFNET_FOREACH_CARP(ifp, sc) \ 321 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 322 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 323 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 324 325 #define DEMOTE_ADVSKEW(sc) \ 326 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 327 CARP_MAXSKEW : \ 328 (((sc)->sc_advskew + V_carp_demotion < 0) ? 
\ 329 0 : ((sc)->sc_advskew + V_carp_demotion))) 330 331 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); 332 static struct carp_softc 333 *carp_alloc(struct ifnet *); 334 static void carp_destroy(struct carp_softc *); 335 static struct carp_if 336 *carp_alloc_if(struct ifnet *); 337 static void carp_free_if(struct carp_if *); 338 static void carp_set_state(struct carp_softc *, int, const char* reason); 339 static void carp_sc_state(struct carp_softc *); 340 static void carp_setrun(struct carp_softc *, sa_family_t); 341 static void carp_master_down(void *); 342 static void carp_master_down_locked(struct carp_softc *, 343 const char* reason); 344 static void carp_send_ad(void *); 345 static void carp_send_ad_locked(struct carp_softc *); 346 static void carp_addroute(struct carp_softc *); 347 static void carp_ifa_addroute(struct ifaddr *); 348 static void carp_delroute(struct carp_softc *); 349 static void carp_ifa_delroute(struct ifaddr *); 350 static void carp_send_ad_all(void *, int); 351 static void carp_demote_adj(int, char *); 352 353 static LIST_HEAD(, carp_softc) carp_list; 354 static struct mtx carp_mtx; 355 static struct sx carp_sx; 356 static struct task carp_sendall_task = 357 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 358 359 static int 360 carp_is_supported_if(if_t ifp) 361 { 362 if (ifp == NULL) 363 return (ENXIO); 364 365 switch (ifp->if_type) { 366 case IFT_ETHER: 367 case IFT_L2VLAN: 368 case IFT_BRIDGE: 369 break; 370 default: 371 return (EOPNOTSUPP); 372 } 373 374 return (0); 375 } 376 377 static void 378 carp_hmac_prepare(struct carp_softc *sc) 379 { 380 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 381 uint8_t vhid = sc->sc_vhid & 0xff; 382 struct ifaddr *ifa; 383 int i, found; 384 #ifdef INET 385 struct in_addr last, cur, in; 386 #endif 387 #ifdef INET6 388 struct in6_addr last6, cur6, in6; 389 #endif 390 391 CARP_LOCK_ASSERT(sc); 392 393 /* Compute ipad from key. 
*/ 394 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 395 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 396 for (i = 0; i < sizeof(sc->sc_pad); i++) 397 sc->sc_pad[i] ^= 0x36; 398 399 /* Precompute first part of inner hash. */ 400 SHA1Init(&sc->sc_sha1); 401 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 402 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 403 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 404 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 405 #ifdef INET 406 cur.s_addr = 0; 407 do { 408 found = 0; 409 last = cur; 410 cur.s_addr = 0xffffffff; 411 CARP_FOREACH_IFA(sc, ifa) { 412 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 413 if (ifa->ifa_addr->sa_family == AF_INET && 414 ntohl(in.s_addr) > ntohl(last.s_addr) && 415 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 416 cur.s_addr = in.s_addr; 417 found++; 418 } 419 } 420 if (found) 421 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 422 } while (found); 423 #endif /* INET */ 424 #ifdef INET6 425 memset(&cur6, 0, sizeof(cur6)); 426 do { 427 found = 0; 428 last6 = cur6; 429 memset(&cur6, 0xff, sizeof(cur6)); 430 CARP_FOREACH_IFA(sc, ifa) { 431 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 432 if (IN6_IS_SCOPE_EMBED(&in6)) 433 in6.s6_addr16[1] = 0; 434 if (ifa->ifa_addr->sa_family == AF_INET6 && 435 memcmp(&in6, &last6, sizeof(in6)) > 0 && 436 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 437 cur6 = in6; 438 found++; 439 } 440 } 441 if (found) 442 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 443 } while (found); 444 #endif /* INET6 */ 445 446 /* convert ipad to opad */ 447 for (i = 0; i < sizeof(sc->sc_pad); i++) 448 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 449 } 450 451 static void 452 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 453 unsigned char md[20]) 454 { 455 SHA1_CTX sha1ctx; 456 457 CARP_LOCK_ASSERT(sc); 458 459 /* fetch first half of inner hash */ 460 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 461 462 SHA1Update(&sha1ctx, (void *)counter, 
sizeof(sc->sc_counter)); 463 SHA1Final(md, &sha1ctx); 464 465 /* outer hash */ 466 SHA1Init(&sha1ctx); 467 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 468 SHA1Update(&sha1ctx, md, 20); 469 SHA1Final(md, &sha1ctx); 470 } 471 472 static int 473 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 474 unsigned char md[20]) 475 { 476 unsigned char md2[20]; 477 478 CARP_LOCK_ASSERT(sc); 479 480 carp_hmac_generate(sc, counter, md2); 481 482 return (bcmp(md, md2, sizeof(md2))); 483 } 484 485 /* 486 * process input packet. 487 * we have rearranged checks order compared to the rfc, 488 * but it seems more efficient this way or not possible otherwise. 489 */ 490 #ifdef INET 491 static int 492 carp_input(struct mbuf **mp, int *offp, int proto) 493 { 494 struct mbuf *m = *mp; 495 struct ip *ip = mtod(m, struct ip *); 496 struct carp_header *ch; 497 int iplen, len; 498 499 iplen = *offp; 500 *mp = NULL; 501 502 CARPSTATS_INC(carps_ipackets); 503 504 if (!V_carp_allow) { 505 m_freem(m); 506 return (IPPROTO_DONE); 507 } 508 509 iplen = ip->ip_hl << 2; 510 511 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 512 CARPSTATS_INC(carps_badlen); 513 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 514 "on %s\n", __func__, m->m_len - sizeof(struct ip), 515 if_name(m->m_pkthdr.rcvif)); 516 m_freem(m); 517 return (IPPROTO_DONE); 518 } 519 520 if (iplen + sizeof(*ch) < m->m_len) { 521 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 522 CARPSTATS_INC(carps_hdrops); 523 CARP_DEBUG("%s: pullup failed\n", __func__); 524 return (IPPROTO_DONE); 525 } 526 ip = mtod(m, struct ip *); 527 } 528 ch = (struct carp_header *)((char *)ip + iplen); 529 530 /* 531 * verify that the received packet length is 532 * equal to the CARP header 533 */ 534 len = iplen + sizeof(*ch); 535 if (len > m->m_pkthdr.len) { 536 CARPSTATS_INC(carps_badlen); 537 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 538 m->m_pkthdr.len, 539 if_name(m->m_pkthdr.rcvif)); 540 m_freem(m); 541 
return (IPPROTO_DONE); 542 } 543 544 if ((m = m_pullup(m, len)) == NULL) { 545 CARPSTATS_INC(carps_hdrops); 546 return (IPPROTO_DONE); 547 } 548 ip = mtod(m, struct ip *); 549 ch = (struct carp_header *)((char *)ip + iplen); 550 551 /* verify the CARP checksum */ 552 m->m_data += iplen; 553 if (in_cksum(m, len - iplen)) { 554 CARPSTATS_INC(carps_badsum); 555 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 556 if_name(m->m_pkthdr.rcvif)); 557 m_freem(m); 558 return (IPPROTO_DONE); 559 } 560 m->m_data -= iplen; 561 562 carp_input_c(m, ch, AF_INET, ip->ip_ttl); 563 return (IPPROTO_DONE); 564 } 565 #endif 566 567 #ifdef INET6 568 static int 569 carp6_input(struct mbuf **mp, int *offp, int proto) 570 { 571 struct mbuf *m = *mp; 572 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 573 struct carp_header *ch; 574 u_int len; 575 576 CARPSTATS_INC(carps_ipackets6); 577 578 if (!V_carp_allow) { 579 m_freem(m); 580 return (IPPROTO_DONE); 581 } 582 583 /* check if received on a valid carp interface */ 584 if (m->m_pkthdr.rcvif->if_carp == NULL) { 585 CARPSTATS_INC(carps_badif); 586 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 587 __func__, if_name(m->m_pkthdr.rcvif)); 588 m_freem(m); 589 return (IPPROTO_DONE); 590 } 591 592 /* verify that we have a complete carp packet */ 593 if (m->m_len < *offp + sizeof(*ch)) { 594 len = m->m_len; 595 m = m_pullup(m, *offp + sizeof(*ch)); 596 if (m == NULL) { 597 CARPSTATS_INC(carps_badlen); 598 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 599 return (IPPROTO_DONE); 600 } 601 ip6 = mtod(m, struct ip6_hdr *); 602 } 603 ch = (struct carp_header *)(mtod(m, char *) + *offp); 604 605 /* verify the CARP checksum */ 606 m->m_data += *offp; 607 if (in_cksum(m, sizeof(*ch))) { 608 CARPSTATS_INC(carps_badsum); 609 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 610 if_name(m->m_pkthdr.rcvif)); 611 m_freem(m); 612 return (IPPROTO_DONE); 613 } 614 m->m_data -= *offp; 615 616 carp_input_c(m, ch, AF_INET6, 
ip6->ip6_hlim); 617 return (IPPROTO_DONE); 618 } 619 #endif /* INET6 */ 620 621 /* 622 * This routine should not be necessary at all, but some switches 623 * (VMWare ESX vswitches) can echo our own packets back at us, 624 * and we must ignore them or they will cause us to drop out of 625 * MASTER mode. 626 * 627 * We cannot catch all cases of network loops. Instead, what we 628 * do here is catch any packet that arrives with a carp header 629 * with a VHID of 0, that comes from an address that is our own. 630 * These packets are by definition "from us" (even if they are from 631 * a misconfigured host that is pretending to be us). 632 * 633 * The VHID test is outside this mini-function. 634 */ 635 static int 636 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 637 { 638 #ifdef INET 639 struct ip *ip4; 640 struct in_addr in4; 641 #endif 642 #ifdef INET6 643 struct ip6_hdr *ip6; 644 struct in6_addr in6; 645 #endif 646 647 switch (af) { 648 #ifdef INET 649 case AF_INET: 650 ip4 = mtod(m, struct ip *); 651 in4 = ifatoia(ifa)->ia_addr.sin_addr; 652 return (in4.s_addr == ip4->ip_src.s_addr); 653 #endif 654 #ifdef INET6 655 case AF_INET6: 656 ip6 = mtod(m, struct ip6_hdr *); 657 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 658 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 659 #endif 660 default: 661 break; 662 } 663 return (0); 664 } 665 666 static void 667 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) 668 { 669 struct ifnet *ifp = m->m_pkthdr.rcvif; 670 struct ifaddr *ifa, *match; 671 struct carp_softc *sc; 672 uint64_t tmp_counter; 673 struct timeval sc_tv, ch_tv; 674 int error; 675 bool multicast = false; 676 677 NET_EPOCH_ASSERT(); 678 679 /* 680 * Verify that the VHID is valid on the receiving interface. 681 * 682 * There should be just one match. If there are none 683 * the VHID is not valid and we drop the packet. 
If 684 * there are multiple VHID matches, take just the first 685 * one, for compatibility with previous code. While we're 686 * scanning, check for obvious loops in the network topology 687 * (these should never happen, and as noted above, we may 688 * miss real loops; this is just a double-check). 689 */ 690 error = 0; 691 match = NULL; 692 IFNET_FOREACH_IFA(ifp, ifa) { 693 if (match == NULL && ifa->ifa_carp != NULL && 694 ifa->ifa_addr->sa_family == af && 695 ifa->ifa_carp->sc_vhid == ch->carp_vhid) 696 match = ifa; 697 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) 698 error = ELOOP; 699 } 700 ifa = error ? NULL : match; 701 if (ifa != NULL) 702 ifa_ref(ifa); 703 704 if (ifa == NULL) { 705 if (error == ELOOP) { 706 CARP_DEBUG("dropping looped packet on interface %s\n", 707 if_name(ifp)); 708 CARPSTATS_INC(carps_badif); /* ??? */ 709 } else { 710 CARPSTATS_INC(carps_badvhid); 711 } 712 m_freem(m); 713 return; 714 } 715 716 /* verify the CARP version. */ 717 if (ch->carp_version != CARP_VERSION) { 718 CARPSTATS_INC(carps_badver); 719 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 720 ch->carp_version); 721 ifa_free(ifa); 722 m_freem(m); 723 return; 724 } 725 726 sc = ifa->ifa_carp; 727 CARP_LOCK(sc); 728 if (ifa->ifa_addr->sa_family == AF_INET) { 729 multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); 730 } else { 731 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); 732 } 733 ifa_free(ifa); 734 735 /* verify that the IP TTL is 255, but only if we're not in unicast mode. 
*/ 736 if (multicast && ttl != CARP_DFLTTL) { 737 CARPSTATS_INC(carps_badttl); 738 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 739 ttl, if_name(m->m_pkthdr.rcvif)); 740 goto out; 741 } 742 743 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 744 CARPSTATS_INC(carps_badauth); 745 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 746 sc->sc_vhid, if_name(ifp)); 747 goto out; 748 } 749 750 tmp_counter = ntohl(ch->carp_counter[0]); 751 tmp_counter = tmp_counter<<32; 752 tmp_counter += ntohl(ch->carp_counter[1]); 753 754 /* XXX Replay protection goes here */ 755 756 sc->sc_init_counter = 0; 757 sc->sc_counter = tmp_counter; 758 759 sc_tv.tv_sec = sc->sc_advbase; 760 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 761 ch_tv.tv_sec = ch->carp_advbase; 762 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 763 764 switch (sc->sc_state) { 765 case INIT: 766 break; 767 case MASTER: 768 /* 769 * If we receive an advertisement from a master who's going to 770 * be more frequent than us, go into BACKUP state. 771 */ 772 if (timevalcmp(&sc_tv, &ch_tv, >) || 773 timevalcmp(&sc_tv, &ch_tv, ==)) { 774 callout_stop(&sc->sc_ad_tmo); 775 carp_set_state(sc, BACKUP, 776 "more frequent advertisement received"); 777 carp_setrun(sc, 0); 778 carp_delroute(sc); 779 } 780 break; 781 case BACKUP: 782 /* 783 * If we're pre-empting masters who advertise slower than us, 784 * and this one claims to be slower, treat him as down. 785 */ 786 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 787 carp_master_down_locked(sc, 788 "preempting a slower master"); 789 break; 790 } 791 792 /* 793 * If the master is going to advertise at such a low frequency 794 * that he's guaranteed to time out, we'd might as well just 795 * treat him as timed out now. 
796 */ 797 sc_tv.tv_sec = sc->sc_advbase * 3; 798 if (timevalcmp(&sc_tv, &ch_tv, <)) { 799 carp_master_down_locked(sc, "master will time out"); 800 break; 801 } 802 803 /* 804 * Otherwise, we reset the counter and wait for the next 805 * advertisement. 806 */ 807 carp_setrun(sc, af); 808 break; 809 } 810 811 out: 812 CARP_UNLOCK(sc); 813 m_freem(m); 814 } 815 816 static int 817 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 818 { 819 struct m_tag *mtag; 820 821 if (sc->sc_init_counter) { 822 /* this could also be seconds since unix epoch */ 823 sc->sc_counter = arc4random(); 824 sc->sc_counter = sc->sc_counter << 32; 825 sc->sc_counter += arc4random(); 826 } else 827 sc->sc_counter++; 828 829 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 830 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 831 832 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 833 834 /* Tag packet for carp_output */ 835 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), 836 M_NOWAIT)) == NULL) { 837 m_freem(m); 838 CARPSTATS_INC(carps_onomem); 839 return (ENOMEM); 840 } 841 bcopy(&sc, mtag + 1, sizeof(sc)); 842 m_tag_prepend(m, mtag); 843 844 return (0); 845 } 846 847 /* 848 * To avoid LORs and possible recursions this function shouldn't 849 * be called directly, but scheduled via taskqueue. 850 */ 851 static void 852 carp_send_ad_all(void *ctx __unused, int pending __unused) 853 { 854 struct carp_softc *sc; 855 struct epoch_tracker et; 856 857 NET_EPOCH_ENTER(et); 858 mtx_lock(&carp_mtx); 859 LIST_FOREACH(sc, &carp_list, sc_next) 860 if (sc->sc_state == MASTER) { 861 CARP_LOCK(sc); 862 CURVNET_SET(sc->sc_carpdev->if_vnet); 863 carp_send_ad_locked(sc); 864 CURVNET_RESTORE(); 865 CARP_UNLOCK(sc); 866 } 867 mtx_unlock(&carp_mtx); 868 NET_EPOCH_EXIT(et); 869 } 870 871 /* Send a periodic advertisement, executed in callout context. 
*/ 872 static void 873 carp_send_ad(void *v) 874 { 875 struct carp_softc *sc = v; 876 struct epoch_tracker et; 877 878 NET_EPOCH_ENTER(et); 879 CARP_LOCK_ASSERT(sc); 880 CURVNET_SET(sc->sc_carpdev->if_vnet); 881 carp_send_ad_locked(sc); 882 CURVNET_RESTORE(); 883 CARP_UNLOCK(sc); 884 NET_EPOCH_EXIT(et); 885 } 886 887 static void 888 carp_send_ad_error(struct carp_softc *sc, int error) 889 { 890 891 /* 892 * We track errors and successfull sends with this logic: 893 * - Any error resets success counter to 0. 894 * - MAX_ERRORS triggers demotion. 895 * - MIN_SUCCESS successes resets error counter to 0. 896 * - MIN_SUCCESS reverts demotion, if it was triggered before. 897 */ 898 if (error) { 899 if (sc->sc_sendad_errors < INT_MAX) 900 sc->sc_sendad_errors++; 901 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 902 static const char fmt[] = "send error %d on %s"; 903 char msg[sizeof(fmt) + IFNAMSIZ]; 904 905 sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); 906 carp_demote_adj(V_carp_senderr_adj, msg); 907 } 908 sc->sc_sendad_success = 0; 909 } else if (sc->sc_sendad_errors > 0) { 910 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 911 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 912 static const char fmt[] = "send ok on %s"; 913 char msg[sizeof(fmt) + IFNAMSIZ]; 914 915 sprintf(msg, fmt, if_name(sc->sc_carpdev)); 916 carp_demote_adj(-V_carp_senderr_adj, msg); 917 } 918 sc->sc_sendad_errors = 0; 919 } 920 } 921 } 922 923 /* 924 * Pick the best ifaddr on the given ifp for sending CARP 925 * advertisements. 926 * 927 * "Best" here is defined by ifa_preferred(). This function is much 928 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 929 * 930 * (This could be simplified to return the actual address, except that 931 * it has a different format in AF_INET and AF_INET6.) 
932 */ 933 static struct ifaddr * 934 carp_best_ifa(int af, struct ifnet *ifp) 935 { 936 struct ifaddr *ifa, *best; 937 938 NET_EPOCH_ASSERT(); 939 940 if (af >= AF_MAX) 941 return (NULL); 942 best = NULL; 943 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 944 if (ifa->ifa_addr->sa_family == af && 945 (best == NULL || ifa_preferred(best, ifa))) 946 best = ifa; 947 } 948 if (best != NULL) 949 ifa_ref(best); 950 return (best); 951 } 952 953 static void 954 carp_send_ad_locked(struct carp_softc *sc) 955 { 956 struct carp_header ch; 957 struct timeval tv; 958 struct ifaddr *ifa; 959 struct carp_header *ch_ptr; 960 struct mbuf *m; 961 int len, advskew; 962 963 NET_EPOCH_ASSERT(); 964 CARP_LOCK_ASSERT(sc); 965 966 advskew = DEMOTE_ADVSKEW(sc); 967 tv.tv_sec = sc->sc_advbase; 968 tv.tv_usec = advskew * 1000000 / 256; 969 970 ch.carp_version = CARP_VERSION; 971 ch.carp_type = CARP_ADVERTISEMENT; 972 ch.carp_vhid = sc->sc_vhid; 973 ch.carp_advbase = sc->sc_advbase; 974 ch.carp_advskew = advskew; 975 ch.carp_authlen = 7; /* XXX DEFINE */ 976 ch.carp_pad1 = 0; /* must be zero */ 977 ch.carp_cksum = 0; 978 979 /* XXXGL: OpenBSD picks first ifaddr with needed family. 
*/ 980 981 #ifdef INET 982 if (sc->sc_naddrs) { 983 struct ip *ip; 984 985 m = m_gethdr(M_NOWAIT, MT_DATA); 986 if (m == NULL) { 987 CARPSTATS_INC(carps_onomem); 988 goto resched; 989 } 990 len = sizeof(*ip) + sizeof(ch); 991 m->m_pkthdr.len = len; 992 m->m_pkthdr.rcvif = NULL; 993 m->m_len = len; 994 M_ALIGN(m, m->m_len); 995 if (IN_MULTICAST(sc->sc_carpaddr.s_addr)) 996 m->m_flags |= M_MCAST; 997 ip = mtod(m, struct ip *); 998 ip->ip_v = IPVERSION; 999 ip->ip_hl = sizeof(*ip) >> 2; 1000 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 1001 ip->ip_len = htons(len); 1002 ip->ip_off = htons(IP_DF); 1003 ip->ip_ttl = CARP_DFLTTL; 1004 ip->ip_p = IPPROTO_CARP; 1005 ip->ip_sum = 0; 1006 ip_fillid(ip); 1007 1008 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1009 if (ifa != NULL) { 1010 ip->ip_src.s_addr = 1011 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1012 ifa_free(ifa); 1013 } else 1014 ip->ip_src.s_addr = 0; 1015 ip->ip_dst = sc->sc_carpaddr; 1016 1017 ch_ptr = (struct carp_header *)(&ip[1]); 1018 bcopy(&ch, ch_ptr, sizeof(ch)); 1019 if (carp_prepare_ad(m, sc, ch_ptr)) 1020 goto resched; 1021 1022 m->m_data += sizeof(*ip); 1023 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 1024 m->m_data -= sizeof(*ip); 1025 1026 CARPSTATS_INC(carps_opackets); 1027 1028 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1029 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1030 } 1031 #endif /* INET */ 1032 #ifdef INET6 1033 if (sc->sc_naddrs6) { 1034 struct ip6_hdr *ip6; 1035 1036 m = m_gethdr(M_NOWAIT, MT_DATA); 1037 if (m == NULL) { 1038 CARPSTATS_INC(carps_onomem); 1039 goto resched; 1040 } 1041 len = sizeof(*ip6) + sizeof(ch); 1042 m->m_pkthdr.len = len; 1043 m->m_pkthdr.rcvif = NULL; 1044 m->m_len = len; 1045 M_ALIGN(m, m->m_len); 1046 ip6 = mtod(m, struct ip6_hdr *); 1047 bzero(ip6, sizeof(*ip6)); 1048 ip6->ip6_vfc |= IPV6_VERSION; 1049 /* Traffic class isn't defined in ip6 struct instead 1050 * it gets offset into flowid field */ 1051 ip6->ip6_flow |= htonl(V_carp_dscp 
<< (IPV6_FLOWLABEL_LEN + 1052 IPTOS_DSCP_OFFSET)); 1053 ip6->ip6_hlim = CARP_DFLTTL; 1054 ip6->ip6_nxt = IPPROTO_CARP; 1055 1056 /* set the source address */ 1057 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1058 if (ifa != NULL) { 1059 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1060 sizeof(struct in6_addr)); 1061 ifa_free(ifa); 1062 } else 1063 /* This should never happen with IPv6. */ 1064 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1065 1066 /* Set the multicast destination. */ 1067 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); 1068 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 1069 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1070 m_freem(m); 1071 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1072 goto resched; 1073 } 1074 } 1075 1076 ch_ptr = (struct carp_header *)(&ip6[1]); 1077 bcopy(&ch, ch_ptr, sizeof(ch)); 1078 if (carp_prepare_ad(m, sc, ch_ptr)) 1079 goto resched; 1080 1081 m->m_data += sizeof(*ip6); 1082 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1083 m->m_data -= sizeof(*ip6); 1084 1085 CARPSTATS_INC(carps_opackets6); 1086 1087 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1088 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1089 } 1090 #endif /* INET6 */ 1091 1092 resched: 1093 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); 1094 } 1095 1096 static void 1097 carp_addroute(struct carp_softc *sc) 1098 { 1099 struct ifaddr *ifa; 1100 1101 CARP_FOREACH_IFA(sc, ifa) 1102 carp_ifa_addroute(ifa); 1103 } 1104 1105 static void 1106 carp_ifa_addroute(struct ifaddr *ifa) 1107 { 1108 1109 switch (ifa->ifa_addr->sa_family) { 1110 #ifdef INET 1111 case AF_INET: 1112 in_addprefix(ifatoia(ifa)); 1113 ifa_add_loopback_route(ifa, 1114 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1115 break; 1116 #endif 1117 #ifdef INET6 1118 case AF_INET6: 1119 ifa_add_loopback_route(ifa, 1120 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1121 nd6_add_ifa_lle(ifatoia6(ifa)); 1122 break; 1123 #endif 1124 } 1125 } 1126 
1127 static void 1128 carp_delroute(struct carp_softc *sc) 1129 { 1130 struct ifaddr *ifa; 1131 1132 CARP_FOREACH_IFA(sc, ifa) 1133 carp_ifa_delroute(ifa); 1134 } 1135 1136 static void 1137 carp_ifa_delroute(struct ifaddr *ifa) 1138 { 1139 1140 switch (ifa->ifa_addr->sa_family) { 1141 #ifdef INET 1142 case AF_INET: 1143 ifa_del_loopback_route(ifa, 1144 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1145 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1146 break; 1147 #endif 1148 #ifdef INET6 1149 case AF_INET6: 1150 ifa_del_loopback_route(ifa, 1151 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1152 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1153 break; 1154 #endif 1155 } 1156 } 1157 1158 int 1159 carp_master(struct ifaddr *ifa) 1160 { 1161 struct carp_softc *sc = ifa->ifa_carp; 1162 1163 return (sc->sc_state == MASTER); 1164 } 1165 1166 #ifdef INET 1167 /* 1168 * Broadcast a gratuitous ARP request containing 1169 * the virtual router MAC address for each IP address 1170 * associated with the virtual router. 
1171 */ 1172 static void 1173 carp_send_arp(struct carp_softc *sc) 1174 { 1175 struct ifaddr *ifa; 1176 struct in_addr addr; 1177 1178 NET_EPOCH_ASSERT(); 1179 1180 CARP_FOREACH_IFA(sc, ifa) { 1181 if (ifa->ifa_addr->sa_family != AF_INET) 1182 continue; 1183 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1184 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); 1185 } 1186 } 1187 1188 int 1189 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1190 { 1191 struct carp_softc *sc = ifa->ifa_carp; 1192 1193 if (sc->sc_state == MASTER) { 1194 *enaddr = LLADDR(&sc->sc_addr); 1195 return (1); 1196 } 1197 1198 return (0); 1199 } 1200 #endif 1201 1202 #ifdef INET6 1203 static void 1204 carp_send_na(struct carp_softc *sc) 1205 { 1206 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1207 struct ifaddr *ifa; 1208 struct in6_addr *in6; 1209 1210 CARP_FOREACH_IFA(sc, ifa) { 1211 if (ifa->ifa_addr->sa_family != AF_INET6) 1212 continue; 1213 1214 in6 = IFA_IN6(ifa); 1215 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1216 ND_NA_FLAG_OVERRIDE, 1, NULL); 1217 DELAY(1000); /* XXX */ 1218 } 1219 } 1220 1221 /* 1222 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1223 * matches and is not a carp address. Returns NULL otherwise. 
1224 */ 1225 struct ifaddr * 1226 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1227 { 1228 struct ifaddr *ifa; 1229 1230 NET_EPOCH_ASSERT(); 1231 1232 ifa = NULL; 1233 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1234 if (ifa->ifa_addr->sa_family != AF_INET6) 1235 continue; 1236 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1237 continue; 1238 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1239 ifa = NULL; 1240 else 1241 ifa_ref(ifa); 1242 break; 1243 } 1244 1245 return (ifa); 1246 } 1247 1248 char * 1249 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1250 { 1251 struct ifaddr *ifa; 1252 1253 NET_EPOCH_ASSERT(); 1254 1255 IFNET_FOREACH_IFA(ifp, ifa) 1256 if (ifa->ifa_addr->sa_family == AF_INET6 && 1257 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1258 struct carp_softc *sc = ifa->ifa_carp; 1259 struct m_tag *mtag; 1260 1261 mtag = m_tag_get(PACKET_TAG_CARP, 1262 sizeof(struct carp_softc *), M_NOWAIT); 1263 if (mtag == NULL) 1264 /* Better a bit than nothing. */ 1265 return (LLADDR(&sc->sc_addr)); 1266 1267 bcopy(&sc, mtag + 1, sizeof(sc)); 1268 m_tag_prepend(m, mtag); 1269 1270 return (LLADDR(&sc->sc_addr)); 1271 } 1272 1273 return (NULL); 1274 } 1275 #endif /* INET6 */ 1276 1277 int 1278 carp_forus(struct ifnet *ifp, u_char *dhost) 1279 { 1280 struct carp_softc *sc; 1281 uint8_t *ena = dhost; 1282 1283 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1284 return (0); 1285 1286 CIF_LOCK(ifp->if_carp); 1287 IFNET_FOREACH_CARP(ifp, sc) { 1288 /* 1289 * CARP_LOCK() is not here, since would protect nothing, but 1290 * cause deadlock with if_bridge, calling this under its lock. 1291 */ 1292 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1293 ETHER_ADDR_LEN)) { 1294 CIF_UNLOCK(ifp->if_carp); 1295 return (1); 1296 } 1297 } 1298 CIF_UNLOCK(ifp->if_carp); 1299 1300 return (0); 1301 } 1302 1303 /* Master down timeout event, executed in callout context. 
*/ 1304 static void 1305 carp_master_down(void *v) 1306 { 1307 struct carp_softc *sc = v; 1308 struct epoch_tracker et; 1309 1310 NET_EPOCH_ENTER(et); 1311 CARP_LOCK_ASSERT(sc); 1312 1313 CURVNET_SET(sc->sc_carpdev->if_vnet); 1314 if (sc->sc_state == BACKUP) { 1315 carp_master_down_locked(sc, "master timed out"); 1316 } 1317 CURVNET_RESTORE(); 1318 1319 CARP_UNLOCK(sc); 1320 NET_EPOCH_EXIT(et); 1321 } 1322 1323 static void 1324 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1325 { 1326 1327 NET_EPOCH_ASSERT(); 1328 CARP_LOCK_ASSERT(sc); 1329 1330 switch (sc->sc_state) { 1331 case BACKUP: 1332 carp_set_state(sc, MASTER, reason); 1333 carp_send_ad_locked(sc); 1334 #ifdef INET 1335 carp_send_arp(sc); 1336 #endif 1337 #ifdef INET6 1338 carp_send_na(sc); 1339 #endif 1340 carp_setrun(sc, 0); 1341 carp_addroute(sc); 1342 break; 1343 case INIT: 1344 case MASTER: 1345 #ifdef INVARIANTS 1346 panic("carp: VHID %u@%s: master_down event in %s state\n", 1347 sc->sc_vhid, 1348 if_name(sc->sc_carpdev), 1349 sc->sc_state ? "MASTER" : "INIT"); 1350 #endif 1351 break; 1352 } 1353 } 1354 1355 /* 1356 * When in backup state, af indicates whether to reset the master down timer 1357 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1358 */ 1359 static void 1360 carp_setrun(struct carp_softc *sc, sa_family_t af) 1361 { 1362 struct timeval tv; 1363 1364 CARP_LOCK_ASSERT(sc); 1365 1366 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1367 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1368 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1369 !V_carp_allow) 1370 return; 1371 1372 switch (sc->sc_state) { 1373 case INIT: 1374 carp_set_state(sc, BACKUP, "initialization complete"); 1375 carp_setrun(sc, 0); 1376 break; 1377 case BACKUP: 1378 callout_stop(&sc->sc_ad_tmo); 1379 tv.tv_sec = 3 * sc->sc_advbase; 1380 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1381 switch (af) { 1382 #ifdef INET 1383 case AF_INET: 1384 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1385 carp_master_down, sc); 1386 break; 1387 #endif 1388 #ifdef INET6 1389 case AF_INET6: 1390 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1391 carp_master_down, sc); 1392 break; 1393 #endif 1394 default: 1395 #ifdef INET 1396 if (sc->sc_naddrs) 1397 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1398 carp_master_down, sc); 1399 #endif 1400 #ifdef INET6 1401 if (sc->sc_naddrs6) 1402 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1403 carp_master_down, sc); 1404 #endif 1405 break; 1406 } 1407 break; 1408 case MASTER: 1409 tv.tv_sec = sc->sc_advbase; 1410 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1411 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1412 carp_send_ad, sc); 1413 break; 1414 } 1415 } 1416 1417 /* 1418 * Setup multicast structures. 
1419 */ 1420 static int 1421 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1422 { 1423 struct ifnet *ifp = cif->cif_ifp; 1424 int error = 0; 1425 1426 switch (sa) { 1427 #ifdef INET 1428 case AF_INET: 1429 { 1430 struct ip_moptions *imo = &cif->cif_imo; 1431 struct in_mfilter *imf; 1432 struct in_addr addr; 1433 1434 if (ip_mfilter_first(&imo->imo_head) != NULL) 1435 return (0); 1436 1437 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1438 ip_mfilter_init(&imo->imo_head); 1439 imo->imo_multicast_vif = -1; 1440 1441 addr.s_addr = htonl(INADDR_CARP_GROUP); 1442 if ((error = in_joingroup(ifp, &addr, NULL, 1443 &imf->imf_inm)) != 0) { 1444 ip_mfilter_free(imf); 1445 break; 1446 } 1447 1448 ip_mfilter_insert(&imo->imo_head, imf); 1449 imo->imo_multicast_ifp = ifp; 1450 imo->imo_multicast_ttl = CARP_DFLTTL; 1451 imo->imo_multicast_loop = 0; 1452 break; 1453 } 1454 #endif 1455 #ifdef INET6 1456 case AF_INET6: 1457 { 1458 struct ip6_moptions *im6o = &cif->cif_im6o; 1459 struct in6_mfilter *im6f[2]; 1460 struct in6_addr in6; 1461 1462 if (ip6_mfilter_first(&im6o->im6o_head)) 1463 return (0); 1464 1465 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1466 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1467 1468 ip6_mfilter_init(&im6o->im6o_head); 1469 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1470 im6o->im6o_multicast_ifp = ifp; 1471 1472 /* Join IPv6 CARP multicast group. */ 1473 bzero(&in6, sizeof(in6)); 1474 in6.s6_addr16[0] = htons(0xff02); 1475 in6.s6_addr8[15] = 0x12; 1476 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1477 ip6_mfilter_free(im6f[0]); 1478 ip6_mfilter_free(im6f[1]); 1479 break; 1480 } 1481 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1482 ip6_mfilter_free(im6f[0]); 1483 ip6_mfilter_free(im6f[1]); 1484 break; 1485 } 1486 1487 /* Join solicited multicast address. 
*/ 1488 bzero(&in6, sizeof(in6)); 1489 in6.s6_addr16[0] = htons(0xff02); 1490 in6.s6_addr32[1] = 0; 1491 in6.s6_addr32[2] = htonl(1); 1492 in6.s6_addr32[3] = 0; 1493 in6.s6_addr8[12] = 0xff; 1494 1495 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1496 ip6_mfilter_free(im6f[0]); 1497 ip6_mfilter_free(im6f[1]); 1498 break; 1499 } 1500 1501 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1502 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1503 ip6_mfilter_free(im6f[0]); 1504 ip6_mfilter_free(im6f[1]); 1505 break; 1506 } 1507 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1508 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1509 break; 1510 } 1511 #endif 1512 } 1513 1514 return (error); 1515 } 1516 1517 /* 1518 * Free multicast structures. 1519 */ 1520 static void 1521 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1522 { 1523 #ifdef INET 1524 struct ip_moptions *imo = &cif->cif_imo; 1525 struct in_mfilter *imf; 1526 #endif 1527 #ifdef INET6 1528 struct ip6_moptions *im6o = &cif->cif_im6o; 1529 struct in6_mfilter *im6f; 1530 #endif 1531 sx_assert(&carp_sx, SA_XLOCKED); 1532 1533 switch (sa) { 1534 #ifdef INET 1535 case AF_INET: 1536 if (cif->cif_naddrs != 0) 1537 break; 1538 1539 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 1540 ip_mfilter_remove(&imo->imo_head, imf); 1541 in_leavegroup(imf->imf_inm, NULL); 1542 ip_mfilter_free(imf); 1543 } 1544 break; 1545 #endif 1546 #ifdef INET6 1547 case AF_INET6: 1548 if (cif->cif_naddrs6 != 0) 1549 break; 1550 1551 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 1552 ip6_mfilter_remove(&im6o->im6o_head, im6f); 1553 in6_leavegroup(im6f->im6f_in6m, NULL); 1554 ip6_mfilter_free(im6f); 1555 } 1556 break; 1557 #endif 1558 } 1559 } 1560 1561 int 1562 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1563 { 1564 struct m_tag *mtag; 1565 struct carp_softc *sc; 1566 1567 if (!sa) 1568 return (0); 1569 1570 switch (sa->sa_family) { 1571 #ifdef 
INET 1572 case AF_INET: 1573 break; 1574 #endif 1575 #ifdef INET6 1576 case AF_INET6: 1577 break; 1578 #endif 1579 default: 1580 return (0); 1581 } 1582 1583 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1584 if (mtag == NULL) 1585 return (0); 1586 1587 bcopy(mtag + 1, &sc, sizeof(sc)); 1588 1589 switch (sa->sa_family) { 1590 case AF_INET: 1591 if (! IN_MULTICAST(sc->sc_carpaddr.s_addr)) 1592 return (0); 1593 break; 1594 case AF_INET6: 1595 if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6)) 1596 return (0); 1597 break; 1598 default: 1599 panic("Unknown af"); 1600 } 1601 1602 /* Set the source MAC address to the Virtual Router MAC Address. */ 1603 switch (ifp->if_type) { 1604 case IFT_ETHER: 1605 case IFT_BRIDGE: 1606 case IFT_L2VLAN: { 1607 struct ether_header *eh; 1608 1609 eh = mtod(m, struct ether_header *); 1610 eh->ether_shost[0] = 0; 1611 eh->ether_shost[1] = 0; 1612 eh->ether_shost[2] = 0x5e; 1613 eh->ether_shost[3] = 0; 1614 eh->ether_shost[4] = 1; 1615 eh->ether_shost[5] = sc->sc_vhid; 1616 } 1617 break; 1618 default: 1619 printf("%s: carp is not supported for the %d interface type\n", 1620 if_name(ifp), ifp->if_type); 1621 return (EOPNOTSUPP); 1622 } 1623 1624 return (0); 1625 } 1626 1627 static struct carp_softc* 1628 carp_alloc(struct ifnet *ifp) 1629 { 1630 struct carp_softc *sc; 1631 struct carp_if *cif; 1632 1633 sx_assert(&carp_sx, SA_XLOCKED); 1634 1635 if ((cif = ifp->if_carp) == NULL) 1636 cif = carp_alloc_if(ifp); 1637 1638 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1639 1640 sc->sc_advbase = CARP_DFLTINTV; 1641 sc->sc_vhid = -1; /* required setting */ 1642 sc->sc_init_counter = 1; 1643 sc->sc_state = INIT; 1644 1645 sc->sc_ifasiz = sizeof(struct ifaddr *); 1646 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1647 sc->sc_carpdev = ifp; 1648 1649 sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); 1650 sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 1651 sc->sc_carpaddr6.s6_addr8[15] = 0x12; 1652 1653 CARP_LOCK_INIT(sc); 
1654 #ifdef INET 1655 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1656 #endif 1657 #ifdef INET6 1658 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1659 #endif 1660 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1661 1662 CIF_LOCK(cif); 1663 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1664 CIF_UNLOCK(cif); 1665 1666 mtx_lock(&carp_mtx); 1667 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1668 mtx_unlock(&carp_mtx); 1669 1670 return (sc); 1671 } 1672 1673 static void 1674 carp_grow_ifas(struct carp_softc *sc) 1675 { 1676 struct ifaddr **new; 1677 1678 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 1679 CARP_LOCK(sc); 1680 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1681 free(sc->sc_ifas, M_CARP); 1682 sc->sc_ifas = new; 1683 sc->sc_ifasiz *= 2; 1684 CARP_UNLOCK(sc); 1685 } 1686 1687 static void 1688 carp_destroy(struct carp_softc *sc) 1689 { 1690 struct ifnet *ifp = sc->sc_carpdev; 1691 struct carp_if *cif = ifp->if_carp; 1692 1693 sx_assert(&carp_sx, SA_XLOCKED); 1694 1695 if (sc->sc_suppress) 1696 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 1697 CARP_UNLOCK(sc); 1698 1699 CIF_LOCK(cif); 1700 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1701 CIF_UNLOCK(cif); 1702 1703 mtx_lock(&carp_mtx); 1704 LIST_REMOVE(sc, sc_next); 1705 mtx_unlock(&carp_mtx); 1706 1707 callout_drain(&sc->sc_ad_tmo); 1708 #ifdef INET 1709 callout_drain(&sc->sc_md_tmo); 1710 #endif 1711 #ifdef INET6 1712 callout_drain(&sc->sc_md6_tmo); 1713 #endif 1714 CARP_LOCK_DESTROY(sc); 1715 1716 free(sc->sc_ifas, M_CARP); 1717 free(sc, M_CARP); 1718 } 1719 1720 static struct carp_if* 1721 carp_alloc_if(struct ifnet *ifp) 1722 { 1723 struct carp_if *cif; 1724 int error; 1725 1726 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1727 1728 if ((error = ifpromisc(ifp, 1)) != 0) 1729 printf("%s: ifpromisc(%s) failed: %d\n", 1730 __func__, if_name(ifp), error); 1731 else 1732 cif->cif_flags |= CIF_PROMISC; 1733 1734 
CIF_LOCK_INIT(cif); 1735 cif->cif_ifp = ifp; 1736 TAILQ_INIT(&cif->cif_vrs); 1737 1738 IF_ADDR_WLOCK(ifp); 1739 ifp->if_carp = cif; 1740 if_ref(ifp); 1741 IF_ADDR_WUNLOCK(ifp); 1742 1743 return (cif); 1744 } 1745 1746 static void 1747 carp_free_if(struct carp_if *cif) 1748 { 1749 struct ifnet *ifp = cif->cif_ifp; 1750 1751 CIF_LOCK_ASSERT(cif); 1752 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1753 __func__)); 1754 1755 IF_ADDR_WLOCK(ifp); 1756 ifp->if_carp = NULL; 1757 IF_ADDR_WUNLOCK(ifp); 1758 1759 CIF_LOCK_DESTROY(cif); 1760 1761 if (cif->cif_flags & CIF_PROMISC) 1762 ifpromisc(ifp, 0); 1763 if_rele(ifp); 1764 1765 free(cif, M_CARP); 1766 } 1767 1768 static bool 1769 carp_carprcp(void *arg, struct carp_softc *sc, int priv) 1770 { 1771 struct carpreq *carpr = arg; 1772 1773 CARP_LOCK(sc); 1774 carpr->carpr_state = sc->sc_state; 1775 carpr->carpr_vhid = sc->sc_vhid; 1776 carpr->carpr_advbase = sc->sc_advbase; 1777 carpr->carpr_advskew = sc->sc_advskew; 1778 if (priv) 1779 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1780 else 1781 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1782 CARP_UNLOCK(sc); 1783 1784 return (true); 1785 } 1786 1787 static int 1788 carp_ioctl_set(if_t ifp, struct carpkreq *carpr) 1789 { 1790 struct epoch_tracker et; 1791 struct carp_softc *sc = NULL; 1792 int error = 0; 1793 1794 1795 if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID || 1796 carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) { 1797 return (EINVAL); 1798 } 1799 1800 if (ifp->if_carp) { 1801 IFNET_FOREACH_CARP(ifp, sc) 1802 if (sc->sc_vhid == carpr->carpr_vhid) 1803 break; 1804 } 1805 if (sc == NULL) { 1806 sc = carp_alloc(ifp); 1807 CARP_LOCK(sc); 1808 sc->sc_vhid = carpr->carpr_vhid; 1809 LLADDR(&sc->sc_addr)[0] = 0; 1810 LLADDR(&sc->sc_addr)[1] = 0; 1811 LLADDR(&sc->sc_addr)[2] = 0x5e; 1812 LLADDR(&sc->sc_addr)[3] = 0; 1813 LLADDR(&sc->sc_addr)[4] = 1; 1814 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1815 } else 1816 
CARP_LOCK(sc); 1817 if (carpr->carpr_advbase > 0) { 1818 if (carpr->carpr_advbase > 255 || 1819 carpr->carpr_advbase < CARP_DFLTINTV) { 1820 error = EINVAL; 1821 goto out; 1822 } 1823 sc->sc_advbase = carpr->carpr_advbase; 1824 } 1825 if (carpr->carpr_advskew >= 255) { 1826 error = EINVAL; 1827 goto out; 1828 } 1829 sc->sc_advskew = carpr->carpr_advskew; 1830 if (carpr->carpr_addr.s_addr != INADDR_ANY) 1831 sc->sc_carpaddr = carpr->carpr_addr; 1832 if (! IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { 1833 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, 1834 sizeof(sc->sc_carpaddr6)); 1835 } 1836 if (carpr->carpr_key[0] != '\0') { 1837 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1838 carp_hmac_prepare(sc); 1839 } 1840 if (sc->sc_state != INIT && 1841 carpr->carpr_state != sc->sc_state) { 1842 switch (carpr->carpr_state) { 1843 case BACKUP: 1844 callout_stop(&sc->sc_ad_tmo); 1845 carp_set_state(sc, BACKUP, 1846 "user requested via ifconfig"); 1847 carp_setrun(sc, 0); 1848 carp_delroute(sc); 1849 break; 1850 case MASTER: 1851 NET_EPOCH_ENTER(et); 1852 carp_master_down_locked(sc, 1853 "user requested via ifconfig"); 1854 NET_EPOCH_EXIT(et); 1855 break; 1856 default: 1857 break; 1858 } 1859 } 1860 1861 out: 1862 CARP_UNLOCK(sc); 1863 1864 return (error); 1865 } 1866 1867 static int 1868 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr, 1869 bool (*outfn)(void *, struct carp_softc *, int), void *arg) 1870 { 1871 int priveleged; 1872 struct carp_softc *sc; 1873 1874 if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID) 1875 return (EINVAL); 1876 if (carpr->carpr_count < 1) 1877 return (EMSGSIZE); 1878 if (ifp->if_carp == NULL) 1879 return (ENOENT); 1880 1881 priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0); 1882 if (carpr->carpr_vhid != 0) { 1883 IFNET_FOREACH_CARP(ifp, sc) 1884 if (sc->sc_vhid == carpr->carpr_vhid) 1885 break; 1886 if (sc == NULL) 1887 return (ENOENT); 1888 1889 if (! 
outfn(arg, sc, priveleged)) 1890 return (ENOMEM); 1891 carpr->carpr_count = 1; 1892 } else { 1893 int count; 1894 1895 count = 0; 1896 IFNET_FOREACH_CARP(ifp, sc) 1897 count++; 1898 1899 if (count > carpr->carpr_count) 1900 return (EMSGSIZE); 1901 1902 IFNET_FOREACH_CARP(ifp, sc) { 1903 if (! outfn(arg, sc, priveleged)) 1904 return (ENOMEM); 1905 carpr->carpr_count = count; 1906 } 1907 } 1908 1909 return (0); 1910 } 1911 1912 int 1913 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1914 { 1915 struct carpreq carpr; 1916 struct carpkreq carprk = { }; 1917 struct ifnet *ifp; 1918 int error = 0; 1919 1920 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 1921 return (error); 1922 1923 ifp = ifunit_ref(ifr->ifr_name); 1924 if ((error = carp_is_supported_if(ifp)) != 0) 1925 goto out; 1926 1927 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1928 error = EADDRNOTAVAIL; 1929 goto out; 1930 } 1931 1932 sx_xlock(&carp_sx); 1933 switch (cmd) { 1934 case SIOCSVH: 1935 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1936 break; 1937 1938 memcpy(&carprk, &carpr, sizeof(carpr)); 1939 error = carp_ioctl_set(ifp, &carprk); 1940 break; 1941 1942 case SIOCGVH: 1943 error = carp_ioctl_get(ifp, td->td_ucred, &carpr, 1944 carp_carprcp, &carpr); 1945 if (error == 0) { 1946 error = copyout(&carpr, 1947 (char *)ifr_data_get_ptr(ifr), 1948 carpr.carpr_count * sizeof(carpr)); 1949 } 1950 break; 1951 default: 1952 error = EINVAL; 1953 } 1954 sx_xunlock(&carp_sx); 1955 1956 out: 1957 if (ifp != NULL) 1958 if_rele(ifp); 1959 1960 return (error); 1961 } 1962 1963 static int 1964 carp_get_vhid(struct ifaddr *ifa) 1965 { 1966 1967 if (ifa == NULL || ifa->ifa_carp == NULL) 1968 return (0); 1969 1970 return (ifa->ifa_carp->sc_vhid); 1971 } 1972 1973 int 1974 carp_attach(struct ifaddr *ifa, int vhid) 1975 { 1976 struct ifnet *ifp = ifa->ifa_ifp; 1977 struct carp_if *cif = ifp->if_carp; 1978 struct carp_softc *sc; 1979 int index, error; 1980 1981 KASSERT(ifa->ifa_carp == 
NULL, ("%s: ifa %p attached", __func__, ifa)); 1982 1983 switch (ifa->ifa_addr->sa_family) { 1984 #ifdef INET 1985 case AF_INET: 1986 #endif 1987 #ifdef INET6 1988 case AF_INET6: 1989 #endif 1990 break; 1991 default: 1992 return (EPROTOTYPE); 1993 } 1994 1995 sx_xlock(&carp_sx); 1996 if (ifp->if_carp == NULL) { 1997 sx_xunlock(&carp_sx); 1998 return (ENOPROTOOPT); 1999 } 2000 2001 IFNET_FOREACH_CARP(ifp, sc) 2002 if (sc->sc_vhid == vhid) 2003 break; 2004 if (sc == NULL) { 2005 sx_xunlock(&carp_sx); 2006 return (ENOENT); 2007 } 2008 2009 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 2010 if (error) { 2011 CIF_FREE(cif); 2012 sx_xunlock(&carp_sx); 2013 return (error); 2014 } 2015 2016 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 2017 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 2018 carp_grow_ifas(sc); 2019 2020 switch (ifa->ifa_addr->sa_family) { 2021 #ifdef INET 2022 case AF_INET: 2023 cif->cif_naddrs++; 2024 sc->sc_naddrs++; 2025 break; 2026 #endif 2027 #ifdef INET6 2028 case AF_INET6: 2029 cif->cif_naddrs6++; 2030 sc->sc_naddrs6++; 2031 break; 2032 #endif 2033 } 2034 2035 ifa_ref(ifa); 2036 2037 CARP_LOCK(sc); 2038 sc->sc_ifas[index - 1] = ifa; 2039 ifa->ifa_carp = sc; 2040 carp_hmac_prepare(sc); 2041 carp_sc_state(sc); 2042 CARP_UNLOCK(sc); 2043 2044 sx_xunlock(&carp_sx); 2045 2046 return (0); 2047 } 2048 2049 void 2050 carp_detach(struct ifaddr *ifa, bool keep_cif) 2051 { 2052 struct ifnet *ifp = ifa->ifa_ifp; 2053 struct carp_if *cif = ifp->if_carp; 2054 struct carp_softc *sc = ifa->ifa_carp; 2055 int i, index; 2056 2057 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 2058 2059 sx_xlock(&carp_sx); 2060 2061 CARP_LOCK(sc); 2062 /* Shift array. 
*/ 2063 index = sc->sc_naddrs + sc->sc_naddrs6; 2064 for (i = 0; i < index; i++) 2065 if (sc->sc_ifas[i] == ifa) 2066 break; 2067 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 2068 for (; i < index - 1; i++) 2069 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 2070 sc->sc_ifas[index - 1] = NULL; 2071 2072 switch (ifa->ifa_addr->sa_family) { 2073 #ifdef INET 2074 case AF_INET: 2075 cif->cif_naddrs--; 2076 sc->sc_naddrs--; 2077 break; 2078 #endif 2079 #ifdef INET6 2080 case AF_INET6: 2081 cif->cif_naddrs6--; 2082 sc->sc_naddrs6--; 2083 break; 2084 #endif 2085 } 2086 2087 carp_ifa_delroute(ifa); 2088 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2089 2090 ifa->ifa_carp = NULL; 2091 ifa_free(ifa); 2092 2093 carp_hmac_prepare(sc); 2094 carp_sc_state(sc); 2095 2096 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2097 carp_destroy(sc); 2098 else 2099 CARP_UNLOCK(sc); 2100 2101 if (!keep_cif) 2102 CIF_FREE(cif); 2103 2104 sx_xunlock(&carp_sx); 2105 } 2106 2107 static void 2108 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2109 { 2110 2111 CARP_LOCK_ASSERT(sc); 2112 2113 if (sc->sc_state != state) { 2114 const char *carp_states[] = { CARP_STATES }; 2115 char subsys[IFNAMSIZ+5]; 2116 2117 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2118 if_name(sc->sc_carpdev)); 2119 2120 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2121 carp_states[sc->sc_state], carp_states[state], reason); 2122 2123 sc->sc_state = state; 2124 2125 devctl_notify("CARP", subsys, carp_states[state], NULL); 2126 } 2127 } 2128 2129 static void 2130 carp_linkstate(struct ifnet *ifp) 2131 { 2132 struct carp_softc *sc; 2133 2134 CIF_LOCK(ifp->if_carp); 2135 IFNET_FOREACH_CARP(ifp, sc) { 2136 CARP_LOCK(sc); 2137 carp_sc_state(sc); 2138 CARP_UNLOCK(sc); 2139 } 2140 CIF_UNLOCK(ifp->if_carp); 2141 } 2142 2143 static void 2144 carp_sc_state(struct carp_softc *sc) 2145 { 2146 2147 CARP_LOCK_ASSERT(sc); 2148 2149 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2150 
!(sc->sc_carpdev->if_flags & IFF_UP) || 2151 !V_carp_allow) { 2152 callout_stop(&sc->sc_ad_tmo); 2153 #ifdef INET 2154 callout_stop(&sc->sc_md_tmo); 2155 #endif 2156 #ifdef INET6 2157 callout_stop(&sc->sc_md6_tmo); 2158 #endif 2159 carp_set_state(sc, INIT, "hardware interface down"); 2160 carp_setrun(sc, 0); 2161 if (!sc->sc_suppress) 2162 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2163 sc->sc_suppress = 1; 2164 } else { 2165 carp_set_state(sc, INIT, "hardware interface up"); 2166 carp_setrun(sc, 0); 2167 if (sc->sc_suppress) 2168 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2169 sc->sc_suppress = 0; 2170 } 2171 } 2172 2173 static void 2174 carp_demote_adj(int adj, char *reason) 2175 { 2176 atomic_add_int(&V_carp_demotion, adj); 2177 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2178 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2179 } 2180 2181 static int 2182 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2183 { 2184 int new, error; 2185 struct carp_softc *sc; 2186 2187 new = V_carp_allow; 2188 error = sysctl_handle_int(oidp, &new, 0, req); 2189 if (error || !req->newptr) 2190 return (error); 2191 2192 if (V_carp_allow != new) { 2193 V_carp_allow = new; 2194 2195 mtx_lock(&carp_mtx); 2196 LIST_FOREACH(sc, &carp_list, sc_next) { 2197 CARP_LOCK(sc); 2198 if (curvnet == sc->sc_carpdev->if_vnet) 2199 carp_sc_state(sc); 2200 CARP_UNLOCK(sc); 2201 } 2202 mtx_unlock(&carp_mtx); 2203 } 2204 2205 return (0); 2206 } 2207 2208 static int 2209 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2210 { 2211 int new, error; 2212 2213 new = V_carp_dscp; 2214 error = sysctl_handle_int(oidp, &new, 0, req); 2215 if (error || !req->newptr) 2216 return (error); 2217 2218 if (new < 0 || new > 63) 2219 return (EINVAL); 2220 2221 V_carp_dscp = new; 2222 2223 return (0); 2224 } 2225 2226 static int 2227 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2228 { 2229 int new, error; 2230 2231 new = V_carp_demotion; 2232 error = sysctl_handle_int(oidp, &new, 0, req); 
2233 if (error || !req->newptr) 2234 return (error); 2235 2236 carp_demote_adj(new, "sysctl"); 2237 2238 return (0); 2239 } 2240 2241 static int 2242 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 2243 { 2244 if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN)) 2245 return (EINVAL); 2246 2247 memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla)); 2248 return (0); 2249 } 2250 2251 struct carp_nl_send_args { 2252 struct nlmsghdr *hdr; 2253 struct nl_pstate *npt; 2254 }; 2255 2256 static bool 2257 carp_nl_send(void *arg, struct carp_softc *sc, int priv) 2258 { 2259 struct carp_nl_send_args *nlsa = arg; 2260 struct nlmsghdr *hdr = nlsa->hdr; 2261 struct nl_pstate *npt = nlsa->npt; 2262 struct nl_writer *nw = npt->nw; 2263 struct genlmsghdr *ghdr_new; 2264 2265 if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { 2266 nlmsg_abort(nw); 2267 return (false); 2268 } 2269 2270 ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); 2271 if (ghdr_new == NULL) { 2272 nlmsg_abort(nw); 2273 return (false); 2274 } 2275 2276 ghdr_new->cmd = CARP_NL_CMD_GET; 2277 ghdr_new->version = 0; 2278 ghdr_new->reserved = 0; 2279 2280 CARP_LOCK(sc); 2281 2282 nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); 2283 nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); 2284 nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); 2285 nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); 2286 nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); 2287 nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); 2288 2289 if (priv) 2290 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key); 2291 2292 CARP_UNLOCK(sc); 2293 2294 if (! 
nlmsg_end(nw)) { 2295 nlmsg_abort(nw); 2296 return (false); 2297 } 2298 2299 return (true); 2300 } 2301 2302 struct nl_carp_parsed { 2303 unsigned int ifindex; 2304 uint32_t state; 2305 uint32_t vhid; 2306 int32_t advbase; 2307 int32_t advskew; 2308 char key[CARP_KEY_LEN]; 2309 struct in_addr addr; 2310 struct in6_addr addr6; 2311 }; 2312 2313 #define _IN(_field) offsetof(struct genlmsghdr, _field) 2314 #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) 2315 2316 static const struct nlattr_parser nla_p_set[] = { 2317 { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, 2318 { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, 2319 { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 }, 2320 { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 }, 2321 { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key }, 2322 { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 }, 2323 { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, 2324 { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, 2325 }; 2326 static const struct nlfield_parser nlf_p_set[] = { 2327 }; 2328 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set); 2329 #undef _IN 2330 #undef _OUT 2331 2332 2333 static int 2334 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt) 2335 { 2336 struct nl_carp_parsed attrs = { }; 2337 struct carp_nl_send_args args; 2338 struct carpreq carpr = { }; 2339 struct epoch_tracker et; 2340 if_t ifp; 2341 int error; 2342 2343 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2344 if (error != 0) 2345 return (error); 2346 2347 NET_EPOCH_ENTER(et); 2348 ifp = ifnet_byindex_ref(attrs.ifindex); 2349 NET_EPOCH_EXIT(et); 2350 2351 if ((error = carp_is_supported_if(ifp)) != 0) 2352 goto out; 2353 2354 hdr->nlmsg_flags |= NLM_F_MULTI; 2355 args.hdr = hdr; 2356 args.npt = npt; 2357 2358 carpr.carpr_vhid = 
attrs.vhid; 2359 carpr.carpr_count = CARP_MAXVHID; 2360 2361 sx_xlock(&carp_sx); 2362 error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr, 2363 carp_nl_send, &args); 2364 sx_xunlock(&carp_sx); 2365 2366 if (! nlmsg_end_dump(npt->nw, error, hdr)) 2367 error = ENOMEM; 2368 2369 out: 2370 if (ifp != NULL) 2371 if_rele(ifp); 2372 2373 return (error); 2374 } 2375 2376 static int 2377 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) 2378 { 2379 struct nl_carp_parsed attrs = { }; 2380 struct carpkreq carpr; 2381 struct epoch_tracker et; 2382 if_t ifp; 2383 int error; 2384 2385 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2386 if (error != 0) 2387 return (error); 2388 2389 if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID) 2390 return (EINVAL); 2391 if (attrs.state > CARP_MAXSTATE) 2392 return (EINVAL); 2393 if (attrs.advbase < 0 || attrs.advskew < 0) 2394 return (EINVAL); 2395 if (attrs.advbase > 255) 2396 return (EINVAL); 2397 if (attrs.advskew >= 255) 2398 return (EINVAL); 2399 2400 NET_EPOCH_ENTER(et); 2401 ifp = ifnet_byindex_ref(attrs.ifindex); 2402 NET_EPOCH_EXIT(et); 2403 2404 if ((error = carp_is_supported_if(ifp)) != 0) 2405 goto out; 2406 2407 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2408 error = EADDRNOTAVAIL; 2409 goto out; 2410 } 2411 2412 carpr.carpr_count = 1; 2413 carpr.carpr_vhid = attrs.vhid; 2414 carpr.carpr_state = attrs.state; 2415 carpr.carpr_advbase = attrs.advbase; 2416 carpr.carpr_advskew = attrs.advskew; 2417 carpr.carpr_addr = attrs.addr; 2418 carpr.carpr_addr6 = attrs.addr6; 2419 2420 memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); 2421 2422 sx_xlock(&carp_sx); 2423 error = carp_ioctl_set(ifp, &carpr); 2424 sx_xunlock(&carp_sx); 2425 2426 out: 2427 if (ifp != NULL) 2428 if_rele(ifp); 2429 2430 return (error); 2431 } 2432 2433 static const struct nlhdr_parser *all_parsers[] = { 2434 &carp_parser 2435 }; 2436 2437 static const struct genl_cmd carp_cmds[] = { 2438 { 2439 .cmd_num = CARP_NL_CMD_GET, 2440 
.cmd_name = "SIOCGVH", 2441 .cmd_cb = carp_nl_get, 2442 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | 2443 GENL_CMD_CAP_HASPOL, 2444 }, 2445 { 2446 .cmd_num = CARP_NL_CMD_SET, 2447 .cmd_name = "SIOCSVH", 2448 .cmd_cb = carp_nl_set, 2449 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, 2450 .cmd_priv = PRIV_NETINET_CARP, 2451 }, 2452 }; 2453 2454 static void 2455 carp_nl_register(void) 2456 { 2457 bool ret __diagused; 2458 int family_id __diagused; 2459 2460 NL_VERIFY_PARSERS(all_parsers); 2461 family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, 2462 CARP_NL_CMD_MAX); 2463 MPASS(family_id != 0); 2464 2465 ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds, 2466 NL_ARRAY_LEN(carp_cmds)); 2467 MPASS(ret); 2468 } 2469 2470 static void 2471 carp_nl_unregister(void) 2472 { 2473 genl_unregister_family(CARP_NL_FAMILY_NAME); 2474 } 2475 2476 static void 2477 carp_mod_cleanup(void) 2478 { 2479 2480 carp_nl_unregister(); 2481 2482 #ifdef INET 2483 (void)ipproto_unregister(IPPROTO_CARP); 2484 carp_iamatch_p = NULL; 2485 #endif 2486 #ifdef INET6 2487 (void)ip6proto_unregister(IPPROTO_CARP); 2488 carp_iamatch6_p = NULL; 2489 carp_macmatch6_p = NULL; 2490 #endif 2491 carp_ioctl_p = NULL; 2492 carp_attach_p = NULL; 2493 carp_detach_p = NULL; 2494 carp_get_vhid_p = NULL; 2495 carp_linkstate_p = NULL; 2496 carp_forus_p = NULL; 2497 carp_output_p = NULL; 2498 carp_demote_adj_p = NULL; 2499 carp_master_p = NULL; 2500 mtx_unlock(&carp_mtx); 2501 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2502 mtx_destroy(&carp_mtx); 2503 sx_destroy(&carp_sx); 2504 } 2505 2506 static void 2507 ipcarp_sysinit(void) 2508 { 2509 2510 /* Load allow as tunable so to postpone carp start after module load */ 2511 TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow); 2512 } 2513 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL); 2514 2515 static int 2516 carp_mod_load(void) 2517 { 2518 int err; 2519 2520 mtx_init(&carp_mtx, "carp_mtx", NULL, 
MTX_DEF); 2521 sx_init(&carp_sx, "carp_sx"); 2522 LIST_INIT(&carp_list); 2523 carp_get_vhid_p = carp_get_vhid; 2524 carp_forus_p = carp_forus; 2525 carp_output_p = carp_output; 2526 carp_linkstate_p = carp_linkstate; 2527 carp_ioctl_p = carp_ioctl; 2528 carp_attach_p = carp_attach; 2529 carp_detach_p = carp_detach; 2530 carp_demote_adj_p = carp_demote_adj; 2531 carp_master_p = carp_master; 2532 #ifdef INET6 2533 carp_iamatch6_p = carp_iamatch6; 2534 carp_macmatch6_p = carp_macmatch6; 2535 err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); 2536 if (err) { 2537 printf("carp: error %d registering with INET6\n", err); 2538 carp_mod_cleanup(); 2539 return (err); 2540 } 2541 #endif 2542 #ifdef INET 2543 carp_iamatch_p = carp_iamatch; 2544 err = ipproto_register(IPPROTO_CARP, carp_input, NULL); 2545 if (err) { 2546 printf("carp: error %d registering with INET\n", err); 2547 carp_mod_cleanup(); 2548 return (err); 2549 } 2550 #endif 2551 2552 carp_nl_register(); 2553 2554 return (0); 2555 } 2556 2557 static int 2558 carp_modevent(module_t mod, int type, void *data) 2559 { 2560 switch (type) { 2561 case MOD_LOAD: 2562 return carp_mod_load(); 2563 /* NOTREACHED */ 2564 case MOD_UNLOAD: 2565 mtx_lock(&carp_mtx); 2566 if (LIST_EMPTY(&carp_list)) 2567 carp_mod_cleanup(); 2568 else { 2569 mtx_unlock(&carp_mtx); 2570 return (EBUSY); 2571 } 2572 break; 2573 2574 default: 2575 return (EINVAL); 2576 } 2577 2578 return (0); 2579 } 2580 2581 static moduledata_t carp_mod = { 2582 "carp", 2583 carp_modevent, 2584 0 2585 }; 2586 2587 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2588