1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002 Michael Shalayeff. 5 * Copyright (c) 2003 Ryan McBride. 6 * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 22 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 * THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/devctl.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/counter.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_carp_nl.h>
#include <netinet/ip.h>
#include <machine/in_cksum.h>
#endif
#ifdef INET
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_generic.h>
#include <netlink/netlink_message_parser.h>

#include <crypto/sha1.h>

static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");

/*
 * Per-vhid state.  One softc exists for every virtual host id configured
 * on a parent interface; see the design notes further below.
 */
struct carp_softc {
	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
	struct sockaddr_dl	sc_addr;	/* Our link level address. */
	struct callout		sc_ad_tmo;	/* Advertising timeout. */
#ifdef INET
	struct callout		sc_md_tmo;	/* Master down timeout. */
#endif
#ifdef INET6
	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
#endif
	struct mtx		sc_mtx;		/* Softc lock, see CARP_LOCK(). */

	int			sc_vhid;	/* Virtual host id. */
	int			sc_advskew;	/* Skew, in 1/256 s units. */
	int			sc_advbase;	/* Base interval, seconds. */
	struct in_addr		sc_carpaddr;	/* IPv4 destination of ads. */
	struct in6_addr		sc_carpaddr6;	/* IPv6 destination of ads. */

	int			sc_naddrs;	/* # of AF_INET entries in sc_ifas. */
	int			sc_naddrs6;	/* # of AF_INET6 entries in sc_ifas. */
	int			sc_ifasiz;	/* Presumably allocated size of
						 * sc_ifas -- TODO confirm. */
	enum { INIT = 0, BACKUP, MASTER }	sc_state;
	int			sc_suppress;
	int			sc_sendad_errors;	/* See carp_send_ad_error(). */
#define	CARP_SENDAD_MAX_ERRORS	3
	int			sc_sendad_success;	/* See carp_send_ad_error(). */
#define	CARP_SENDAD_MIN_SUCCESS 3

	int			sc_init_counter; /* Non-zero: pick a random
						  * sc_counter on next ad. */
	uint64_t		sc_counter;	/* Advertisement counter. */

	/* authentication */
#define	CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];
	unsigned char sc_pad[CARP_HMAC_PAD];	/* HMAC pad; holds the opad
						 * after carp_hmac_prepare(). */
	SHA1_CTX sc_sha1;			/* Precomputed inner hash. */

	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
};

/*
 * Per-interface CARP state, reached through ifp->if_carp.  Holds the list
 * of softcs (vhids) configured on that interface and the multicast options
 * handed to ip_output()/ip6_output() when sending advertisements.
 */
struct carp_if {
#ifdef INET
	int	cif_naddrs;
#endif
#ifdef INET6
	int	cif_naddrs6;
#endif
	TAILQ_HEAD(, carp_softc) cif_vrs;	/* Softcs on this ifnet. */
#ifdef INET
	struct ip_moptions 	 cif_imo;
#endif
#ifdef INET6
	struct ip6_moptions 	 cif_im6o;
#endif
	struct ifnet	*cif_ifp;
	struct mtx	cif_mtx;		/* See CIF_LOCK(). */
	uint32_t	cif_flags;
#define	CIF_PROMISC	0x00000001
};

/*
 * Kernel equivalent of struct carpreq, but with more fields for new
 * features.
 */
struct carpkreq {
	int		carpr_count;
	int		carpr_vhid;
	int		carpr_state;
	int		carpr_advskew;
	int		carpr_advbase;
	unsigned char	carpr_key[CARP_KEY_LEN];
	/* Everything above this is identical to carpreq */
	struct in_addr	carpr_addr;
	struct in6_addr	carpr_addr6;
};

/*
 * Brief design of carp(4).
 *
 * Any carp-capable ifnet may have a list of carp softcs hanging off
 * its ifp->if_carp pointer. Each softc represents one unique virtual
 * host id, or vhid. The softc has a back pointer to the ifnet. All
 * softcs are joined in a global list, which has quite limited use.
 *
 * Any interface address that takes part in CARP negotiation has a
 * pointer to the softc of its vhid, ifa->ifa_carp. That can be either
 * an AF_INET or an AF_INET6 address.
 *
 * Although one can follow the softc's back pointer to the ifnet and
 * traverse its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers. This
 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
 * call into the network stack, thus avoiding LORs.
 *
 * Locking:
 *
 * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
 * callout-driven events and ioctl()s.
 *
 * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
 * To traverse the global list we use the mutex carp_mtx.
 *
 * Known issues with locking:
 *
 * - Sending ad, we put the pointer to the softc in an mtag, and no reference
 *   counting is done on the softc.
 * - On module unload we may race (?) with packet processing thread
 *   dereferencing our function pointers.
 */

/* Accept incoming CARP packets (default: enabled). */
VNET_DEFINE_STATIC(int, carp_allow) = 1;
#define	V_carp_allow	VNET(carp_allow)

/* Set DSCP in outgoing CARP packets (default: 56). */
VNET_DEFINE_STATIC(int, carp_dscp) = 56;
#define	V_carp_dscp	VNET(carp_dscp)

/* Preempt slower nodes (default: disabled). */
VNET_DEFINE_STATIC(int, carp_preempt) = 0;
#define	V_carp_preempt	VNET(carp_preempt)

/* Log level: > 0 enables CARP_LOG(), > 1 also enables CARP_DEBUG(). */
VNET_DEFINE_STATIC(int, carp_log) = 1;
#define	V_carp_log	VNET(carp_log)

/* Global advskew demotion, added to sc_advskew by DEMOTE_ADVSKEW(). */
VNET_DEFINE_STATIC(int, carp_demotion) = 0;
#define	V_carp_demotion	VNET(carp_demotion)

/* Send error demotion factor. */
VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
#define	V_carp_senderr_adj	VNET(carp_senderr_adj)

/* Iface down demotion factor.
*/ 232 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; 233 #define V_carp_ifdown_adj VNET(carp_ifdown_adj) 234 235 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); 236 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); 237 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); 238 239 SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 240 "CARP"); 241 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, 242 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 243 &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", 244 "Accept incoming CARP packets"); 245 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, 246 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 247 0, 0, carp_dscp_sysctl, "I", 248 "DSCP value for carp packets"); 249 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, 250 &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); 251 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, 252 &VNET_NAME(carp_log), 0, "CARP log level"); 253 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, 254 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 255 0, 0, carp_demote_adj_sysctl, "I", 256 "Adjust demotion factor (skew of advskew)"); 257 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, 258 CTLFLAG_VNET | CTLFLAG_RW, 259 &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); 260 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, 261 CTLFLAG_VNET | CTLFLAG_RW, 262 &VNET_NAME(carp_ifdown_adj), 0, 263 "Interface down demotion factor adjustment"); 264 265 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); 266 VNET_PCPUSTAT_SYSINIT(carpstats); 267 VNET_PCPUSTAT_SYSUNINIT(carpstats); 268 269 #define CARPSTATS_ADD(name, val) \ 270 counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ 271 sizeof(uint64_t)], (val)) 272 #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) 273 274 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, 
stats, struct carpstats, 275 carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 276 277 #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ 278 NULL, MTX_DEF) 279 #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) 280 #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 281 #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 282 #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 283 #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ 284 NULL, MTX_DEF) 285 #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 286 #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) 287 #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) 288 #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) 289 #define CIF_FREE(cif) do { \ 290 CIF_LOCK(cif); \ 291 if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ 292 carp_free_if(cif); \ 293 else \ 294 CIF_UNLOCK(cif); \ 295 } while (0) 296 297 #define CARP_LOG(...) do { \ 298 if (V_carp_log > 0) \ 299 log(LOG_INFO, "carp: " __VA_ARGS__); \ 300 } while (0) 301 302 #define CARP_DEBUG(...) do { \ 303 if (V_carp_log > 1) \ 304 log(LOG_DEBUG, __VA_ARGS__); \ 305 } while (0) 306 307 #define IFNET_FOREACH_IFA(ifp, ifa) \ 308 CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ 309 if ((ifa)->ifa_carp != NULL) 310 311 #define CARP_FOREACH_IFA(sc, ifa) \ 312 CARP_LOCK_ASSERT(sc); \ 313 for (int _i = 0; \ 314 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ 315 ((ifa) = sc->sc_ifas[_i]) != NULL; \ 316 ++_i) 317 318 #define IFNET_FOREACH_CARP(ifp, sc) \ 319 KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ 320 sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ 321 TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) 322 323 #define DEMOTE_ADVSKEW(sc) \ 324 (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ 325 CARP_MAXSKEW : \ 326 (((sc)->sc_advskew + V_carp_demotion < 0) ? 
\ 327 0 : ((sc)->sc_advskew + V_carp_demotion))) 328 329 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); 330 static struct carp_softc 331 *carp_alloc(struct ifnet *); 332 static void carp_destroy(struct carp_softc *); 333 static struct carp_if 334 *carp_alloc_if(struct ifnet *); 335 static void carp_free_if(struct carp_if *); 336 static void carp_set_state(struct carp_softc *, int, const char* reason); 337 static void carp_sc_state(struct carp_softc *); 338 static void carp_setrun(struct carp_softc *, sa_family_t); 339 static void carp_master_down(void *); 340 static void carp_master_down_locked(struct carp_softc *, 341 const char* reason); 342 static void carp_send_ad(void *); 343 static void carp_send_ad_locked(struct carp_softc *); 344 static void carp_addroute(struct carp_softc *); 345 static void carp_ifa_addroute(struct ifaddr *); 346 static void carp_delroute(struct carp_softc *); 347 static void carp_ifa_delroute(struct ifaddr *); 348 static void carp_send_ad_all(void *, int); 349 static void carp_demote_adj(int, char *); 350 351 static LIST_HEAD(, carp_softc) carp_list; 352 static struct mtx carp_mtx; 353 static struct sx carp_sx; 354 static struct task carp_sendall_task = 355 TASK_INITIALIZER(0, carp_send_ad_all, NULL); 356 357 static int 358 carp_is_supported_if(if_t ifp) 359 { 360 if (ifp == NULL) 361 return (ENXIO); 362 363 switch (ifp->if_type) { 364 case IFT_ETHER: 365 case IFT_L2VLAN: 366 case IFT_BRIDGE: 367 break; 368 default: 369 return (EOPNOTSUPP); 370 } 371 372 return (0); 373 } 374 375 static void 376 carp_hmac_prepare(struct carp_softc *sc) 377 { 378 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 379 uint8_t vhid = sc->sc_vhid & 0xff; 380 struct ifaddr *ifa; 381 int i, found; 382 #ifdef INET 383 struct in_addr last, cur, in; 384 #endif 385 #ifdef INET6 386 struct in6_addr last6, cur6, in6; 387 #endif 388 389 CARP_LOCK_ASSERT(sc); 390 391 /* Compute ipad from key. 
*/ 392 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 393 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 394 for (i = 0; i < sizeof(sc->sc_pad); i++) 395 sc->sc_pad[i] ^= 0x36; 396 397 /* Precompute first part of inner hash. */ 398 SHA1Init(&sc->sc_sha1); 399 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 400 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 401 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 402 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 403 #ifdef INET 404 cur.s_addr = 0; 405 do { 406 found = 0; 407 last = cur; 408 cur.s_addr = 0xffffffff; 409 CARP_FOREACH_IFA(sc, ifa) { 410 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 411 if (ifa->ifa_addr->sa_family == AF_INET && 412 ntohl(in.s_addr) > ntohl(last.s_addr) && 413 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 414 cur.s_addr = in.s_addr; 415 found++; 416 } 417 } 418 if (found) 419 SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); 420 } while (found); 421 #endif /* INET */ 422 #ifdef INET6 423 memset(&cur6, 0, sizeof(cur6)); 424 do { 425 found = 0; 426 last6 = cur6; 427 memset(&cur6, 0xff, sizeof(cur6)); 428 CARP_FOREACH_IFA(sc, ifa) { 429 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 430 if (IN6_IS_SCOPE_EMBED(&in6)) 431 in6.s6_addr16[1] = 0; 432 if (ifa->ifa_addr->sa_family == AF_INET6 && 433 memcmp(&in6, &last6, sizeof(in6)) > 0 && 434 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 435 cur6 = in6; 436 found++; 437 } 438 } 439 if (found) 440 SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); 441 } while (found); 442 #endif /* INET6 */ 443 444 /* convert ipad to opad */ 445 for (i = 0; i < sizeof(sc->sc_pad); i++) 446 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 447 } 448 449 static void 450 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 451 unsigned char md[20]) 452 { 453 SHA1_CTX sha1ctx; 454 455 CARP_LOCK_ASSERT(sc); 456 457 /* fetch first half of inner hash */ 458 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 459 460 SHA1Update(&sha1ctx, (void *)counter, 
sizeof(sc->sc_counter)); 461 SHA1Final(md, &sha1ctx); 462 463 /* outer hash */ 464 SHA1Init(&sha1ctx); 465 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 466 SHA1Update(&sha1ctx, md, 20); 467 SHA1Final(md, &sha1ctx); 468 } 469 470 static int 471 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 472 unsigned char md[20]) 473 { 474 unsigned char md2[20]; 475 476 CARP_LOCK_ASSERT(sc); 477 478 carp_hmac_generate(sc, counter, md2); 479 480 return (bcmp(md, md2, sizeof(md2))); 481 } 482 483 /* 484 * process input packet. 485 * we have rearranged checks order compared to the rfc, 486 * but it seems more efficient this way or not possible otherwise. 487 */ 488 #ifdef INET 489 static int 490 carp_input(struct mbuf **mp, int *offp, int proto) 491 { 492 struct mbuf *m = *mp; 493 struct ip *ip = mtod(m, struct ip *); 494 struct carp_header *ch; 495 int iplen, len; 496 497 iplen = *offp; 498 *mp = NULL; 499 500 CARPSTATS_INC(carps_ipackets); 501 502 if (!V_carp_allow) { 503 m_freem(m); 504 return (IPPROTO_DONE); 505 } 506 507 iplen = ip->ip_hl << 2; 508 509 if (m->m_pkthdr.len < iplen + sizeof(*ch)) { 510 CARPSTATS_INC(carps_badlen); 511 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " 512 "on %s\n", __func__, m->m_len - sizeof(struct ip), 513 if_name(m->m_pkthdr.rcvif)); 514 m_freem(m); 515 return (IPPROTO_DONE); 516 } 517 518 if (iplen + sizeof(*ch) < m->m_len) { 519 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { 520 CARPSTATS_INC(carps_hdrops); 521 CARP_DEBUG("%s: pullup failed\n", __func__); 522 return (IPPROTO_DONE); 523 } 524 ip = mtod(m, struct ip *); 525 } 526 ch = (struct carp_header *)((char *)ip + iplen); 527 528 /* 529 * verify that the received packet length is 530 * equal to the CARP header 531 */ 532 len = iplen + sizeof(*ch); 533 if (len > m->m_pkthdr.len) { 534 CARPSTATS_INC(carps_badlen); 535 CARP_DEBUG("%s: packet too short %d on %s\n", __func__, 536 m->m_pkthdr.len, 537 if_name(m->m_pkthdr.rcvif)); 538 m_freem(m); 539 
return (IPPROTO_DONE); 540 } 541 542 if ((m = m_pullup(m, len)) == NULL) { 543 CARPSTATS_INC(carps_hdrops); 544 return (IPPROTO_DONE); 545 } 546 ip = mtod(m, struct ip *); 547 ch = (struct carp_header *)((char *)ip + iplen); 548 549 /* verify the CARP checksum */ 550 m->m_data += iplen; 551 if (in_cksum(m, len - iplen)) { 552 CARPSTATS_INC(carps_badsum); 553 CARP_DEBUG("%s: checksum failed on %s\n", __func__, 554 if_name(m->m_pkthdr.rcvif)); 555 m_freem(m); 556 return (IPPROTO_DONE); 557 } 558 m->m_data -= iplen; 559 560 carp_input_c(m, ch, AF_INET, ip->ip_ttl); 561 return (IPPROTO_DONE); 562 } 563 #endif 564 565 #ifdef INET6 566 static int 567 carp6_input(struct mbuf **mp, int *offp, int proto) 568 { 569 struct mbuf *m = *mp; 570 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 571 struct carp_header *ch; 572 u_int len; 573 574 CARPSTATS_INC(carps_ipackets6); 575 576 if (!V_carp_allow) { 577 m_freem(m); 578 return (IPPROTO_DONE); 579 } 580 581 /* check if received on a valid carp interface */ 582 if (m->m_pkthdr.rcvif->if_carp == NULL) { 583 CARPSTATS_INC(carps_badif); 584 CARP_DEBUG("%s: packet received on non-carp interface: %s\n", 585 __func__, if_name(m->m_pkthdr.rcvif)); 586 m_freem(m); 587 return (IPPROTO_DONE); 588 } 589 590 /* verify that we have a complete carp packet */ 591 if (m->m_len < *offp + sizeof(*ch)) { 592 len = m->m_len; 593 m = m_pullup(m, *offp + sizeof(*ch)); 594 if (m == NULL) { 595 CARPSTATS_INC(carps_badlen); 596 CARP_DEBUG("%s: packet size %u too small\n", __func__, len); 597 return (IPPROTO_DONE); 598 } 599 ip6 = mtod(m, struct ip6_hdr *); 600 } 601 ch = (struct carp_header *)(mtod(m, char *) + *offp); 602 603 /* verify the CARP checksum */ 604 m->m_data += *offp; 605 if (in_cksum(m, sizeof(*ch))) { 606 CARPSTATS_INC(carps_badsum); 607 CARP_DEBUG("%s: checksum failed, on %s\n", __func__, 608 if_name(m->m_pkthdr.rcvif)); 609 m_freem(m); 610 return (IPPROTO_DONE); 611 } 612 m->m_data -= *offp; 613 614 carp_input_c(m, ch, AF_INET6, 
ip6->ip6_hlim); 615 return (IPPROTO_DONE); 616 } 617 #endif /* INET6 */ 618 619 /* 620 * This routine should not be necessary at all, but some switches 621 * (VMWare ESX vswitches) can echo our own packets back at us, 622 * and we must ignore them or they will cause us to drop out of 623 * MASTER mode. 624 * 625 * We cannot catch all cases of network loops. Instead, what we 626 * do here is catch any packet that arrives with a carp header 627 * with a VHID of 0, that comes from an address that is our own. 628 * These packets are by definition "from us" (even if they are from 629 * a misconfigured host that is pretending to be us). 630 * 631 * The VHID test is outside this mini-function. 632 */ 633 static int 634 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) 635 { 636 #ifdef INET 637 struct ip *ip4; 638 struct in_addr in4; 639 #endif 640 #ifdef INET6 641 struct ip6_hdr *ip6; 642 struct in6_addr in6; 643 #endif 644 645 switch (af) { 646 #ifdef INET 647 case AF_INET: 648 ip4 = mtod(m, struct ip *); 649 in4 = ifatoia(ifa)->ia_addr.sin_addr; 650 return (in4.s_addr == ip4->ip_src.s_addr); 651 #endif 652 #ifdef INET6 653 case AF_INET6: 654 ip6 = mtod(m, struct ip6_hdr *); 655 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 656 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); 657 #endif 658 default: 659 break; 660 } 661 return (0); 662 } 663 664 static void 665 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) 666 { 667 struct ifnet *ifp = m->m_pkthdr.rcvif; 668 struct ifaddr *ifa, *match; 669 struct carp_softc *sc; 670 uint64_t tmp_counter; 671 struct timeval sc_tv, ch_tv; 672 int error; 673 bool multicast = false; 674 675 NET_EPOCH_ASSERT(); 676 677 /* 678 * Verify that the VHID is valid on the receiving interface. 679 * 680 * There should be just one match. If there are none 681 * the VHID is not valid and we drop the packet. 
If 682 * there are multiple VHID matches, take just the first 683 * one, for compatibility with previous code. While we're 684 * scanning, check for obvious loops in the network topology 685 * (these should never happen, and as noted above, we may 686 * miss real loops; this is just a double-check). 687 */ 688 error = 0; 689 match = NULL; 690 IFNET_FOREACH_IFA(ifp, ifa) { 691 if (match == NULL && ifa->ifa_carp != NULL && 692 ifa->ifa_addr->sa_family == af && 693 ifa->ifa_carp->sc_vhid == ch->carp_vhid) 694 match = ifa; 695 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) 696 error = ELOOP; 697 } 698 ifa = error ? NULL : match; 699 if (ifa != NULL) 700 ifa_ref(ifa); 701 702 if (ifa == NULL) { 703 if (error == ELOOP) { 704 CARP_DEBUG("dropping looped packet on interface %s\n", 705 if_name(ifp)); 706 CARPSTATS_INC(carps_badif); /* ??? */ 707 } else { 708 CARPSTATS_INC(carps_badvhid); 709 } 710 m_freem(m); 711 return; 712 } 713 714 /* verify the CARP version. */ 715 if (ch->carp_version != CARP_VERSION) { 716 CARPSTATS_INC(carps_badver); 717 CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), 718 ch->carp_version); 719 ifa_free(ifa); 720 m_freem(m); 721 return; 722 } 723 724 sc = ifa->ifa_carp; 725 CARP_LOCK(sc); 726 if (ifa->ifa_addr->sa_family == AF_INET) { 727 multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); 728 } else { 729 multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); 730 } 731 ifa_free(ifa); 732 733 /* verify that the IP TTL is 255, but only if we're not in unicast mode. 
*/ 734 if (multicast && ttl != CARP_DFLTTL) { 735 CARPSTATS_INC(carps_badttl); 736 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, 737 ttl, if_name(m->m_pkthdr.rcvif)); 738 goto out; 739 } 740 741 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 742 CARPSTATS_INC(carps_badauth); 743 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, 744 sc->sc_vhid, if_name(ifp)); 745 goto out; 746 } 747 748 tmp_counter = ntohl(ch->carp_counter[0]); 749 tmp_counter = tmp_counter<<32; 750 tmp_counter += ntohl(ch->carp_counter[1]); 751 752 /* XXX Replay protection goes here */ 753 754 sc->sc_init_counter = 0; 755 sc->sc_counter = tmp_counter; 756 757 sc_tv.tv_sec = sc->sc_advbase; 758 sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; 759 ch_tv.tv_sec = ch->carp_advbase; 760 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 761 762 switch (sc->sc_state) { 763 case INIT: 764 break; 765 case MASTER: 766 /* 767 * If we receive an advertisement from a master who's going to 768 * be more frequent than us, go into BACKUP state. 769 */ 770 if (timevalcmp(&sc_tv, &ch_tv, >) || 771 timevalcmp(&sc_tv, &ch_tv, ==)) { 772 callout_stop(&sc->sc_ad_tmo); 773 carp_set_state(sc, BACKUP, 774 "more frequent advertisement received"); 775 carp_setrun(sc, 0); 776 carp_delroute(sc); 777 } 778 break; 779 case BACKUP: 780 /* 781 * If we're pre-empting masters who advertise slower than us, 782 * and this one claims to be slower, treat him as down. 783 */ 784 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { 785 carp_master_down_locked(sc, 786 "preempting a slower master"); 787 break; 788 } 789 790 /* 791 * If the master is going to advertise at such a low frequency 792 * that he's guaranteed to time out, we'd might as well just 793 * treat him as timed out now. 
794 */ 795 sc_tv.tv_sec = sc->sc_advbase * 3; 796 if (timevalcmp(&sc_tv, &ch_tv, <)) { 797 carp_master_down_locked(sc, "master will time out"); 798 break; 799 } 800 801 /* 802 * Otherwise, we reset the counter and wait for the next 803 * advertisement. 804 */ 805 carp_setrun(sc, af); 806 break; 807 } 808 809 out: 810 CARP_UNLOCK(sc); 811 m_freem(m); 812 } 813 814 static int 815 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 816 { 817 struct m_tag *mtag; 818 819 if (sc->sc_init_counter) { 820 /* this could also be seconds since unix epoch */ 821 sc->sc_counter = arc4random(); 822 sc->sc_counter = sc->sc_counter << 32; 823 sc->sc_counter += arc4random(); 824 } else 825 sc->sc_counter++; 826 827 ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); 828 ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); 829 830 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 831 832 /* Tag packet for carp_output */ 833 if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), 834 M_NOWAIT)) == NULL) { 835 m_freem(m); 836 CARPSTATS_INC(carps_onomem); 837 return (ENOMEM); 838 } 839 bcopy(&sc, mtag + 1, sizeof(sc)); 840 m_tag_prepend(m, mtag); 841 842 return (0); 843 } 844 845 /* 846 * To avoid LORs and possible recursions this function shouldn't 847 * be called directly, but scheduled via taskqueue. 848 */ 849 static void 850 carp_send_ad_all(void *ctx __unused, int pending __unused) 851 { 852 struct carp_softc *sc; 853 struct epoch_tracker et; 854 855 NET_EPOCH_ENTER(et); 856 mtx_lock(&carp_mtx); 857 LIST_FOREACH(sc, &carp_list, sc_next) 858 if (sc->sc_state == MASTER) { 859 CARP_LOCK(sc); 860 CURVNET_SET(sc->sc_carpdev->if_vnet); 861 carp_send_ad_locked(sc); 862 CURVNET_RESTORE(); 863 CARP_UNLOCK(sc); 864 } 865 mtx_unlock(&carp_mtx); 866 NET_EPOCH_EXIT(et); 867 } 868 869 /* Send a periodic advertisement, executed in callout context. 
*/ 870 static void 871 carp_send_ad(void *v) 872 { 873 struct carp_softc *sc = v; 874 struct epoch_tracker et; 875 876 NET_EPOCH_ENTER(et); 877 CARP_LOCK_ASSERT(sc); 878 CURVNET_SET(sc->sc_carpdev->if_vnet); 879 carp_send_ad_locked(sc); 880 CURVNET_RESTORE(); 881 CARP_UNLOCK(sc); 882 NET_EPOCH_EXIT(et); 883 } 884 885 static void 886 carp_send_ad_error(struct carp_softc *sc, int error) 887 { 888 889 /* 890 * We track errors and successfull sends with this logic: 891 * - Any error resets success counter to 0. 892 * - MAX_ERRORS triggers demotion. 893 * - MIN_SUCCESS successes resets error counter to 0. 894 * - MIN_SUCCESS reverts demotion, if it was triggered before. 895 */ 896 if (error) { 897 if (sc->sc_sendad_errors < INT_MAX) 898 sc->sc_sendad_errors++; 899 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 900 static const char fmt[] = "send error %d on %s"; 901 char msg[sizeof(fmt) + IFNAMSIZ]; 902 903 sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); 904 carp_demote_adj(V_carp_senderr_adj, msg); 905 } 906 sc->sc_sendad_success = 0; 907 } else if (sc->sc_sendad_errors > 0) { 908 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { 909 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 910 static const char fmt[] = "send ok on %s"; 911 char msg[sizeof(fmt) + IFNAMSIZ]; 912 913 sprintf(msg, fmt, if_name(sc->sc_carpdev)); 914 carp_demote_adj(-V_carp_senderr_adj, msg); 915 } 916 sc->sc_sendad_errors = 0; 917 } 918 } 919 } 920 921 /* 922 * Pick the best ifaddr on the given ifp for sending CARP 923 * advertisements. 924 * 925 * "Best" here is defined by ifa_preferred(). This function is much 926 * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). 927 * 928 * (This could be simplified to return the actual address, except that 929 * it has a different format in AF_INET and AF_INET6.) 
930 */ 931 static struct ifaddr * 932 carp_best_ifa(int af, struct ifnet *ifp) 933 { 934 struct ifaddr *ifa, *best; 935 936 NET_EPOCH_ASSERT(); 937 938 if (af >= AF_MAX) 939 return (NULL); 940 best = NULL; 941 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 942 if (ifa->ifa_addr->sa_family == af && 943 (best == NULL || ifa_preferred(best, ifa))) 944 best = ifa; 945 } 946 if (best != NULL) 947 ifa_ref(best); 948 return (best); 949 } 950 951 static void 952 carp_send_ad_locked(struct carp_softc *sc) 953 { 954 struct carp_header ch; 955 struct timeval tv; 956 struct ifaddr *ifa; 957 struct carp_header *ch_ptr; 958 struct mbuf *m; 959 int len, advskew; 960 961 NET_EPOCH_ASSERT(); 962 CARP_LOCK_ASSERT(sc); 963 964 advskew = DEMOTE_ADVSKEW(sc); 965 tv.tv_sec = sc->sc_advbase; 966 tv.tv_usec = advskew * 1000000 / 256; 967 968 ch.carp_version = CARP_VERSION; 969 ch.carp_type = CARP_ADVERTISEMENT; 970 ch.carp_vhid = sc->sc_vhid; 971 ch.carp_advbase = sc->sc_advbase; 972 ch.carp_advskew = advskew; 973 ch.carp_authlen = 7; /* XXX DEFINE */ 974 ch.carp_pad1 = 0; /* must be zero */ 975 ch.carp_cksum = 0; 976 977 /* XXXGL: OpenBSD picks first ifaddr with needed family. 
*/ 978 979 #ifdef INET 980 if (sc->sc_naddrs) { 981 struct ip *ip; 982 983 m = m_gethdr(M_NOWAIT, MT_DATA); 984 if (m == NULL) { 985 CARPSTATS_INC(carps_onomem); 986 goto resched; 987 } 988 len = sizeof(*ip) + sizeof(ch); 989 m->m_pkthdr.len = len; 990 m->m_pkthdr.rcvif = NULL; 991 m->m_len = len; 992 M_ALIGN(m, m->m_len); 993 if (IN_MULTICAST(sc->sc_carpaddr.s_addr)) 994 m->m_flags |= M_MCAST; 995 ip = mtod(m, struct ip *); 996 ip->ip_v = IPVERSION; 997 ip->ip_hl = sizeof(*ip) >> 2; 998 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; 999 ip->ip_len = htons(len); 1000 ip->ip_off = htons(IP_DF); 1001 ip->ip_ttl = CARP_DFLTTL; 1002 ip->ip_p = IPPROTO_CARP; 1003 ip->ip_sum = 0; 1004 ip_fillid(ip); 1005 1006 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); 1007 if (ifa != NULL) { 1008 ip->ip_src.s_addr = 1009 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1010 ifa_free(ifa); 1011 } else 1012 ip->ip_src.s_addr = 0; 1013 ip->ip_dst = sc->sc_carpaddr; 1014 1015 ch_ptr = (struct carp_header *)(&ip[1]); 1016 bcopy(&ch, ch_ptr, sizeof(ch)); 1017 if (carp_prepare_ad(m, sc, ch_ptr)) 1018 goto resched; 1019 1020 m->m_data += sizeof(*ip); 1021 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); 1022 m->m_data -= sizeof(*ip); 1023 1024 CARPSTATS_INC(carps_opackets); 1025 1026 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1027 &sc->sc_carpdev->if_carp->cif_imo, NULL)); 1028 } 1029 #endif /* INET */ 1030 #ifdef INET6 1031 if (sc->sc_naddrs6) { 1032 struct ip6_hdr *ip6; 1033 1034 m = m_gethdr(M_NOWAIT, MT_DATA); 1035 if (m == NULL) { 1036 CARPSTATS_INC(carps_onomem); 1037 goto resched; 1038 } 1039 len = sizeof(*ip6) + sizeof(ch); 1040 m->m_pkthdr.len = len; 1041 m->m_pkthdr.rcvif = NULL; 1042 m->m_len = len; 1043 M_ALIGN(m, m->m_len); 1044 ip6 = mtod(m, struct ip6_hdr *); 1045 bzero(ip6, sizeof(*ip6)); 1046 ip6->ip6_vfc |= IPV6_VERSION; 1047 /* Traffic class isn't defined in ip6 struct instead 1048 * it gets offset into flowid field */ 1049 ip6->ip6_flow |= htonl(V_carp_dscp << 
(IPV6_FLOWLABEL_LEN + 1050 IPTOS_DSCP_OFFSET)); 1051 ip6->ip6_hlim = CARP_DFLTTL; 1052 ip6->ip6_nxt = IPPROTO_CARP; 1053 1054 /* set the source address */ 1055 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); 1056 if (ifa != NULL) { 1057 bcopy(IFA_IN6(ifa), &ip6->ip6_src, 1058 sizeof(struct in6_addr)); 1059 ifa_free(ifa); 1060 } else 1061 /* This should never happen with IPv6. */ 1062 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1063 1064 /* Set the multicast destination. */ 1065 memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); 1066 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 1067 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1068 m_freem(m); 1069 CARP_DEBUG("%s: in6_setscope failed\n", __func__); 1070 goto resched; 1071 } 1072 } 1073 1074 ch_ptr = (struct carp_header *)(&ip6[1]); 1075 bcopy(&ch, ch_ptr, sizeof(ch)); 1076 if (carp_prepare_ad(m, sc, ch_ptr)) 1077 goto resched; 1078 1079 m->m_data += sizeof(*ip6); 1080 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); 1081 m->m_data -= sizeof(*ip6); 1082 1083 CARPSTATS_INC(carps_opackets6); 1084 1085 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, 1086 &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); 1087 } 1088 #endif /* INET6 */ 1089 1090 resched: 1091 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); 1092 } 1093 1094 static void 1095 carp_addroute(struct carp_softc *sc) 1096 { 1097 struct ifaddr *ifa; 1098 1099 CARP_FOREACH_IFA(sc, ifa) 1100 carp_ifa_addroute(ifa); 1101 } 1102 1103 static void 1104 carp_ifa_addroute(struct ifaddr *ifa) 1105 { 1106 1107 switch (ifa->ifa_addr->sa_family) { 1108 #ifdef INET 1109 case AF_INET: 1110 in_addprefix(ifatoia(ifa)); 1111 ifa_add_loopback_route(ifa, 1112 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1113 break; 1114 #endif 1115 #ifdef INET6 1116 case AF_INET6: 1117 ifa_add_loopback_route(ifa, 1118 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1119 nd6_add_ifa_lle(ifatoia6(ifa)); 1120 break; 1121 #endif 1122 } 1123 } 1124 1125 
static void 1126 carp_delroute(struct carp_softc *sc) 1127 { 1128 struct ifaddr *ifa; 1129 1130 CARP_FOREACH_IFA(sc, ifa) 1131 carp_ifa_delroute(ifa); 1132 } 1133 1134 static void 1135 carp_ifa_delroute(struct ifaddr *ifa) 1136 { 1137 1138 switch (ifa->ifa_addr->sa_family) { 1139 #ifdef INET 1140 case AF_INET: 1141 ifa_del_loopback_route(ifa, 1142 (struct sockaddr *)&ifatoia(ifa)->ia_addr); 1143 in_scrubprefix(ifatoia(ifa), LLE_STATIC); 1144 break; 1145 #endif 1146 #ifdef INET6 1147 case AF_INET6: 1148 ifa_del_loopback_route(ifa, 1149 (struct sockaddr *)&ifatoia6(ifa)->ia_addr); 1150 nd6_rem_ifa_lle(ifatoia6(ifa), 1); 1151 break; 1152 #endif 1153 } 1154 } 1155 1156 int 1157 carp_master(struct ifaddr *ifa) 1158 { 1159 struct carp_softc *sc = ifa->ifa_carp; 1160 1161 return (sc->sc_state == MASTER); 1162 } 1163 1164 #ifdef INET 1165 /* 1166 * Broadcast a gratuitous ARP request containing 1167 * the virtual router MAC address for each IP address 1168 * associated with the virtual router. 1169 */ 1170 static void 1171 carp_send_arp(struct carp_softc *sc) 1172 { 1173 struct ifaddr *ifa; 1174 struct in_addr addr; 1175 1176 NET_EPOCH_ASSERT(); 1177 1178 CARP_FOREACH_IFA(sc, ifa) { 1179 if (ifa->ifa_addr->sa_family != AF_INET) 1180 continue; 1181 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; 1182 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); 1183 } 1184 } 1185 1186 int 1187 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) 1188 { 1189 struct carp_softc *sc = ifa->ifa_carp; 1190 1191 if (sc->sc_state == MASTER) { 1192 *enaddr = LLADDR(&sc->sc_addr); 1193 return (1); 1194 } 1195 1196 return (0); 1197 } 1198 #endif 1199 1200 #ifdef INET6 1201 static void 1202 carp_send_na(struct carp_softc *sc) 1203 { 1204 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1205 struct ifaddr *ifa; 1206 struct in6_addr *in6; 1207 1208 CARP_FOREACH_IFA(sc, ifa) { 1209 if (ifa->ifa_addr->sa_family != AF_INET6) 1210 continue; 1211 1212 in6 = 
IFA_IN6(ifa); 1213 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1214 ND_NA_FLAG_OVERRIDE, 1, NULL); 1215 DELAY(1000); /* XXX */ 1216 } 1217 } 1218 1219 /* 1220 * Returns ifa in case it's a carp address and it is MASTER, or if the address 1221 * matches and is not a carp address. Returns NULL otherwise. 1222 */ 1223 struct ifaddr * 1224 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) 1225 { 1226 struct ifaddr *ifa; 1227 1228 NET_EPOCH_ASSERT(); 1229 1230 ifa = NULL; 1231 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1232 if (ifa->ifa_addr->sa_family != AF_INET6) 1233 continue; 1234 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) 1235 continue; 1236 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) 1237 ifa = NULL; 1238 else 1239 ifa_ref(ifa); 1240 break; 1241 } 1242 1243 return (ifa); 1244 } 1245 1246 char * 1247 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) 1248 { 1249 struct ifaddr *ifa; 1250 1251 NET_EPOCH_ASSERT(); 1252 1253 IFNET_FOREACH_IFA(ifp, ifa) 1254 if (ifa->ifa_addr->sa_family == AF_INET6 && 1255 IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { 1256 struct carp_softc *sc = ifa->ifa_carp; 1257 struct m_tag *mtag; 1258 1259 mtag = m_tag_get(PACKET_TAG_CARP, 1260 sizeof(struct carp_softc *), M_NOWAIT); 1261 if (mtag == NULL) 1262 /* Better a bit than nothing. */ 1263 return (LLADDR(&sc->sc_addr)); 1264 1265 bcopy(&sc, mtag + 1, sizeof(sc)); 1266 m_tag_prepend(m, mtag); 1267 1268 return (LLADDR(&sc->sc_addr)); 1269 } 1270 1271 return (NULL); 1272 } 1273 #endif /* INET6 */ 1274 1275 int 1276 carp_forus(struct ifnet *ifp, u_char *dhost) 1277 { 1278 struct carp_softc *sc; 1279 uint8_t *ena = dhost; 1280 1281 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1282 return (0); 1283 1284 CIF_LOCK(ifp->if_carp); 1285 IFNET_FOREACH_CARP(ifp, sc) { 1286 /* 1287 * CARP_LOCK() is not here, since would protect nothing, but 1288 * cause deadlock with if_bridge, calling this under its lock. 
1289 */ 1290 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), 1291 ETHER_ADDR_LEN)) { 1292 CIF_UNLOCK(ifp->if_carp); 1293 return (1); 1294 } 1295 } 1296 CIF_UNLOCK(ifp->if_carp); 1297 1298 return (0); 1299 } 1300 1301 /* Master down timeout event, executed in callout context. */ 1302 static void 1303 carp_master_down(void *v) 1304 { 1305 struct carp_softc *sc = v; 1306 struct epoch_tracker et; 1307 1308 NET_EPOCH_ENTER(et); 1309 CARP_LOCK_ASSERT(sc); 1310 1311 CURVNET_SET(sc->sc_carpdev->if_vnet); 1312 if (sc->sc_state == BACKUP) { 1313 carp_master_down_locked(sc, "master timed out"); 1314 } 1315 CURVNET_RESTORE(); 1316 1317 CARP_UNLOCK(sc); 1318 NET_EPOCH_EXIT(et); 1319 } 1320 1321 static void 1322 carp_master_down_locked(struct carp_softc *sc, const char *reason) 1323 { 1324 1325 NET_EPOCH_ASSERT(); 1326 CARP_LOCK_ASSERT(sc); 1327 1328 switch (sc->sc_state) { 1329 case BACKUP: 1330 carp_set_state(sc, MASTER, reason); 1331 carp_send_ad_locked(sc); 1332 #ifdef INET 1333 carp_send_arp(sc); 1334 #endif 1335 #ifdef INET6 1336 carp_send_na(sc); 1337 #endif 1338 carp_setrun(sc, 0); 1339 carp_addroute(sc); 1340 break; 1341 case INIT: 1342 case MASTER: 1343 #ifdef INVARIANTS 1344 panic("carp: VHID %u@%s: master_down event in %s state\n", 1345 sc->sc_vhid, 1346 if_name(sc->sc_carpdev), 1347 sc->sc_state ? "MASTER" : "INIT"); 1348 #endif 1349 break; 1350 } 1351 } 1352 1353 /* 1354 * When in backup state, af indicates whether to reset the master down timer 1355 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1356 */ 1357 static void 1358 carp_setrun(struct carp_softc *sc, sa_family_t af) 1359 { 1360 struct timeval tv; 1361 1362 CARP_LOCK_ASSERT(sc); 1363 1364 if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || 1365 sc->sc_carpdev->if_link_state != LINK_STATE_UP || 1366 (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || 1367 !V_carp_allow) 1368 return; 1369 1370 switch (sc->sc_state) { 1371 case INIT: 1372 carp_set_state(sc, BACKUP, "initialization complete"); 1373 carp_setrun(sc, 0); 1374 break; 1375 case BACKUP: 1376 callout_stop(&sc->sc_ad_tmo); 1377 tv.tv_sec = 3 * sc->sc_advbase; 1378 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1379 switch (af) { 1380 #ifdef INET 1381 case AF_INET: 1382 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1383 carp_master_down, sc); 1384 break; 1385 #endif 1386 #ifdef INET6 1387 case AF_INET6: 1388 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1389 carp_master_down, sc); 1390 break; 1391 #endif 1392 default: 1393 #ifdef INET 1394 if (sc->sc_naddrs) 1395 callout_reset(&sc->sc_md_tmo, tvtohz(&tv), 1396 carp_master_down, sc); 1397 #endif 1398 #ifdef INET6 1399 if (sc->sc_naddrs6) 1400 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), 1401 carp_master_down, sc); 1402 #endif 1403 break; 1404 } 1405 break; 1406 case MASTER: 1407 tv.tv_sec = sc->sc_advbase; 1408 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1409 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), 1410 carp_send_ad, sc); 1411 break; 1412 } 1413 } 1414 1415 /* 1416 * Setup multicast structures. 
1417 */ 1418 static int 1419 carp_multicast_setup(struct carp_if *cif, sa_family_t sa) 1420 { 1421 struct ifnet *ifp = cif->cif_ifp; 1422 int error = 0; 1423 1424 switch (sa) { 1425 #ifdef INET 1426 case AF_INET: 1427 { 1428 struct ip_moptions *imo = &cif->cif_imo; 1429 struct in_mfilter *imf; 1430 struct in_addr addr; 1431 1432 if (ip_mfilter_first(&imo->imo_head) != NULL) 1433 return (0); 1434 1435 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 1436 ip_mfilter_init(&imo->imo_head); 1437 imo->imo_multicast_vif = -1; 1438 1439 addr.s_addr = htonl(INADDR_CARP_GROUP); 1440 if ((error = in_joingroup(ifp, &addr, NULL, 1441 &imf->imf_inm)) != 0) { 1442 ip_mfilter_free(imf); 1443 break; 1444 } 1445 1446 ip_mfilter_insert(&imo->imo_head, imf); 1447 imo->imo_multicast_ifp = ifp; 1448 imo->imo_multicast_ttl = CARP_DFLTTL; 1449 imo->imo_multicast_loop = 0; 1450 break; 1451 } 1452 #endif 1453 #ifdef INET6 1454 case AF_INET6: 1455 { 1456 struct ip6_moptions *im6o = &cif->cif_im6o; 1457 struct in6_mfilter *im6f[2]; 1458 struct in6_addr in6; 1459 1460 if (ip6_mfilter_first(&im6o->im6o_head)) 1461 return (0); 1462 1463 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1464 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); 1465 1466 ip6_mfilter_init(&im6o->im6o_head); 1467 im6o->im6o_multicast_hlim = CARP_DFLTTL; 1468 im6o->im6o_multicast_ifp = ifp; 1469 1470 /* Join IPv6 CARP multicast group. */ 1471 bzero(&in6, sizeof(in6)); 1472 in6.s6_addr16[0] = htons(0xff02); 1473 in6.s6_addr8[15] = 0x12; 1474 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1475 ip6_mfilter_free(im6f[0]); 1476 ip6_mfilter_free(im6f[1]); 1477 break; 1478 } 1479 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { 1480 ip6_mfilter_free(im6f[0]); 1481 ip6_mfilter_free(im6f[1]); 1482 break; 1483 } 1484 1485 /* Join solicited multicast address. 
*/ 1486 bzero(&in6, sizeof(in6)); 1487 in6.s6_addr16[0] = htons(0xff02); 1488 in6.s6_addr32[1] = 0; 1489 in6.s6_addr32[2] = htonl(1); 1490 in6.s6_addr32[3] = 0; 1491 in6.s6_addr8[12] = 0xff; 1492 1493 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { 1494 ip6_mfilter_free(im6f[0]); 1495 ip6_mfilter_free(im6f[1]); 1496 break; 1497 } 1498 1499 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { 1500 in6_leavegroup(im6f[0]->im6f_in6m, NULL); 1501 ip6_mfilter_free(im6f[0]); 1502 ip6_mfilter_free(im6f[1]); 1503 break; 1504 } 1505 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); 1506 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); 1507 break; 1508 } 1509 #endif 1510 } 1511 1512 return (error); 1513 } 1514 1515 /* 1516 * Free multicast structures. 1517 */ 1518 static void 1519 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) 1520 { 1521 #ifdef INET 1522 struct ip_moptions *imo = &cif->cif_imo; 1523 struct in_mfilter *imf; 1524 #endif 1525 #ifdef INET6 1526 struct ip6_moptions *im6o = &cif->cif_im6o; 1527 struct in6_mfilter *im6f; 1528 #endif 1529 sx_assert(&carp_sx, SA_XLOCKED); 1530 1531 switch (sa) { 1532 #ifdef INET 1533 case AF_INET: 1534 if (cif->cif_naddrs != 0) 1535 break; 1536 1537 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 1538 ip_mfilter_remove(&imo->imo_head, imf); 1539 in_leavegroup(imf->imf_inm, NULL); 1540 ip_mfilter_free(imf); 1541 } 1542 break; 1543 #endif 1544 #ifdef INET6 1545 case AF_INET6: 1546 if (cif->cif_naddrs6 != 0) 1547 break; 1548 1549 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 1550 ip6_mfilter_remove(&im6o->im6o_head, im6f); 1551 in6_leavegroup(im6f->im6f_in6m, NULL); 1552 ip6_mfilter_free(im6f); 1553 } 1554 break; 1555 #endif 1556 } 1557 } 1558 1559 int 1560 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) 1561 { 1562 struct m_tag *mtag; 1563 struct carp_softc *sc; 1564 1565 if (!sa) 1566 return (0); 1567 1568 switch (sa->sa_family) { 1569 #ifdef 
INET 1570 case AF_INET: 1571 break; 1572 #endif 1573 #ifdef INET6 1574 case AF_INET6: 1575 break; 1576 #endif 1577 default: 1578 return (0); 1579 } 1580 1581 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 1582 if (mtag == NULL) 1583 return (0); 1584 1585 bcopy(mtag + 1, &sc, sizeof(sc)); 1586 1587 switch (sa->sa_family) { 1588 case AF_INET: 1589 if (! IN_MULTICAST(sc->sc_carpaddr.s_addr)) 1590 return (0); 1591 break; 1592 case AF_INET6: 1593 if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6)) 1594 return (0); 1595 break; 1596 default: 1597 panic("Unknown af"); 1598 } 1599 1600 /* Set the source MAC address to the Virtual Router MAC Address. */ 1601 switch (ifp->if_type) { 1602 case IFT_ETHER: 1603 case IFT_BRIDGE: 1604 case IFT_L2VLAN: { 1605 struct ether_header *eh; 1606 1607 eh = mtod(m, struct ether_header *); 1608 eh->ether_shost[0] = 0; 1609 eh->ether_shost[1] = 0; 1610 eh->ether_shost[2] = 0x5e; 1611 eh->ether_shost[3] = 0; 1612 eh->ether_shost[4] = 1; 1613 eh->ether_shost[5] = sc->sc_vhid; 1614 } 1615 break; 1616 default: 1617 printf("%s: carp is not supported for the %d interface type\n", 1618 if_name(ifp), ifp->if_type); 1619 return (EOPNOTSUPP); 1620 } 1621 1622 return (0); 1623 } 1624 1625 static struct carp_softc* 1626 carp_alloc(struct ifnet *ifp) 1627 { 1628 struct carp_softc *sc; 1629 struct carp_if *cif; 1630 1631 sx_assert(&carp_sx, SA_XLOCKED); 1632 1633 if ((cif = ifp->if_carp) == NULL) 1634 cif = carp_alloc_if(ifp); 1635 1636 sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 1637 1638 sc->sc_advbase = CARP_DFLTINTV; 1639 sc->sc_vhid = -1; /* required setting */ 1640 sc->sc_init_counter = 1; 1641 sc->sc_state = INIT; 1642 1643 sc->sc_ifasiz = sizeof(struct ifaddr *); 1644 sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); 1645 sc->sc_carpdev = ifp; 1646 1647 sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); 1648 sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; 1649 sc->sc_carpaddr6.s6_addr8[15] = 0x12; 1650 1651 CARP_LOCK_INIT(sc); 
1652 #ifdef INET 1653 callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1654 #endif 1655 #ifdef INET6 1656 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1657 #endif 1658 callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1659 1660 CIF_LOCK(cif); 1661 TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); 1662 CIF_UNLOCK(cif); 1663 1664 mtx_lock(&carp_mtx); 1665 LIST_INSERT_HEAD(&carp_list, sc, sc_next); 1666 mtx_unlock(&carp_mtx); 1667 1668 return (sc); 1669 } 1670 1671 static void 1672 carp_grow_ifas(struct carp_softc *sc) 1673 { 1674 struct ifaddr **new; 1675 1676 new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); 1677 CARP_LOCK(sc); 1678 bcopy(sc->sc_ifas, new, sc->sc_ifasiz); 1679 free(sc->sc_ifas, M_CARP); 1680 sc->sc_ifas = new; 1681 sc->sc_ifasiz *= 2; 1682 CARP_UNLOCK(sc); 1683 } 1684 1685 static void 1686 carp_destroy(struct carp_softc *sc) 1687 { 1688 struct ifnet *ifp = sc->sc_carpdev; 1689 struct carp_if *cif = ifp->if_carp; 1690 1691 sx_assert(&carp_sx, SA_XLOCKED); 1692 1693 if (sc->sc_suppress) 1694 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); 1695 CARP_UNLOCK(sc); 1696 1697 CIF_LOCK(cif); 1698 TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); 1699 CIF_UNLOCK(cif); 1700 1701 mtx_lock(&carp_mtx); 1702 LIST_REMOVE(sc, sc_next); 1703 mtx_unlock(&carp_mtx); 1704 1705 callout_drain(&sc->sc_ad_tmo); 1706 #ifdef INET 1707 callout_drain(&sc->sc_md_tmo); 1708 #endif 1709 #ifdef INET6 1710 callout_drain(&sc->sc_md6_tmo); 1711 #endif 1712 CARP_LOCK_DESTROY(sc); 1713 1714 free(sc->sc_ifas, M_CARP); 1715 free(sc, M_CARP); 1716 } 1717 1718 static struct carp_if* 1719 carp_alloc_if(struct ifnet *ifp) 1720 { 1721 struct carp_if *cif; 1722 int error; 1723 1724 cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); 1725 1726 if ((error = ifpromisc(ifp, 1)) != 0) 1727 printf("%s: ifpromisc(%s) failed: %d\n", 1728 __func__, if_name(ifp), error); 1729 else 1730 cif->cif_flags |= CIF_PROMISC; 1731 1732 
CIF_LOCK_INIT(cif); 1733 cif->cif_ifp = ifp; 1734 TAILQ_INIT(&cif->cif_vrs); 1735 1736 IF_ADDR_WLOCK(ifp); 1737 ifp->if_carp = cif; 1738 if_ref(ifp); 1739 IF_ADDR_WUNLOCK(ifp); 1740 1741 return (cif); 1742 } 1743 1744 static void 1745 carp_free_if(struct carp_if *cif) 1746 { 1747 struct ifnet *ifp = cif->cif_ifp; 1748 1749 CIF_LOCK_ASSERT(cif); 1750 KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", 1751 __func__)); 1752 1753 IF_ADDR_WLOCK(ifp); 1754 ifp->if_carp = NULL; 1755 IF_ADDR_WUNLOCK(ifp); 1756 1757 CIF_LOCK_DESTROY(cif); 1758 1759 if (cif->cif_flags & CIF_PROMISC) 1760 ifpromisc(ifp, 0); 1761 if_rele(ifp); 1762 1763 free(cif, M_CARP); 1764 } 1765 1766 static bool 1767 carp_carprcp(void *arg, struct carp_softc *sc, int priv) 1768 { 1769 struct carpreq *carpr = arg; 1770 1771 CARP_LOCK(sc); 1772 carpr->carpr_state = sc->sc_state; 1773 carpr->carpr_vhid = sc->sc_vhid; 1774 carpr->carpr_advbase = sc->sc_advbase; 1775 carpr->carpr_advskew = sc->sc_advskew; 1776 if (priv) 1777 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 1778 else 1779 bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); 1780 CARP_UNLOCK(sc); 1781 1782 return (true); 1783 } 1784 1785 static int 1786 carp_ioctl_set(if_t ifp, struct carpkreq *carpr) 1787 { 1788 struct epoch_tracker et; 1789 struct carp_softc *sc = NULL; 1790 int error = 0; 1791 1792 1793 if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID || 1794 carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) { 1795 return (EINVAL); 1796 } 1797 1798 if (ifp->if_carp) { 1799 IFNET_FOREACH_CARP(ifp, sc) 1800 if (sc->sc_vhid == carpr->carpr_vhid) 1801 break; 1802 } 1803 if (sc == NULL) { 1804 sc = carp_alloc(ifp); 1805 CARP_LOCK(sc); 1806 sc->sc_vhid = carpr->carpr_vhid; 1807 LLADDR(&sc->sc_addr)[0] = 0; 1808 LLADDR(&sc->sc_addr)[1] = 0; 1809 LLADDR(&sc->sc_addr)[2] = 0x5e; 1810 LLADDR(&sc->sc_addr)[3] = 0; 1811 LLADDR(&sc->sc_addr)[4] = 1; 1812 LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; 1813 } else 1814 
CARP_LOCK(sc); 1815 if (carpr->carpr_advbase > 0) { 1816 if (carpr->carpr_advbase > 255 || 1817 carpr->carpr_advbase < CARP_DFLTINTV) { 1818 error = EINVAL; 1819 goto out; 1820 } 1821 sc->sc_advbase = carpr->carpr_advbase; 1822 } 1823 if (carpr->carpr_advskew >= 255) { 1824 error = EINVAL; 1825 goto out; 1826 } 1827 sc->sc_advskew = carpr->carpr_advskew; 1828 if (carpr->carpr_addr.s_addr != INADDR_ANY) 1829 sc->sc_carpaddr = carpr->carpr_addr; 1830 if (! IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { 1831 memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, 1832 sizeof(sc->sc_carpaddr6)); 1833 } 1834 if (carpr->carpr_key[0] != '\0') { 1835 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1836 carp_hmac_prepare(sc); 1837 } 1838 if (sc->sc_state != INIT && 1839 carpr->carpr_state != sc->sc_state) { 1840 switch (carpr->carpr_state) { 1841 case BACKUP: 1842 callout_stop(&sc->sc_ad_tmo); 1843 carp_set_state(sc, BACKUP, 1844 "user requested via ifconfig"); 1845 carp_setrun(sc, 0); 1846 carp_delroute(sc); 1847 break; 1848 case MASTER: 1849 NET_EPOCH_ENTER(et); 1850 carp_master_down_locked(sc, 1851 "user requested via ifconfig"); 1852 NET_EPOCH_EXIT(et); 1853 break; 1854 default: 1855 break; 1856 } 1857 } 1858 1859 out: 1860 CARP_UNLOCK(sc); 1861 1862 return (error); 1863 } 1864 1865 static int 1866 carp_ioctl_get(if_t ifp, struct ucred *cred, struct carpreq *carpr, 1867 bool (*outfn)(void *, struct carp_softc *, int), void *arg) 1868 { 1869 int priveleged; 1870 struct carp_softc *sc; 1871 1872 if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID) 1873 return (EINVAL); 1874 if (carpr->carpr_count < 1) 1875 return (EMSGSIZE); 1876 if (ifp->if_carp == NULL) 1877 return (ENOENT); 1878 1879 priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0); 1880 if (carpr->carpr_vhid != 0) { 1881 IFNET_FOREACH_CARP(ifp, sc) 1882 if (sc->sc_vhid == carpr->carpr_vhid) 1883 break; 1884 if (sc == NULL) 1885 return (ENOENT); 1886 1887 if (! 
outfn(arg, sc, priveleged)) 1888 return (ENOMEM); 1889 carpr->carpr_count = 1; 1890 } else { 1891 int count; 1892 1893 count = 0; 1894 IFNET_FOREACH_CARP(ifp, sc) 1895 count++; 1896 1897 if (count > carpr->carpr_count) 1898 return (EMSGSIZE); 1899 1900 IFNET_FOREACH_CARP(ifp, sc) { 1901 if (! outfn(arg, sc, priveleged)) 1902 return (ENOMEM); 1903 carpr->carpr_count = count; 1904 } 1905 } 1906 1907 return (0); 1908 } 1909 1910 int 1911 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) 1912 { 1913 struct carpreq carpr; 1914 struct carpkreq carprk = { }; 1915 struct ifnet *ifp; 1916 int error = 0; 1917 1918 if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) 1919 return (error); 1920 1921 ifp = ifunit_ref(ifr->ifr_name); 1922 if ((error = carp_is_supported_if(ifp)) != 0) 1923 goto out; 1924 1925 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 1926 error = EADDRNOTAVAIL; 1927 goto out; 1928 } 1929 1930 sx_xlock(&carp_sx); 1931 switch (cmd) { 1932 case SIOCSVH: 1933 if ((error = priv_check(td, PRIV_NETINET_CARP))) 1934 break; 1935 1936 memcpy(&carprk, &carpr, sizeof(carpr)); 1937 error = carp_ioctl_set(ifp, &carprk); 1938 break; 1939 1940 case SIOCGVH: 1941 error = carp_ioctl_get(ifp, td->td_ucred, &carpr, 1942 carp_carprcp, &carpr); 1943 if (error == 0) { 1944 error = copyout(&carpr, 1945 (char *)ifr_data_get_ptr(ifr), 1946 carpr.carpr_count * sizeof(carpr)); 1947 } 1948 break; 1949 default: 1950 error = EINVAL; 1951 } 1952 sx_xunlock(&carp_sx); 1953 1954 out: 1955 if (ifp != NULL) 1956 if_rele(ifp); 1957 1958 return (error); 1959 } 1960 1961 static int 1962 carp_get_vhid(struct ifaddr *ifa) 1963 { 1964 1965 if (ifa == NULL || ifa->ifa_carp == NULL) 1966 return (0); 1967 1968 return (ifa->ifa_carp->sc_vhid); 1969 } 1970 1971 int 1972 carp_attach(struct ifaddr *ifa, int vhid) 1973 { 1974 struct ifnet *ifp = ifa->ifa_ifp; 1975 struct carp_if *cif = ifp->if_carp; 1976 struct carp_softc *sc; 1977 int index, error; 1978 1979 KASSERT(ifa->ifa_carp == 
NULL, ("%s: ifa %p attached", __func__, ifa)); 1980 1981 switch (ifa->ifa_addr->sa_family) { 1982 #ifdef INET 1983 case AF_INET: 1984 #endif 1985 #ifdef INET6 1986 case AF_INET6: 1987 #endif 1988 break; 1989 default: 1990 return (EPROTOTYPE); 1991 } 1992 1993 sx_xlock(&carp_sx); 1994 if (ifp->if_carp == NULL) { 1995 sx_xunlock(&carp_sx); 1996 return (ENOPROTOOPT); 1997 } 1998 1999 IFNET_FOREACH_CARP(ifp, sc) 2000 if (sc->sc_vhid == vhid) 2001 break; 2002 if (sc == NULL) { 2003 sx_xunlock(&carp_sx); 2004 return (ENOENT); 2005 } 2006 2007 error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); 2008 if (error) { 2009 CIF_FREE(cif); 2010 sx_xunlock(&carp_sx); 2011 return (error); 2012 } 2013 2014 index = sc->sc_naddrs + sc->sc_naddrs6 + 1; 2015 if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) 2016 carp_grow_ifas(sc); 2017 2018 switch (ifa->ifa_addr->sa_family) { 2019 #ifdef INET 2020 case AF_INET: 2021 cif->cif_naddrs++; 2022 sc->sc_naddrs++; 2023 break; 2024 #endif 2025 #ifdef INET6 2026 case AF_INET6: 2027 cif->cif_naddrs6++; 2028 sc->sc_naddrs6++; 2029 break; 2030 #endif 2031 } 2032 2033 ifa_ref(ifa); 2034 2035 CARP_LOCK(sc); 2036 sc->sc_ifas[index - 1] = ifa; 2037 ifa->ifa_carp = sc; 2038 carp_hmac_prepare(sc); 2039 carp_sc_state(sc); 2040 CARP_UNLOCK(sc); 2041 2042 sx_xunlock(&carp_sx); 2043 2044 return (0); 2045 } 2046 2047 void 2048 carp_detach(struct ifaddr *ifa, bool keep_cif) 2049 { 2050 struct ifnet *ifp = ifa->ifa_ifp; 2051 struct carp_if *cif = ifp->if_carp; 2052 struct carp_softc *sc = ifa->ifa_carp; 2053 int i, index; 2054 2055 KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); 2056 2057 sx_xlock(&carp_sx); 2058 2059 CARP_LOCK(sc); 2060 /* Shift array. 
*/ 2061 index = sc->sc_naddrs + sc->sc_naddrs6; 2062 for (i = 0; i < index; i++) 2063 if (sc->sc_ifas[i] == ifa) 2064 break; 2065 KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); 2066 for (; i < index - 1; i++) 2067 sc->sc_ifas[i] = sc->sc_ifas[i+1]; 2068 sc->sc_ifas[index - 1] = NULL; 2069 2070 switch (ifa->ifa_addr->sa_family) { 2071 #ifdef INET 2072 case AF_INET: 2073 cif->cif_naddrs--; 2074 sc->sc_naddrs--; 2075 break; 2076 #endif 2077 #ifdef INET6 2078 case AF_INET6: 2079 cif->cif_naddrs6--; 2080 sc->sc_naddrs6--; 2081 break; 2082 #endif 2083 } 2084 2085 carp_ifa_delroute(ifa); 2086 carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); 2087 2088 ifa->ifa_carp = NULL; 2089 ifa_free(ifa); 2090 2091 carp_hmac_prepare(sc); 2092 carp_sc_state(sc); 2093 2094 if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) 2095 carp_destroy(sc); 2096 else 2097 CARP_UNLOCK(sc); 2098 2099 if (!keep_cif) 2100 CIF_FREE(cif); 2101 2102 sx_xunlock(&carp_sx); 2103 } 2104 2105 static void 2106 carp_set_state(struct carp_softc *sc, int state, const char *reason) 2107 { 2108 2109 CARP_LOCK_ASSERT(sc); 2110 2111 if (sc->sc_state != state) { 2112 const char *carp_states[] = { CARP_STATES }; 2113 char subsys[IFNAMSIZ+5]; 2114 2115 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, 2116 if_name(sc->sc_carpdev)); 2117 2118 CARP_LOG("%s: %s -> %s (%s)\n", subsys, 2119 carp_states[sc->sc_state], carp_states[state], reason); 2120 2121 sc->sc_state = state; 2122 2123 devctl_notify("CARP", subsys, carp_states[state], NULL); 2124 } 2125 } 2126 2127 static void 2128 carp_linkstate(struct ifnet *ifp) 2129 { 2130 struct carp_softc *sc; 2131 2132 CIF_LOCK(ifp->if_carp); 2133 IFNET_FOREACH_CARP(ifp, sc) { 2134 CARP_LOCK(sc); 2135 carp_sc_state(sc); 2136 CARP_UNLOCK(sc); 2137 } 2138 CIF_UNLOCK(ifp->if_carp); 2139 } 2140 2141 static void 2142 carp_sc_state(struct carp_softc *sc) 2143 { 2144 2145 CARP_LOCK_ASSERT(sc); 2146 2147 if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || 2148 
!(sc->sc_carpdev->if_flags & IFF_UP) || 2149 !V_carp_allow) { 2150 callout_stop(&sc->sc_ad_tmo); 2151 #ifdef INET 2152 callout_stop(&sc->sc_md_tmo); 2153 #endif 2154 #ifdef INET6 2155 callout_stop(&sc->sc_md6_tmo); 2156 #endif 2157 carp_set_state(sc, INIT, "hardware interface down"); 2158 carp_setrun(sc, 0); 2159 if (!sc->sc_suppress) 2160 carp_demote_adj(V_carp_ifdown_adj, "interface down"); 2161 sc->sc_suppress = 1; 2162 } else { 2163 carp_set_state(sc, INIT, "hardware interface up"); 2164 carp_setrun(sc, 0); 2165 if (sc->sc_suppress) 2166 carp_demote_adj(-V_carp_ifdown_adj, "interface up"); 2167 sc->sc_suppress = 0; 2168 } 2169 } 2170 2171 static void 2172 carp_demote_adj(int adj, char *reason) 2173 { 2174 atomic_add_int(&V_carp_demotion, adj); 2175 CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); 2176 taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); 2177 } 2178 2179 static int 2180 carp_allow_sysctl(SYSCTL_HANDLER_ARGS) 2181 { 2182 int new, error; 2183 struct carp_softc *sc; 2184 2185 new = V_carp_allow; 2186 error = sysctl_handle_int(oidp, &new, 0, req); 2187 if (error || !req->newptr) 2188 return (error); 2189 2190 if (V_carp_allow != new) { 2191 V_carp_allow = new; 2192 2193 mtx_lock(&carp_mtx); 2194 LIST_FOREACH(sc, &carp_list, sc_next) { 2195 CARP_LOCK(sc); 2196 if (curvnet == sc->sc_carpdev->if_vnet) 2197 carp_sc_state(sc); 2198 CARP_UNLOCK(sc); 2199 } 2200 mtx_unlock(&carp_mtx); 2201 } 2202 2203 return (0); 2204 } 2205 2206 static int 2207 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS) 2208 { 2209 int new, error; 2210 2211 new = V_carp_dscp; 2212 error = sysctl_handle_int(oidp, &new, 0, req); 2213 if (error || !req->newptr) 2214 return (error); 2215 2216 if (new < 0 || new > 63) 2217 return (EINVAL); 2218 2219 V_carp_dscp = new; 2220 2221 return (0); 2222 } 2223 2224 static int 2225 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) 2226 { 2227 int new, error; 2228 2229 new = V_carp_demotion; 2230 error = sysctl_handle_int(oidp, &new, 0, req); 
2231 if (error || !req->newptr) 2232 return (error); 2233 2234 carp_demote_adj(new, "sysctl"); 2235 2236 return (0); 2237 } 2238 2239 static int 2240 nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 2241 { 2242 if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN)) 2243 return (EINVAL); 2244 2245 memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla)); 2246 return (0); 2247 } 2248 2249 struct carp_nl_send_args { 2250 struct nlmsghdr *hdr; 2251 struct nl_pstate *npt; 2252 }; 2253 2254 static bool 2255 carp_nl_send(void *arg, struct carp_softc *sc, int priv) 2256 { 2257 struct carp_nl_send_args *nlsa = arg; 2258 struct nlmsghdr *hdr = nlsa->hdr; 2259 struct nl_pstate *npt = nlsa->npt; 2260 struct nl_writer *nw = npt->nw; 2261 struct genlmsghdr *ghdr_new; 2262 2263 if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { 2264 nlmsg_abort(nw); 2265 return (false); 2266 } 2267 2268 ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); 2269 if (ghdr_new == NULL) { 2270 nlmsg_abort(nw); 2271 return (false); 2272 } 2273 2274 ghdr_new->cmd = CARP_NL_CMD_GET; 2275 ghdr_new->version = 0; 2276 ghdr_new->reserved = 0; 2277 2278 CARP_LOCK(sc); 2279 2280 nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); 2281 nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); 2282 nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); 2283 nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); 2284 nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); 2285 nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); 2286 2287 if (priv) 2288 nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key); 2289 2290 CARP_UNLOCK(sc); 2291 2292 if (! 
nlmsg_end(nw)) { 2293 nlmsg_abort(nw); 2294 return (false); 2295 } 2296 2297 return (true); 2298 } 2299 2300 struct nl_carp_parsed { 2301 unsigned int ifindex; 2302 uint32_t state; 2303 uint32_t vhid; 2304 int32_t advbase; 2305 int32_t advskew; 2306 char key[CARP_KEY_LEN]; 2307 struct in_addr addr; 2308 struct in6_addr addr6; 2309 }; 2310 2311 #define _IN(_field) offsetof(struct genlmsghdr, _field) 2312 #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) 2313 2314 static const struct nlattr_parser nla_p_set[] = { 2315 { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, 2316 { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, 2317 { .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 }, 2318 { .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 }, 2319 { .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key }, 2320 { .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 }, 2321 { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, 2322 { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, 2323 }; 2324 static const struct nlfield_parser nlf_p_set[] = { 2325 }; 2326 NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set); 2327 #undef _IN 2328 #undef _OUT 2329 2330 2331 static int 2332 carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt) 2333 { 2334 struct nl_carp_parsed attrs = { }; 2335 struct carp_nl_send_args args; 2336 struct carpreq carpr = { }; 2337 struct epoch_tracker et; 2338 if_t ifp; 2339 int error; 2340 2341 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2342 if (error != 0) 2343 return (error); 2344 2345 NET_EPOCH_ENTER(et); 2346 ifp = ifnet_byindex_ref(attrs.ifindex); 2347 NET_EPOCH_EXIT(et); 2348 2349 if ((error = carp_is_supported_if(ifp)) != 0) 2350 goto out; 2351 2352 hdr->nlmsg_flags |= NLM_F_MULTI; 2353 args.hdr = hdr; 2354 args.npt = npt; 2355 2356 carpr.carpr_vhid = 
attrs.vhid; 2357 carpr.carpr_count = CARP_MAXVHID; 2358 2359 sx_xlock(&carp_sx); 2360 error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr, 2361 carp_nl_send, &args); 2362 sx_xunlock(&carp_sx); 2363 2364 if (! nlmsg_end_dump(npt->nw, error, hdr)) 2365 error = ENOMEM; 2366 2367 out: 2368 if (ifp != NULL) 2369 if_rele(ifp); 2370 2371 return (error); 2372 } 2373 2374 static int 2375 carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) 2376 { 2377 struct nl_carp_parsed attrs = { }; 2378 struct carpkreq carpr; 2379 struct epoch_tracker et; 2380 if_t ifp; 2381 int error; 2382 2383 error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs); 2384 if (error != 0) 2385 return (error); 2386 2387 if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID) 2388 return (EINVAL); 2389 if (attrs.state > CARP_MAXSTATE) 2390 return (EINVAL); 2391 if (attrs.advbase < 0 || attrs.advskew < 0) 2392 return (EINVAL); 2393 if (attrs.advbase > 255) 2394 return (EINVAL); 2395 if (attrs.advskew >= 255) 2396 return (EINVAL); 2397 2398 NET_EPOCH_ENTER(et); 2399 ifp = ifnet_byindex_ref(attrs.ifindex); 2400 NET_EPOCH_EXIT(et); 2401 2402 if ((error = carp_is_supported_if(ifp)) != 0) 2403 goto out; 2404 2405 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2406 error = EADDRNOTAVAIL; 2407 goto out; 2408 } 2409 2410 carpr.carpr_count = 1; 2411 carpr.carpr_vhid = attrs.vhid; 2412 carpr.carpr_state = attrs.state; 2413 carpr.carpr_advbase = attrs.advbase; 2414 carpr.carpr_advskew = attrs.advskew; 2415 carpr.carpr_addr = attrs.addr; 2416 carpr.carpr_addr6 = attrs.addr6; 2417 2418 memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); 2419 2420 sx_xlock(&carp_sx); 2421 error = carp_ioctl_set(ifp, &carpr); 2422 sx_xunlock(&carp_sx); 2423 2424 out: 2425 if (ifp != NULL) 2426 if_rele(ifp); 2427 2428 return (error); 2429 } 2430 2431 static const struct nlhdr_parser *all_parsers[] = { 2432 &carp_parser 2433 }; 2434 2435 static const struct genl_cmd carp_cmds[] = { 2436 { 2437 .cmd_num = CARP_NL_CMD_GET, 2438 
.cmd_name = "SIOCGVH", 2439 .cmd_cb = carp_nl_get, 2440 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | 2441 GENL_CMD_CAP_HASPOL, 2442 }, 2443 { 2444 .cmd_num = CARP_NL_CMD_SET, 2445 .cmd_name = "SIOCSVH", 2446 .cmd_cb = carp_nl_set, 2447 .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, 2448 .cmd_priv = PRIV_NETINET_CARP, 2449 }, 2450 }; 2451 2452 static void 2453 carp_nl_register(void) 2454 { 2455 bool ret __diagused; 2456 int family_id __diagused; 2457 2458 NL_VERIFY_PARSERS(all_parsers); 2459 family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, 2460 CARP_NL_CMD_MAX); 2461 MPASS(family_id != 0); 2462 2463 ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds, 2464 NL_ARRAY_LEN(carp_cmds)); 2465 MPASS(ret); 2466 } 2467 2468 static void 2469 carp_nl_unregister(void) 2470 { 2471 genl_unregister_family(CARP_NL_FAMILY_NAME); 2472 } 2473 2474 static void 2475 carp_mod_cleanup(void) 2476 { 2477 2478 carp_nl_unregister(); 2479 2480 #ifdef INET 2481 (void)ipproto_unregister(IPPROTO_CARP); 2482 carp_iamatch_p = NULL; 2483 #endif 2484 #ifdef INET6 2485 (void)ip6proto_unregister(IPPROTO_CARP); 2486 carp_iamatch6_p = NULL; 2487 carp_macmatch6_p = NULL; 2488 #endif 2489 carp_ioctl_p = NULL; 2490 carp_attach_p = NULL; 2491 carp_detach_p = NULL; 2492 carp_get_vhid_p = NULL; 2493 carp_linkstate_p = NULL; 2494 carp_forus_p = NULL; 2495 carp_output_p = NULL; 2496 carp_demote_adj_p = NULL; 2497 carp_master_p = NULL; 2498 mtx_unlock(&carp_mtx); 2499 taskqueue_drain(taskqueue_swi, &carp_sendall_task); 2500 mtx_destroy(&carp_mtx); 2501 sx_destroy(&carp_sx); 2502 } 2503 2504 static void 2505 ipcarp_sysinit(void) 2506 { 2507 2508 /* Load allow as tunable so to postpone carp start after module load */ 2509 TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow); 2510 } 2511 VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL); 2512 2513 static int 2514 carp_mod_load(void) 2515 { 2516 int err; 2517 2518 mtx_init(&carp_mtx, "carp_mtx", NULL, 
MTX_DEF); 2519 sx_init(&carp_sx, "carp_sx"); 2520 LIST_INIT(&carp_list); 2521 carp_get_vhid_p = carp_get_vhid; 2522 carp_forus_p = carp_forus; 2523 carp_output_p = carp_output; 2524 carp_linkstate_p = carp_linkstate; 2525 carp_ioctl_p = carp_ioctl; 2526 carp_attach_p = carp_attach; 2527 carp_detach_p = carp_detach; 2528 carp_demote_adj_p = carp_demote_adj; 2529 carp_master_p = carp_master; 2530 #ifdef INET6 2531 carp_iamatch6_p = carp_iamatch6; 2532 carp_macmatch6_p = carp_macmatch6; 2533 err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); 2534 if (err) { 2535 printf("carp: error %d registering with INET6\n", err); 2536 carp_mod_cleanup(); 2537 return (err); 2538 } 2539 #endif 2540 #ifdef INET 2541 carp_iamatch_p = carp_iamatch; 2542 err = ipproto_register(IPPROTO_CARP, carp_input, NULL); 2543 if (err) { 2544 printf("carp: error %d registering with INET\n", err); 2545 carp_mod_cleanup(); 2546 return (err); 2547 } 2548 #endif 2549 2550 carp_nl_register(); 2551 2552 return (0); 2553 } 2554 2555 static int 2556 carp_modevent(module_t mod, int type, void *data) 2557 { 2558 switch (type) { 2559 case MOD_LOAD: 2560 return carp_mod_load(); 2561 /* NOTREACHED */ 2562 case MOD_UNLOAD: 2563 mtx_lock(&carp_mtx); 2564 if (LIST_EMPTY(&carp_list)) 2565 carp_mod_cleanup(); 2566 else { 2567 mtx_unlock(&carp_mtx); 2568 return (EBUSY); 2569 } 2570 break; 2571 2572 default: 2573 return (EINVAL); 2574 } 2575 2576 return (0); 2577 } 2578 2579 static moduledata_t carp_mod = { 2580 "carp", 2581 carp_modevent, 2582 0 2583 }; 2584 2585 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2586