1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_pf.h" 68 69 #include <sys/param.h> 70 #include <sys/bus.h> 71 #include <sys/endian.h> 72 #include <sys/interrupt.h> 73 #include <sys/kernel.h> 74 #include <sys/lock.h> 75 #include <sys/mbuf.h> 76 #include <sys/module.h> 77 #include <sys/mutex.h> 78 #include <sys/nv.h> 79 #include <sys/priv.h> 80 #include <sys/smp.h> 81 #include <sys/socket.h> 82 #include <sys/sockio.h> 83 #include <sys/sysctl.h> 84 #include <sys/syslog.h> 85 86 #include <net/bpf.h> 87 #include <net/if.h> 88 #include <net/if_var.h> 89 #include <net/if_clone.h> 90 #include <net/if_private.h> 91 #include <net/if_types.h> 92 #include <net/vnet.h> 93 #include <net/pfvar.h> 94 #include <net/if_pfsync.h> 95 96 #include <netinet/if_ether.h> 97 #include <netinet/in.h> 98 #include <netinet/in_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip_carp.h> 101 #include <netinet/ip_var.h> 102 #include <netinet/tcp.h> 103 #include <netinet/tcp_fsm.h> 104 #include <netinet/tcp_seq.h> 105 106 #include <netinet/ip6.h> 107 #include <netinet6/ip6_var.h> 108 109 #include <netpfil/pf/pfsync_nv.h> 110 111 struct pfsync_bucket; 112 struct pfsync_softc; 113 114 union inet_template { 115 struct ip ipv4; 116 }; 117 118 #define PFSYNC_MINPKT ( \ 119 sizeof(union inet_template) + \ 120 sizeof(struct pfsync_header) + \ 121 sizeof(struct pfsync_subheader) ) 122 123 static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, 124 struct pfsync_state_peer *); 125 static int pfsync_in_clr(struct mbuf *, int, int, int); 126 static int pfsync_in_ins(struct mbuf *, int, int, int); 127 static int pfsync_in_iack(struct mbuf *, int, int, int); 128 static int pfsync_in_upd(struct mbuf *, int, int, int); 129 static int pfsync_in_upd_c(struct mbuf *, int, int, int); 130 static int pfsync_in_ureq(struct mbuf *, int, int, int); 131 static int pfsync_in_del(struct mbuf *, int, int, int); 132 static int pfsync_in_del_c(struct mbuf *, int, int, int); 133 static int pfsync_in_bus(struct mbuf *, int, int, int); 134 static int pfsync_in_tdb(struct mbuf *, int, int, int); 135 static int pfsync_in_eof(struct mbuf *, int, int, int); 136 static int pfsync_in_error(struct mbuf *, int, int, int); 137 138 static int (*pfsync_acts[])(struct mbuf *, int, int, int) = { 139 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 140 pfsync_in_ins, /* PFSYNC_ACT_INS */ 141 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 142 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 143 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 144 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 145 pfsync_in_del, /* PFSYNC_ACT_DEL */ 146 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 147 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 148 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 149 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 150 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 151 pfsync_in_eof /* PFSYNC_ACT_EOF */ 152 }; 153 154 struct pfsync_q { 155 void (*write)(struct pf_kstate *, void *); 156 size_t len; 157 u_int8_t action; 158 }; 159 160 /* we have one of these for every PFSYNC_S_ */ 161 static void pfsync_out_state(struct pf_kstate *, void *); 162 static void pfsync_out_iack(struct pf_kstate *, void *); 163 static void pfsync_out_upd_c(struct pf_kstate *, void *); 164 static void pfsync_out_del(struct pf_kstate *, void *); 165 166 static struct pfsync_q pfsync_qs[] = { 167 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 168 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 169 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, 170 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 171 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 172 }; 173 174 static void pfsync_q_ins(struct pf_kstate *, int, bool); 175 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 176 177 static void pfsync_update_state(struct pf_kstate *); 178 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 179 180 struct pfsync_upd_req_item { 181 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 182 struct pfsync_upd_req ur_msg; 183 }; 184 185 struct pfsync_deferral { 186 struct pfsync_softc *pd_sc; 187 TAILQ_ENTRY(pfsync_deferral) pd_entry; 188 u_int pd_refs; 189 struct callout pd_tmo; 190 191 struct pf_kstate *pd_st; 192 struct mbuf *pd_m; 193 }; 194 195 struct pfsync_bucket 196 { 197 int b_id; 198 struct pfsync_softc *b_sc; 199 struct mtx b_mtx; 200 struct callout b_tmo; 201 int b_flags; 202 #define PFSYNCF_BUCKET_PUSH 0x00000001 203 204 size_t b_len; 205 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_S_COUNT]; 206 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 207 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 208 u_int b_deferred; 209 void *b_plus; 210 size_t b_pluslen; 211 212 struct ifaltq b_snd; 213 }; 214 215 struct pfsync_softc { 216 /* Configuration */ 217 struct ifnet *sc_ifp; 218 struct ifnet *sc_sync_if; 219 struct ip_moptions sc_imo; 220 struct sockaddr_storage sc_sync_peer; 221 uint32_t sc_flags; 222 uint8_t sc_maxupdates; 223 union inet_template sc_template; 224 struct mtx sc_mtx; 225 226 /* Queued data */ 227 struct pfsync_bucket *sc_buckets; 228 229 /* Bulk update info */ 230 struct mtx sc_bulk_mtx; 231 uint32_t sc_ureq_sent; 232 int sc_bulk_tries; 233 uint32_t sc_ureq_received; 234 int sc_bulk_hashid; 235 uint64_t sc_bulk_stateid; 236 uint32_t sc_bulk_creatorid; 237 struct callout sc_bulk_tmo; 238 struct callout sc_bulkfail_tmo; 239 }; 240 241 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 242 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 243 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 244 245 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 246 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 247 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 248 249 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 250 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 251 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 252 253 static const char pfsyncname[] = "pfsync"; 254 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 255 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 256 #define V_pfsyncif VNET(pfsyncif) 257 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 258 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 259 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 260 #define V_pfsync_swi_ie VNET(pfsync_swi_ie) 261 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 262 #define V_pfsyncstats VNET(pfsyncstats) 263 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 264 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 265 266 static void pfsync_timeout(void *); 267 static void pfsync_push(struct pfsync_bucket *); 268 static void pfsync_push_all(struct pfsync_softc *); 269 static void pfsyncintr(void *); 270 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 271 struct in_mfilter *imf); 272 static void pfsync_multicast_cleanup(struct pfsync_softc *); 273 static void pfsync_pointers_init(void); 274 static void pfsync_pointers_uninit(void); 275 static int pfsync_init(void); 276 static void pfsync_uninit(void); 277 278 static unsigned long pfsync_buckets; 279 280 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 281 "PFSYNC"); 282 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 283 &VNET_NAME(pfsyncstats), pfsyncstats, 284 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 285 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 286 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 287 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 288 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 289 290 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 291 static void pfsync_clone_destroy(struct ifnet *); 292 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 293 struct pf_state_peer *); 294 static int pfsyncoutput(struct ifnet *, struct mbuf *, 295 const struct sockaddr *, struct route *); 296 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 297 298 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 299 static void pfsync_undefer(struct pfsync_deferral *, int); 300 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 301 static void pfsync_undefer_state(struct pf_kstate *, int); 302 static void pfsync_defer_tmo(void *); 303 304 static void pfsync_request_update(u_int32_t, u_int64_t); 305 static bool pfsync_update_state_req(struct pf_kstate *); 306 307 static void pfsync_drop(struct pfsync_softc *); 308 static void pfsync_sendout(int, int); 309 static void pfsync_send_plus(void *, size_t); 310 311 static void pfsync_bulk_start(void); 312 static void pfsync_bulk_status(u_int8_t); 313 static void pfsync_bulk_update(void *); 314 static void pfsync_bulk_fail(void *); 315 316 static void pfsync_detach_ifnet(struct ifnet *); 317 318 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 319 struct pfsync_kstatus *); 320 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 321 struct pfsync_softc *); 322 323 #ifdef IPSEC 324 static void pfsync_update_net_tdb(struct pfsync_tdb *); 325 #endif 326 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 327 struct pf_kstate *); 328 329 #define PFSYNC_MAX_BULKTRIES 12 330 #define PFSYNC_DEFER_TIMEOUT ((20 * hz) / 1000) 331 332 VNET_DEFINE(struct if_clone *, pfsync_cloner); 333 #define V_pfsync_cloner VNET(pfsync_cloner) 334 335 static int 336 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 337 { 338 struct pfsync_softc *sc; 339 struct ifnet *ifp; 340 struct pfsync_bucket *b; 341 int c, q; 342 343 if (unit != 0) 344 return (EINVAL); 345 346 if (! pfsync_buckets) 347 pfsync_buckets = mp_ncpus * 2; 348 349 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 350 sc->sc_flags |= PFSYNCF_OK; 351 sc->sc_maxupdates = 128; 352 353 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 354 if (ifp == NULL) { 355 free(sc, M_PFSYNC); 356 return (ENOSPC); 357 } 358 if_initname(ifp, pfsyncname, unit); 359 ifp->if_softc = sc; 360 ifp->if_ioctl = pfsyncioctl; 361 ifp->if_output = pfsyncoutput; 362 ifp->if_type = IFT_PFSYNC; 363 ifp->if_hdrlen = sizeof(struct pfsync_header); 364 ifp->if_mtu = ETHERMTU; 365 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 366 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 367 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 368 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 369 370 if_attach(ifp); 371 372 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 373 374 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 375 M_PFSYNC, M_ZERO | M_WAITOK); 376 for (c = 0; c < pfsync_buckets; c++) { 377 b = &sc->sc_buckets[c]; 378 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 379 380 b->b_id = c; 381 b->b_sc = sc; 382 b->b_len = PFSYNC_MINPKT; 383 384 for (q = 0; q < PFSYNC_S_COUNT; q++) 385 TAILQ_INIT(&b->b_qs[q]); 386 387 TAILQ_INIT(&b->b_upd_req_list); 388 TAILQ_INIT(&b->b_deferrals); 389 390 callout_init(&b->b_tmo, 1); 391 392 b->b_snd.ifq_maxlen = ifqmaxlen; 393 } 394 395 V_pfsyncif = sc; 396 397 return (0); 398 } 399 400 static void 401 pfsync_clone_destroy(struct ifnet *ifp) 402 { 403 struct pfsync_softc *sc = ifp->if_softc; 404 struct pfsync_bucket *b; 405 int c; 406 407 for (c = 0; c < pfsync_buckets; c++) { 408 b = &sc->sc_buckets[c]; 409 /* 410 * At this stage, everything should have already been 411 * cleared by pfsync_uninit(), and we have only to 412 * drain callouts. 413 */ 414 while (b->b_deferred > 0) { 415 struct pfsync_deferral *pd = 416 TAILQ_FIRST(&b->b_deferrals); 417 418 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 419 b->b_deferred--; 420 if (callout_stop(&pd->pd_tmo) > 0) { 421 pf_release_state(pd->pd_st); 422 m_freem(pd->pd_m); 423 free(pd, M_PFSYNC); 424 } else { 425 pd->pd_refs++; 426 callout_drain(&pd->pd_tmo); 427 free(pd, M_PFSYNC); 428 } 429 } 430 431 callout_drain(&b->b_tmo); 432 } 433 434 callout_drain(&sc->sc_bulkfail_tmo); 435 callout_drain(&sc->sc_bulk_tmo); 436 437 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 438 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 439 bpfdetach(ifp); 440 if_detach(ifp); 441 442 pfsync_drop(sc); 443 444 if_free(ifp); 445 pfsync_multicast_cleanup(sc); 446 mtx_destroy(&sc->sc_mtx); 447 mtx_destroy(&sc->sc_bulk_mtx); 448 449 free(sc->sc_buckets, M_PFSYNC); 450 free(sc, M_PFSYNC); 451 452 V_pfsyncif = NULL; 453 } 454 455 static int 456 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 457 struct pf_state_peer *d) 458 { 459 if (s->scrub.scrub_flag && d->scrub == NULL) { 460 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 461 if (d->scrub == NULL) 462 return (ENOMEM); 463 } 464 465 return (0); 466 } 467 468 static int 469 pfsync_state_import(struct pfsync_state *sp, int flags) 470 { 471 struct pfsync_softc *sc = V_pfsyncif; 472 #ifndef __NO_STRICT_ALIGNMENT 473 struct pfsync_state_key key[2]; 474 #endif 475 struct pfsync_state_key *kw, *ks; 476 struct pf_kstate *st = NULL; 477 struct pf_state_key *skw = NULL, *sks = NULL; 478 struct pf_krule *r = NULL; 479 struct pfi_kkif *kif; 480 int error; 481 482 PF_RULES_RASSERT(); 483 484 if (sp->creatorid == 0) { 485 if (V_pf_status.debug >= PF_DEBUG_MISC) 486 printf("%s: invalid creator id: %08x\n", __func__, 487 ntohl(sp->creatorid)); 488 return (EINVAL); 489 } 490 491 if ((kif = pfi_kkif_find(sp->ifname)) == NULL) { 492 if (V_pf_status.debug >= PF_DEBUG_MISC) 493 printf("%s: unknown interface: %s\n", __func__, 494 sp->ifname); 495 if (flags & PFSYNC_SI_IOCTL) 496 return (EINVAL); 497 return (0); /* skip this state */ 498 } 499 500 /* 501 * If the ruleset checksums match or the state is coming from the ioctl, 502 * it's safe to associate the state with the rule of that number. 503 */ 504 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 505 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 506 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 507 r = pf_main_ruleset.rules[ 508 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 509 else 510 r = &V_pf_default_rule; 511 512 if ((r->max_states && 513 counter_u64_fetch(r->states_cur) >= r->max_states)) 514 goto cleanup; 515 516 /* 517 * XXXGL: consider M_WAITOK in ioctl path after. 518 */ 519 st = pf_alloc_state(M_NOWAIT); 520 if (__predict_false(st == NULL)) 521 goto cleanup; 522 523 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 524 goto cleanup; 525 526 #ifndef __NO_STRICT_ALIGNMENT 527 bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); 528 kw = &key[PF_SK_WIRE]; 529 ks = &key[PF_SK_STACK]; 530 #else 531 kw = &sp->key[PF_SK_WIRE]; 532 ks = &sp->key[PF_SK_STACK]; 533 #endif 534 535 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || 536 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || 537 kw->port[0] != ks->port[0] || 538 kw->port[1] != ks->port[1]) { 539 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 540 if (sks == NULL) 541 goto cleanup; 542 } else 543 sks = skw; 544 545 /* allocate memory for scrub info */ 546 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 547 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 548 goto cleanup; 549 550 /* Copy to state key(s). */ 551 skw->addr[0] = kw->addr[0]; 552 skw->addr[1] = kw->addr[1]; 553 skw->port[0] = kw->port[0]; 554 skw->port[1] = kw->port[1]; 555 skw->proto = sp->proto; 556 skw->af = sp->af; 557 if (sks != skw) { 558 sks->addr[0] = ks->addr[0]; 559 sks->addr[1] = ks->addr[1]; 560 sks->port[0] = ks->port[0]; 561 sks->port[1] = ks->port[1]; 562 sks->proto = sp->proto; 563 sks->af = sp->af; 564 } 565 566 /* copy to state */ 567 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 568 st->creation = time_uptime - ntohl(sp->creation); 569 st->expire = time_uptime; 570 if (sp->expire) { 571 uint32_t timeout; 572 573 timeout = r->timeout[sp->timeout]; 574 if (!timeout) 575 timeout = V_pf_default_rule.timeout[sp->timeout]; 576 577 /* sp->expire may have been adaptively scaled by export. */ 578 st->expire -= timeout - ntohl(sp->expire); 579 } 580 581 st->direction = sp->direction; 582 st->log = sp->log; 583 st->timeout = sp->timeout; 584 st->state_flags = sp->state_flags; 585 586 st->id = sp->id; 587 st->creatorid = sp->creatorid; 588 pf_state_peer_ntoh(&sp->src, &st->src); 589 pf_state_peer_ntoh(&sp->dst, &st->dst); 590 591 st->rule.ptr = r; 592 st->nat_rule.ptr = NULL; 593 st->anchor.ptr = NULL; 594 st->rt_kif = NULL; 595 596 st->pfsync_time = time_uptime; 597 st->sync_state = PFSYNC_S_NONE; 598 599 if (!(flags & PFSYNC_SI_IOCTL)) 600 st->state_flags |= PFSTATE_NOSYNC; 601 602 if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) 603 goto cleanup_state; 604 605 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 606 counter_u64_add(r->states_cur, 1); 607 counter_u64_add(r->states_tot, 1); 608 609 if (!(flags & PFSYNC_SI_IOCTL)) { 610 st->state_flags &= ~PFSTATE_NOSYNC; 611 if (st->state_flags & PFSTATE_ACK) { 612 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 613 PFSYNC_BUCKET_LOCK(b); 614 pfsync_q_ins(st, PFSYNC_S_IACK, true); 615 PFSYNC_BUCKET_UNLOCK(b); 616 617 pfsync_push_all(sc); 618 } 619 } 620 st->state_flags &= ~PFSTATE_ACK; 621 PF_STATE_UNLOCK(st); 622 623 return (0); 624 625 cleanup: 626 error = ENOMEM; 627 if (skw == sks) 628 sks = NULL; 629 if (skw != NULL) 630 uma_zfree(V_pf_state_key_z, skw); 631 if (sks != NULL) 632 uma_zfree(V_pf_state_key_z, sks); 633 634 cleanup_state: /* pf_state_insert() frees the state keys. */ 635 if (st) { 636 st->timeout = PFTM_UNLINKED; /* appease an assert */ 637 pf_free_state(st); 638 } 639 return (error); 640 } 641 642 #ifdef INET 643 static int 644 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 645 { 646 struct pfsync_softc *sc = V_pfsyncif; 647 struct mbuf *m = *mp; 648 struct ip *ip = mtod(m, struct ip *); 649 struct pfsync_header *ph; 650 struct pfsync_subheader subh; 651 652 int offset, len, flags = 0; 653 int rv; 654 uint16_t count; 655 656 PF_RULES_RLOCK_TRACKER; 657 658 *mp = NULL; 659 V_pfsyncstats.pfsyncs_ipackets++; 660 661 /* Verify that we have a sync interface configured. */ 662 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 663 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 664 goto done; 665 666 /* verify that the packet came in on the right interface */ 667 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 668 V_pfsyncstats.pfsyncs_badif++; 669 goto done; 670 } 671 672 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 673 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 674 /* verify that the IP TTL is 255. */ 675 if (ip->ip_ttl != PFSYNC_DFLTTL) { 676 V_pfsyncstats.pfsyncs_badttl++; 677 goto done; 678 } 679 680 offset = ip->ip_hl << 2; 681 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 682 V_pfsyncstats.pfsyncs_hdrops++; 683 goto done; 684 } 685 686 if (offset + sizeof(*ph) > m->m_len) { 687 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 688 V_pfsyncstats.pfsyncs_hdrops++; 689 return (IPPROTO_DONE); 690 } 691 ip = mtod(m, struct ip *); 692 } 693 ph = (struct pfsync_header *)((char *)ip + offset); 694 695 /* verify the version */ 696 if (ph->version != PFSYNC_VERSION) { 697 V_pfsyncstats.pfsyncs_badver++; 698 goto done; 699 } 700 701 len = ntohs(ph->len) + offset; 702 if (m->m_pkthdr.len < len) { 703 V_pfsyncstats.pfsyncs_badlen++; 704 goto done; 705 } 706 707 /* 708 * Trusting pf_chksum during packet processing, as well as seeking 709 * in interface name tree, require holding PF_RULES_RLOCK(). 710 */ 711 PF_RULES_RLOCK(); 712 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 713 flags = PFSYNC_SI_CKSUM; 714 715 offset += sizeof(*ph); 716 while (offset <= len - sizeof(subh)) { 717 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 718 offset += sizeof(subh); 719 720 if (subh.action >= PFSYNC_ACT_MAX) { 721 V_pfsyncstats.pfsyncs_badact++; 722 PF_RULES_RUNLOCK(); 723 goto done; 724 } 725 726 count = ntohs(subh.count); 727 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 728 rv = (*pfsync_acts[subh.action])(m, offset, count, flags); 729 if (rv == -1) { 730 PF_RULES_RUNLOCK(); 731 return (IPPROTO_DONE); 732 } 733 734 offset += rv; 735 } 736 PF_RULES_RUNLOCK(); 737 738 done: 739 m_freem(m); 740 return (IPPROTO_DONE); 741 } 742 #endif 743 744 static int 745 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags) 746 { 747 struct pfsync_clr *clr; 748 struct mbuf *mp; 749 int len = sizeof(*clr) * count; 750 int i, offp; 751 u_int32_t creatorid; 752 753 mp = m_pulldown(m, offset, len, &offp); 754 if (mp == NULL) { 755 V_pfsyncstats.pfsyncs_badlen++; 756 return (-1); 757 } 758 clr = (struct pfsync_clr *)(mp->m_data + offp); 759 760 for (i = 0; i < count; i++) { 761 creatorid = clr[i].creatorid; 762 763 if (clr[i].ifname[0] != '\0' && 764 pfi_kkif_find(clr[i].ifname) == NULL) 765 continue; 766 767 for (int i = 0; i <= pf_hashmask; i++) { 768 struct pf_idhash *ih = &V_pf_idhash[i]; 769 struct pf_kstate *s; 770 relock: 771 PF_HASHROW_LOCK(ih); 772 LIST_FOREACH(s, &ih->states, entry) { 773 if (s->creatorid == creatorid) { 774 s->state_flags |= PFSTATE_NOSYNC; 775 pf_unlink_state(s); 776 goto relock; 777 } 778 } 779 PF_HASHROW_UNLOCK(ih); 780 } 781 } 782 783 return (len); 784 } 785 786 static int 787 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags) 788 { 789 struct mbuf *mp; 790 struct pfsync_state *sa, *sp; 791 int len = sizeof(*sp) * count; 792 int i, offp; 793 794 mp = m_pulldown(m, offset, len, &offp); 795 if (mp == NULL) { 796 V_pfsyncstats.pfsyncs_badlen++; 797 return (-1); 798 } 799 sa = (struct pfsync_state *)(mp->m_data + offp); 800 801 for (i = 0; i < count; i++) { 802 sp = &sa[i]; 803 804 /* Check for invalid values. */ 805 if (sp->timeout >= PFTM_MAX || 806 sp->src.state > PF_TCPS_PROXY_DST || 807 sp->dst.state > PF_TCPS_PROXY_DST || 808 sp->direction > PF_OUT || 809 (sp->af != AF_INET && sp->af != AF_INET6)) { 810 if (V_pf_status.debug >= PF_DEBUG_MISC) 811 printf("%s: invalid value\n", __func__); 812 V_pfsyncstats.pfsyncs_badval++; 813 continue; 814 } 815 816 if (pfsync_state_import(sp, flags) == ENOMEM) 817 /* Drop out, but process the rest of the actions. */ 818 break; 819 } 820 821 return (len); 822 } 823 824 static int 825 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags) 826 { 827 struct pfsync_ins_ack *ia, *iaa; 828 struct pf_kstate *st; 829 830 struct mbuf *mp; 831 int len = count * sizeof(*ia); 832 int offp, i; 833 834 mp = m_pulldown(m, offset, len, &offp); 835 if (mp == NULL) { 836 V_pfsyncstats.pfsyncs_badlen++; 837 return (-1); 838 } 839 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 840 841 for (i = 0; i < count; i++) { 842 ia = &iaa[i]; 843 844 st = pf_find_state_byid(ia->id, ia->creatorid); 845 if (st == NULL) 846 continue; 847 848 if (st->state_flags & PFSTATE_ACK) { 849 pfsync_undefer_state(st, 0); 850 } 851 PF_STATE_UNLOCK(st); 852 } 853 /* 854 * XXX this is not yet implemented, but we know the size of the 855 * message so we can skip it. 856 */ 857 858 return (count * sizeof(struct pfsync_ins_ack)); 859 } 860 861 static int 862 pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, 863 struct pfsync_state_peer *dst) 864 { 865 int sync = 0; 866 867 PF_STATE_LOCK_ASSERT(st); 868 869 /* 870 * The state should never go backwards except 871 * for syn-proxy states. Neither should the 872 * sequence window slide backwards. 873 */ 874 if ((st->src.state > src->state && 875 (st->src.state < PF_TCPS_PROXY_SRC || 876 src->state >= PF_TCPS_PROXY_SRC)) || 877 878 (st->src.state == src->state && 879 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 880 sync++; 881 else 882 pf_state_peer_ntoh(src, &st->src); 883 884 if ((st->dst.state > dst->state) || 885 886 (st->dst.state >= TCPS_SYN_SENT && 887 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 888 sync++; 889 else 890 pf_state_peer_ntoh(dst, &st->dst); 891 892 return (sync); 893 } 894 895 static int 896 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags) 897 { 898 struct pfsync_softc *sc = V_pfsyncif; 899 struct pfsync_state *sa, *sp; 900 struct pf_kstate *st; 901 int sync; 902 903 struct mbuf *mp; 904 int len = count * sizeof(*sp); 905 int offp, i; 906 907 mp = m_pulldown(m, offset, len, &offp); 908 if (mp == NULL) { 909 V_pfsyncstats.pfsyncs_badlen++; 910 return (-1); 911 } 912 sa = (struct pfsync_state *)(mp->m_data + offp); 913 914 for (i = 0; i < count; i++) { 915 sp = &sa[i]; 916 917 /* check for invalid values */ 918 if (sp->timeout >= PFTM_MAX || 919 sp->src.state > PF_TCPS_PROXY_DST || 920 sp->dst.state > PF_TCPS_PROXY_DST) { 921 if (V_pf_status.debug >= PF_DEBUG_MISC) { 922 printf("pfsync_input: PFSYNC_ACT_UPD: " 923 "invalid value\n"); 924 } 925 V_pfsyncstats.pfsyncs_badval++; 926 continue; 927 } 928 929 st = pf_find_state_byid(sp->id, sp->creatorid); 930 if (st == NULL) { 931 /* insert the update */ 932 if (pfsync_state_import(sp, flags)) 933 V_pfsyncstats.pfsyncs_badstate++; 934 continue; 935 } 936 937 if (st->state_flags & PFSTATE_ACK) { 938 pfsync_undefer_state(st, 1); 939 } 940 941 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 942 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 943 else { 944 sync = 0; 945 946 /* 947 * Non-TCP protocol state machine always go 948 * forwards 949 */ 950 if (st->src.state > sp->src.state) 951 sync++; 952 else 953 pf_state_peer_ntoh(&sp->src, &st->src); 954 if (st->dst.state > sp->dst.state) 955 sync++; 956 else 957 pf_state_peer_ntoh(&sp->dst, &st->dst); 958 } 959 if (sync < 2) { 960 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 961 pf_state_peer_ntoh(&sp->dst, &st->dst); 962 st->expire = time_uptime; 963 st->timeout = sp->timeout; 964 } 965 st->pfsync_time = time_uptime; 966 967 if (sync) { 968 V_pfsyncstats.pfsyncs_stale++; 969 970 pfsync_update_state(st); 971 PF_STATE_UNLOCK(st); 972 pfsync_push_all(sc); 973 continue; 974 } 975 PF_STATE_UNLOCK(st); 976 } 977 978 return (len); 979 } 980 981 static int 982 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags) 983 { 984 struct pfsync_softc *sc = V_pfsyncif; 985 struct pfsync_upd_c *ua, *up; 986 struct pf_kstate *st; 987 int len = count * sizeof(*up); 988 int sync; 989 struct mbuf *mp; 990 int offp, i; 991 992 mp = m_pulldown(m, offset, len, &offp); 993 if (mp == NULL) { 994 V_pfsyncstats.pfsyncs_badlen++; 995 return (-1); 996 } 997 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 998 999 for (i = 0; i < count; i++) { 1000 up = &ua[i]; 1001 1002 /* check for invalid values */ 1003 if (up->timeout >= PFTM_MAX || 1004 up->src.state > PF_TCPS_PROXY_DST || 1005 up->dst.state > PF_TCPS_PROXY_DST) { 1006 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1007 printf("pfsync_input: " 1008 "PFSYNC_ACT_UPD_C: " 1009 "invalid value\n"); 1010 } 1011 V_pfsyncstats.pfsyncs_badval++; 1012 continue; 1013 } 1014 1015 st = pf_find_state_byid(up->id, up->creatorid); 1016 if (st == NULL) { 1017 /* We don't have this state. Ask for it. */ 1018 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1019 pfsync_request_update(up->creatorid, up->id); 1020 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1021 continue; 1022 } 1023 1024 if (st->state_flags & PFSTATE_ACK) { 1025 pfsync_undefer_state(st, 1); 1026 } 1027 1028 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1029 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1030 else { 1031 sync = 0; 1032 1033 /* 1034 * Non-TCP protocol state machine always go 1035 * forwards 1036 */ 1037 if (st->src.state > up->src.state) 1038 sync++; 1039 else 1040 pf_state_peer_ntoh(&up->src, &st->src); 1041 if (st->dst.state > up->dst.state) 1042 sync++; 1043 else 1044 pf_state_peer_ntoh(&up->dst, &st->dst); 1045 } 1046 if (sync < 2) { 1047 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1048 pf_state_peer_ntoh(&up->dst, &st->dst); 1049 st->expire = time_uptime; 1050 st->timeout = up->timeout; 1051 } 1052 st->pfsync_time = time_uptime; 1053 1054 if (sync) { 1055 V_pfsyncstats.pfsyncs_stale++; 1056 1057 pfsync_update_state(st); 1058 PF_STATE_UNLOCK(st); 1059 pfsync_push_all(sc); 1060 continue; 1061 } 1062 PF_STATE_UNLOCK(st); 1063 } 1064 1065 return (len); 1066 } 1067 1068 static int 1069 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags) 1070 { 1071 struct pfsync_upd_req *ur, *ura; 1072 struct mbuf *mp; 1073 int len = count * sizeof(*ur); 1074 int i, offp; 1075 1076 struct pf_kstate *st; 1077 1078 mp = m_pulldown(m, offset, len, &offp); 1079 if (mp == NULL) { 1080 V_pfsyncstats.pfsyncs_badlen++; 1081 return (-1); 1082 } 1083 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1084 1085 for (i = 0; i < count; i++) { 1086 ur = &ura[i]; 1087 1088 if (ur->id == 0 && ur->creatorid == 0) 1089 pfsync_bulk_start(); 1090 else { 1091 st = pf_find_state_byid(ur->id, ur->creatorid); 1092 if (st == NULL) { 1093 V_pfsyncstats.pfsyncs_badstate++; 1094 continue; 1095 } 1096 if (st->state_flags & PFSTATE_NOSYNC) { 1097 PF_STATE_UNLOCK(st); 1098 continue; 1099 } 1100 1101 pfsync_update_state_req(st); 1102 PF_STATE_UNLOCK(st); 1103 } 1104 } 1105 1106 return (len); 1107 } 1108 1109 static int 1110 pfsync_in_del(struct mbuf *m, int offset, int count, int flags) 1111 { 1112 struct mbuf *mp; 1113 struct pfsync_state *sa, *sp; 1114 struct pf_kstate *st; 1115 int len = count * sizeof(*sp); 1116 int offp, i; 1117 1118 mp = m_pulldown(m, offset, len, &offp); 1119 if (mp == NULL) { 1120 V_pfsyncstats.pfsyncs_badlen++; 1121 return (-1); 1122 } 1123 sa = (struct pfsync_state *)(mp->m_data + offp); 1124 1125 for (i = 0; i < count; i++) { 1126 sp = &sa[i]; 1127 1128 st = pf_find_state_byid(sp->id, sp->creatorid); 1129 if (st == NULL) { 1130 V_pfsyncstats.pfsyncs_badstate++; 1131 continue; 1132 } 1133 st->state_flags |= PFSTATE_NOSYNC; 1134 pf_unlink_state(st); 1135 } 1136 1137 return (len); 1138 } 1139 1140 static int 1141 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags) 1142 { 1143 struct mbuf *mp; 1144 struct pfsync_del_c *sa, *sp; 1145 struct pf_kstate *st; 1146 int len = count * sizeof(*sp); 1147 int offp, i; 1148 1149 mp = m_pulldown(m, offset, len, &offp); 1150 if (mp == NULL) { 1151 V_pfsyncstats.pfsyncs_badlen++; 1152 return (-1); 1153 } 1154 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1155 1156 for (i = 0; i < count; i++) { 1157 sp = &sa[i]; 1158 1159 st = pf_find_state_byid(sp->id, sp->creatorid); 1160 if (st == NULL) { 1161 V_pfsyncstats.pfsyncs_badstate++; 1162 continue; 1163 } 1164 1165 st->state_flags |= PFSTATE_NOSYNC; 1166 pf_unlink_state(st); 1167 } 1168 1169 return (len); 1170 } 1171 1172 static int 1173 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags) 1174 { 1175 struct pfsync_softc *sc = V_pfsyncif; 1176 struct pfsync_bus *bus; 1177 struct mbuf *mp; 1178 int len = count * sizeof(*bus); 1179 int offp; 1180 1181 PFSYNC_BLOCK(sc); 1182 1183 /* If we're not waiting for a bulk update, who cares. */ 1184 if (sc->sc_ureq_sent == 0) { 1185 PFSYNC_BUNLOCK(sc); 1186 return (len); 1187 } 1188 1189 mp = m_pulldown(m, offset, len, &offp); 1190 if (mp == NULL) { 1191 PFSYNC_BUNLOCK(sc); 1192 V_pfsyncstats.pfsyncs_badlen++; 1193 return (-1); 1194 } 1195 bus = (struct pfsync_bus *)(mp->m_data + offp); 1196 1197 switch (bus->status) { 1198 case PFSYNC_BUS_START: 1199 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1200 V_pf_limits[PF_LIMIT_STATES].limit / 1201 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1202 sizeof(struct pfsync_state)), 1203 pfsync_bulk_fail, sc); 1204 if (V_pf_status.debug >= PF_DEBUG_MISC) 1205 printf("pfsync: received bulk update start\n"); 1206 break; 1207 1208 case PFSYNC_BUS_END: 1209 if (time_uptime - ntohl(bus->endtime) >= 1210 sc->sc_ureq_sent) { 1211 /* that's it, we're happy */ 1212 sc->sc_ureq_sent = 0; 1213 sc->sc_bulk_tries = 0; 1214 callout_stop(&sc->sc_bulkfail_tmo); 1215 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1216 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1217 "pfsync bulk done"); 1218 sc->sc_flags |= PFSYNCF_OK; 1219 if (V_pf_status.debug >= PF_DEBUG_MISC) 1220 printf("pfsync: received valid " 1221 "bulk update end\n"); 1222 } else { 1223 if (V_pf_status.debug >= PF_DEBUG_MISC) 1224 printf("pfsync: received invalid " 1225 "bulk update end: bad timestamp\n"); 1226 } 1227 break; 1228 } 1229 PFSYNC_BUNLOCK(sc); 1230 1231 return (len); 1232 } 1233 1234 static int 1235 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags) 1236 { 1237 int len = count * sizeof(struct pfsync_tdb); 1238 1239 #if defined(IPSEC) 1240 struct pfsync_tdb *tp; 1241 struct mbuf *mp; 1242 int offp; 1243 int i; 1244 int s; 1245 1246 mp = m_pulldown(m, offset, len, &offp); 1247 if (mp == NULL) { 1248 V_pfsyncstats.pfsyncs_badlen++; 1249 return (-1); 1250 } 1251 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1252 1253 for (i = 0; i < count; i++) 1254 pfsync_update_net_tdb(&tp[i]); 1255 #endif 1256 1257 return (len); 1258 } 1259 1260 #if defined(IPSEC) 1261 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1262 static void 1263 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1264 { 1265 struct tdb *tdb; 1266 int s; 1267 1268 /* check for invalid values */ 1269 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1270 (pt->dst.sa.sa_family != AF_INET && 1271 pt->dst.sa.sa_family != AF_INET6)) 1272 goto bad; 1273 1274 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1275 if (tdb) { 1276 pt->rpl = ntohl(pt->rpl); 1277 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1278 1279 /* Neither replay nor byte counter should ever decrease. */ 1280 if (pt->rpl < tdb->tdb_rpl || 1281 pt->cur_bytes < tdb->tdb_cur_bytes) { 1282 goto bad; 1283 } 1284 1285 tdb->tdb_rpl = pt->rpl; 1286 tdb->tdb_cur_bytes = pt->cur_bytes; 1287 } 1288 return; 1289 1290 bad: 1291 if (V_pf_status.debug >= PF_DEBUG_MISC) 1292 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1293 "invalid value\n"); 1294 V_pfsyncstats.pfsyncs_badstate++; 1295 return; 1296 } 1297 #endif 1298 1299 static int 1300 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags) 1301 { 1302 /* check if we are at the right place in the packet */ 1303 if (offset != m->m_pkthdr.len) 1304 V_pfsyncstats.pfsyncs_badlen++; 1305 1306 /* we're done. free and let the caller return */ 1307 m_freem(m); 1308 return (-1); 1309 } 1310 1311 static int 1312 pfsync_in_error(struct mbuf *m, int offset, int count, int flags) 1313 { 1314 V_pfsyncstats.pfsyncs_badact++; 1315 1316 m_freem(m); 1317 return (-1); 1318 } 1319 1320 static int 1321 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1322 struct route *rt) 1323 { 1324 m_freem(m); 1325 return (0); 1326 } 1327 1328 /* ARGSUSED */ 1329 static int 1330 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1331 { 1332 struct pfsync_softc *sc = ifp->if_softc; 1333 struct ifreq *ifr = (struct ifreq *)data; 1334 struct pfsyncreq pfsyncr; 1335 size_t nvbuflen; 1336 int error; 1337 int c; 1338 1339 switch (cmd) { 1340 case SIOCSIFFLAGS: 1341 PFSYNC_LOCK(sc); 1342 if (ifp->if_flags & IFF_UP) { 1343 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1344 PFSYNC_UNLOCK(sc); 1345 pfsync_pointers_init(); 1346 } else { 1347 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1348 PFSYNC_UNLOCK(sc); 1349 pfsync_pointers_uninit(); 1350 } 1351 break; 1352 case SIOCSIFMTU: 1353 if (!sc->sc_sync_if || 1354 ifr->ifr_mtu <= PFSYNC_MINPKT || 1355 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1356 return (EINVAL); 1357 if (ifr->ifr_mtu < ifp->if_mtu) { 1358 for (c = 0; c < pfsync_buckets; c++) { 1359 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1360 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1361 pfsync_sendout(1, c); 1362 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1363 } 1364 } 1365 ifp->if_mtu = ifr->ifr_mtu; 1366 break; 1367 case SIOCGETPFSYNC: 1368 bzero(&pfsyncr, sizeof(pfsyncr)); 1369 PFSYNC_LOCK(sc); 1370 if (sc->sc_sync_if) { 1371 strlcpy(pfsyncr.pfsyncr_syncdev, 1372 sc->sc_sync_if->if_xname, IFNAMSIZ); 1373 } 1374 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1375 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1376 pfsyncr.pfsyncr_defer = sc->sc_flags; 1377 PFSYNC_UNLOCK(sc); 1378 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1379 sizeof(pfsyncr))); 1380 1381 case SIOCGETPFSYNCNV: 1382 { 1383 nvlist_t *nvl_syncpeer; 1384 nvlist_t *nvl = nvlist_create(0); 1385 1386 if (nvl == NULL) 1387 return (ENOMEM); 1388 1389 if (sc->sc_sync_if) 1390 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1391 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1392 nvlist_add_number(nvl, "flags", sc->sc_flags); 1393 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1394 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1395 1396 void *packed = NULL; 1397 packed = nvlist_pack(nvl, &nvbuflen); 1398 if (packed == NULL) { 1399 free(packed, M_NVLIST); 1400 nvlist_destroy(nvl); 1401 return (ENOMEM); 1402 } 1403 1404 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1405 ifr->ifr_cap_nv.length = nvbuflen; 1406 ifr->ifr_cap_nv.buffer = NULL; 1407 free(packed, M_NVLIST); 1408 nvlist_destroy(nvl); 1409 return (EFBIG); 1410 } 1411 1412 ifr->ifr_cap_nv.length = nvbuflen; 1413 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1414 1415 nvlist_destroy(nvl); 1416 nvlist_destroy(nvl_syncpeer); 1417 free(packed, M_NVLIST); 1418 break; 1419 } 1420 1421 case SIOCSETPFSYNC: 1422 { 1423 struct pfsync_kstatus status; 1424 1425 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1426 return (error); 1427 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1428 sizeof(pfsyncr)))) 1429 return (error); 1430 1431 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1432 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1433 1434 error = pfsync_kstatus_to_softc(&status, sc); 1435 return (error); 1436 } 1437 case SIOCSETPFSYNCNV: 1438 { 1439 struct pfsync_kstatus status; 1440 void *data; 1441 nvlist_t *nvl; 1442 1443 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1444 return (error); 1445 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1446 return (EINVAL); 1447 1448 data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); 1449 1450 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1451 ifr->ifr_cap_nv.length)) != 0) { 1452 free(data, M_TEMP); 1453 return (error); 1454 } 1455 1456 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1457 free(data, M_TEMP); 1458 return (EINVAL); 1459 } 1460 1461 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1462 pfsync_nvstatus_to_kstatus(nvl, &status); 1463 1464 nvlist_destroy(nvl); 1465 free(data, M_TEMP); 1466 1467 error = pfsync_kstatus_to_softc(&status, sc); 1468 return (error); 1469 } 1470 default: 1471 return (ENOTTY); 1472 } 1473 1474 return (0); 1475 } 1476 1477 static void 1478 pfsync_out_state(struct pf_kstate *st, void *buf) 1479 { 1480 struct pfsync_state *sp = buf; 1481 1482 pfsync_state_export(sp, st); 1483 } 1484 1485 static void 1486 pfsync_out_iack(struct pf_kstate *st, void *buf) 1487 { 1488 struct pfsync_ins_ack *iack = buf; 1489 1490 iack->id = st->id; 1491 iack->creatorid = st->creatorid; 1492 } 1493 1494 static void 1495 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1496 { 1497 struct pfsync_upd_c *up = buf; 1498 1499 bzero(up, sizeof(*up)); 1500 up->id = st->id; 1501 pf_state_peer_hton(&st->src, &up->src); 1502 pf_state_peer_hton(&st->dst, &up->dst); 1503 up->creatorid = st->creatorid; 1504 up->timeout = st->timeout; 1505 } 1506 1507 static void 1508 pfsync_out_del(struct pf_kstate *st, void *buf) 1509 { 1510 struct pfsync_del_c *dp = buf; 1511 1512 dp->id = st->id; 1513 dp->creatorid = st->creatorid; 1514 st->state_flags |= PFSTATE_NOSYNC; 1515 } 1516 1517 static void 1518 pfsync_drop(struct pfsync_softc *sc) 1519 { 1520 struct pf_kstate *st, *next; 1521 struct pfsync_upd_req_item *ur; 1522 struct pfsync_bucket *b; 1523 int c, q; 1524 1525 for (c = 0; c < pfsync_buckets; c++) { 1526 b = &sc->sc_buckets[c]; 1527 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1528 if (TAILQ_EMPTY(&b->b_qs[q])) 1529 continue; 1530 1531 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1532 KASSERT(st->sync_state == q, 1533 ("%s: st->sync_state == q", 1534 __func__)); 1535 st->sync_state = PFSYNC_S_NONE; 1536 pf_release_state(st); 1537 } 1538 TAILQ_INIT(&b->b_qs[q]); 1539 } 1540 1541 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1542 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1543 free(ur, M_PFSYNC); 1544 } 1545 1546 b->b_len = PFSYNC_MINPKT; 1547 b->b_plus = NULL; 1548 } 1549 } 1550 1551 static void 1552 pfsync_sendout(int schedswi, int c) 1553 { 1554 struct pfsync_softc *sc = V_pfsyncif; 1555 struct ifnet *ifp = sc->sc_ifp; 1556 struct mbuf *m; 1557 struct pfsync_header *ph; 1558 struct pfsync_subheader *subh; 1559 struct pf_kstate *st, *st_next; 1560 struct pfsync_upd_req_item *ur; 1561 struct pfsync_bucket *b = &sc->sc_buckets[c]; 1562 int aflen, offset; 1563 int q, count = 0; 1564 1565 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1566 KASSERT(b->b_len > PFSYNC_MINPKT, 1567 ("%s: sc_len %zu", __func__, b->b_len)); 1568 PFSYNC_BUCKET_LOCK_ASSERT(b); 1569 1570 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1571 pfsync_drop(sc); 1572 return; 1573 } 1574 1575 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1576 if (m == NULL) { 1577 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1578 V_pfsyncstats.pfsyncs_onomem++; 1579 return; 1580 } 1581 m->m_data += max_linkhdr; 1582 m->m_len = m->m_pkthdr.len = b->b_len; 1583 1584 /* build the ip header */ 1585 switch (sc->sc_sync_peer.ss_family) { 1586 #ifdef INET 1587 case AF_INET: 1588 { 1589 struct ip *ip; 1590 1591 ip = mtod(m, struct ip *); 1592 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 1593 aflen = offset = sizeof(*ip); 1594 1595 ip->ip_len = htons(m->m_pkthdr.len); 1596 ip_fillid(ip); 1597 break; 1598 } 1599 #endif 1600 default: 1601 m_freem(m); 1602 return; 1603 } 1604 1605 1606 /* build the pfsync header */ 1607 ph = (struct pfsync_header *)(m->m_data + offset); 1608 bzero(ph, sizeof(*ph)); 1609 offset += sizeof(*ph); 1610 1611 ph->version = PFSYNC_VERSION; 1612 ph->len = htons(b->b_len - aflen); 1613 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1614 1615 /* walk the queues */ 1616 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1617 if (TAILQ_EMPTY(&b->b_qs[q])) 1618 continue; 1619 1620 subh = (struct pfsync_subheader *)(m->m_data + offset); 1621 offset += sizeof(*subh); 1622 1623 count = 0; 1624 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 1625 KASSERT(st->sync_state == q, 1626 ("%s: st->sync_state == q", 1627 __func__)); 1628 /* 1629 * XXXGL: some of write methods do unlocked reads 1630 * of state data :( 1631 */ 1632 pfsync_qs[q].write(st, m->m_data + offset); 1633 offset += pfsync_qs[q].len; 1634 st->sync_state = PFSYNC_S_NONE; 1635 pf_release_state(st); 1636 count++; 1637 } 1638 TAILQ_INIT(&b->b_qs[q]); 1639 1640 bzero(subh, sizeof(*subh)); 1641 subh->action = pfsync_qs[q].action; 1642 subh->count = htons(count); 1643 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1644 } 1645 1646 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 1647 subh = (struct pfsync_subheader *)(m->m_data + offset); 1648 offset += sizeof(*subh); 1649 1650 count = 0; 1651 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1652 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1653 1654 bcopy(&ur->ur_msg, m->m_data + offset, 1655 sizeof(ur->ur_msg)); 1656 offset += sizeof(ur->ur_msg); 1657 free(ur, M_PFSYNC); 1658 count++; 1659 } 1660 1661 bzero(subh, sizeof(*subh)); 1662 subh->action = PFSYNC_ACT_UPD_REQ; 1663 subh->count = htons(count); 1664 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1665 } 1666 1667 /* has someone built a custom region for us to add? */ 1668 if (b->b_plus != NULL) { 1669 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 1670 offset += b->b_pluslen; 1671 1672 b->b_plus = NULL; 1673 } 1674 1675 subh = (struct pfsync_subheader *)(m->m_data + offset); 1676 offset += sizeof(*subh); 1677 1678 bzero(subh, sizeof(*subh)); 1679 subh->action = PFSYNC_ACT_EOF; 1680 subh->count = htons(1); 1681 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1682 1683 /* we're done, let's put it on the wire */ 1684 if (ifp->if_bpf) { 1685 m->m_data += aflen; 1686 m->m_len = m->m_pkthdr.len = b->b_len - aflen; 1687 BPF_MTAP(ifp, m); 1688 m->m_data -= aflen; 1689 m->m_len = m->m_pkthdr.len = b->b_len; 1690 } 1691 1692 if (sc->sc_sync_if == NULL) { 1693 b->b_len = PFSYNC_MINPKT; 1694 m_freem(m); 1695 return; 1696 } 1697 1698 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 1699 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 1700 b->b_len = PFSYNC_MINPKT; 1701 1702 if (!_IF_QFULL(&b->b_snd)) 1703 _IF_ENQUEUE(&b->b_snd, m); 1704 else { 1705 m_freem(m); 1706 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 1707 } 1708 if (schedswi) 1709 swi_sched(V_pfsync_swi_cookie, 0); 1710 } 1711 1712 static void 1713 pfsync_insert_state(struct pf_kstate *st) 1714 { 1715 struct pfsync_softc *sc = V_pfsyncif; 1716 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1717 1718 if (st->state_flags & PFSTATE_NOSYNC) 1719 return; 1720 1721 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1722 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1723 st->state_flags |= PFSTATE_NOSYNC; 1724 return; 1725 } 1726 1727 KASSERT(st->sync_state == PFSYNC_S_NONE, 1728 ("%s: st->sync_state %u", __func__, st->sync_state)); 1729 1730 PFSYNC_BUCKET_LOCK(b); 1731 if (b->b_len == PFSYNC_MINPKT) 1732 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1733 1734 pfsync_q_ins(st, PFSYNC_S_INS, true); 1735 PFSYNC_BUCKET_UNLOCK(b); 1736 1737 st->sync_updates = 0; 1738 } 1739 1740 static int 1741 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 1742 { 1743 struct pfsync_softc *sc = V_pfsyncif; 1744 struct pfsync_deferral *pd; 1745 struct pfsync_bucket *b; 1746 1747 if (m->m_flags & (M_BCAST|M_MCAST)) 1748 return (0); 1749 1750 if (sc == NULL) 1751 return (0); 1752 1753 b = pfsync_get_bucket(sc, st); 1754 1755 PFSYNC_LOCK(sc); 1756 1757 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 1758 !(sc->sc_flags & PFSYNCF_DEFER)) { 1759 PFSYNC_UNLOCK(sc); 1760 return (0); 1761 } 1762 1763 PFSYNC_BUCKET_LOCK(b); 1764 PFSYNC_UNLOCK(sc); 1765 1766 if (b->b_deferred >= 128) 1767 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 1768 1769 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1770 if (pd == NULL) { 1771 PFSYNC_BUCKET_UNLOCK(b); 1772 return (0); 1773 } 1774 b->b_deferred++; 1775 1776 m->m_flags |= M_SKIP_FIREWALL; 1777 st->state_flags |= PFSTATE_ACK; 1778 1779 pd->pd_sc = sc; 1780 pd->pd_refs = 0; 1781 pd->pd_st = st; 1782 pf_ref_state(st); 1783 pd->pd_m = m; 1784 1785 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 1786 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 1787 callout_reset(&pd->pd_tmo, PFSYNC_DEFER_TIMEOUT, pfsync_defer_tmo, pd); 1788 1789 pfsync_push(b); 1790 PFSYNC_BUCKET_UNLOCK(b); 1791 1792 return (1); 1793 } 1794 1795 static void 1796 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1797 { 1798 struct pfsync_softc *sc = pd->pd_sc; 1799 struct mbuf *m = pd->pd_m; 1800 struct pf_kstate *st = pd->pd_st; 1801 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1802 1803 PFSYNC_BUCKET_LOCK_ASSERT(b); 1804 1805 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1806 b->b_deferred--; 1807 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1808 free(pd, M_PFSYNC); 1809 pf_release_state(st); 1810 1811 if (drop) 1812 m_freem(m); 1813 else { 1814 _IF_ENQUEUE(&b->b_snd, m); 1815 pfsync_push(b); 1816 } 1817 } 1818 1819 static void 1820 pfsync_defer_tmo(void *arg) 1821 { 1822 struct epoch_tracker et; 1823 struct pfsync_deferral *pd = arg; 1824 struct pfsync_softc *sc = pd->pd_sc; 1825 struct mbuf *m = pd->pd_m; 1826 struct pf_kstate *st = pd->pd_st; 1827 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1828 1829 PFSYNC_BUCKET_LOCK_ASSERT(b); 1830 1831 if (sc->sc_sync_if == NULL) { 1832 PFSYNC_BUCKET_UNLOCK(b); 1833 return; 1834 } 1835 1836 NET_EPOCH_ENTER(et); 1837 CURVNET_SET(sc->sc_sync_if->if_vnet); 1838 1839 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1840 b->b_deferred--; 1841 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1842 if (pd->pd_refs == 0) 1843 free(pd, M_PFSYNC); 1844 PFSYNC_BUCKET_UNLOCK(b); 1845 1846 pfsync_tx(sc, m); 1847 1848 pf_release_state(st); 1849 1850 CURVNET_RESTORE(); 1851 NET_EPOCH_EXIT(et); 1852 } 1853 1854 static void 1855 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 1856 { 1857 struct pfsync_softc *sc = V_pfsyncif; 1858 struct pfsync_deferral *pd; 1859 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1860 1861 PFSYNC_BUCKET_LOCK_ASSERT(b); 1862 1863 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 1864 if (pd->pd_st == st) { 1865 if (callout_stop(&pd->pd_tmo) > 0) 1866 pfsync_undefer(pd, drop); 1867 1868 return; 1869 } 1870 } 1871 1872 panic("%s: unable to find deferred state", __func__); 1873 } 1874 1875 static void 1876 pfsync_undefer_state(struct pf_kstate *st, int drop) 1877 { 1878 struct pfsync_softc *sc = V_pfsyncif; 1879 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1880 1881 PFSYNC_BUCKET_LOCK(b); 1882 pfsync_undefer_state_locked(st, drop); 1883 PFSYNC_BUCKET_UNLOCK(b); 1884 } 1885 1886 static struct pfsync_bucket* 1887 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 1888 { 1889 int c = PF_IDHASH(st) % pfsync_buckets; 1890 return &sc->sc_buckets[c]; 1891 } 1892 1893 static void 1894 pfsync_update_state(struct pf_kstate *st) 1895 { 1896 struct pfsync_softc *sc = V_pfsyncif; 1897 bool sync = false, ref = true; 1898 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1899 1900 PF_STATE_LOCK_ASSERT(st); 1901 PFSYNC_BUCKET_LOCK(b); 1902 1903 if (st->state_flags & PFSTATE_ACK) 1904 pfsync_undefer_state_locked(st, 0); 1905 if (st->state_flags & PFSTATE_NOSYNC) { 1906 if (st->sync_state != PFSYNC_S_NONE) 1907 pfsync_q_del(st, true, b); 1908 PFSYNC_BUCKET_UNLOCK(b); 1909 return; 1910 } 1911 1912 if (b->b_len == PFSYNC_MINPKT) 1913 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1914 1915 switch (st->sync_state) { 1916 case PFSYNC_S_UPD_C: 1917 case PFSYNC_S_UPD: 1918 case PFSYNC_S_INS: 1919 /* we're already handling it */ 1920 1921 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1922 st->sync_updates++; 1923 if (st->sync_updates >= sc->sc_maxupdates) 1924 sync = true; 1925 } 1926 break; 1927 1928 case PFSYNC_S_IACK: 1929 pfsync_q_del(st, false, b); 1930 ref = false; 1931 /* FALLTHROUGH */ 1932 1933 case PFSYNC_S_NONE: 1934 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 1935 st->sync_updates = 0; 1936 break; 1937 1938 default: 1939 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1940 } 1941 1942 if (sync || (time_uptime - st->pfsync_time) < 2) 1943 pfsync_push(b); 1944 1945 PFSYNC_BUCKET_UNLOCK(b); 1946 } 1947 1948 static void 1949 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1950 { 1951 struct pfsync_softc *sc = V_pfsyncif; 1952 struct pfsync_bucket *b = &sc->sc_buckets[0]; 1953 struct pfsync_upd_req_item *item; 1954 size_t nlen = sizeof(struct pfsync_upd_req); 1955 1956 PFSYNC_BUCKET_LOCK_ASSERT(b); 1957 1958 /* 1959 * This code does a bit to prevent multiple update requests for the 1960 * same state being generated. It searches current subheader queue, 1961 * but it doesn't lookup into queue of already packed datagrams. 1962 */ 1963 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 1964 if (item->ur_msg.id == id && 1965 item->ur_msg.creatorid == creatorid) 1966 return; 1967 1968 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1969 if (item == NULL) 1970 return; /* XXX stats */ 1971 1972 item->ur_msg.id = id; 1973 item->ur_msg.creatorid = creatorid; 1974 1975 if (TAILQ_EMPTY(&b->b_upd_req_list)) 1976 nlen += sizeof(struct pfsync_subheader); 1977 1978 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 1979 pfsync_sendout(0, 0); 1980 1981 nlen = sizeof(struct pfsync_subheader) + 1982 sizeof(struct pfsync_upd_req); 1983 } 1984 1985 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 1986 b->b_len += nlen; 1987 1988 pfsync_push(b); 1989 } 1990 1991 static bool 1992 pfsync_update_state_req(struct pf_kstate *st) 1993 { 1994 struct pfsync_softc *sc = V_pfsyncif; 1995 bool ref = true, full = false; 1996 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1997 1998 PF_STATE_LOCK_ASSERT(st); 1999 PFSYNC_BUCKET_LOCK(b); 2000 2001 if (st->state_flags & PFSTATE_NOSYNC) { 2002 if (st->sync_state != PFSYNC_S_NONE) 2003 pfsync_q_del(st, true, b); 2004 PFSYNC_BUCKET_UNLOCK(b); 2005 return (full); 2006 } 2007 2008 switch (st->sync_state) { 2009 case PFSYNC_S_UPD_C: 2010 case PFSYNC_S_IACK: 2011 pfsync_q_del(st, false, b); 2012 ref = false; 2013 /* FALLTHROUGH */ 2014 2015 case PFSYNC_S_NONE: 2016 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2017 pfsync_push(b); 2018 break; 2019 2020 case PFSYNC_S_INS: 2021 case PFSYNC_S_UPD: 2022 case PFSYNC_S_DEL: 2023 /* we're already handling it */ 2024 break; 2025 2026 default: 2027 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2028 } 2029 2030 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state)) 2031 full = true; 2032 2033 PFSYNC_BUCKET_UNLOCK(b); 2034 2035 return (full); 2036 } 2037 2038 static void 2039 pfsync_delete_state(struct pf_kstate *st) 2040 { 2041 struct pfsync_softc *sc = V_pfsyncif; 2042 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2043 bool ref = true; 2044 2045 PFSYNC_BUCKET_LOCK(b); 2046 if (st->state_flags & PFSTATE_ACK) 2047 pfsync_undefer_state_locked(st, 1); 2048 if (st->state_flags & PFSTATE_NOSYNC) { 2049 if (st->sync_state != PFSYNC_S_NONE) 2050 pfsync_q_del(st, true, b); 2051 PFSYNC_BUCKET_UNLOCK(b); 2052 return; 2053 } 2054 2055 if (b->b_len == PFSYNC_MINPKT) 2056 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2057 2058 switch (st->sync_state) { 2059 case PFSYNC_S_INS: 2060 /* We never got to tell the world so just forget about it. */ 2061 pfsync_q_del(st, true, b); 2062 break; 2063 2064 case PFSYNC_S_UPD_C: 2065 case PFSYNC_S_UPD: 2066 case PFSYNC_S_IACK: 2067 pfsync_q_del(st, false, b); 2068 ref = false; 2069 /* FALLTHROUGH */ 2070 2071 case PFSYNC_S_NONE: 2072 pfsync_q_ins(st, PFSYNC_S_DEL, ref); 2073 break; 2074 2075 default: 2076 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2077 } 2078 2079 PFSYNC_BUCKET_UNLOCK(b); 2080 } 2081 2082 static void 2083 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2084 { 2085 struct { 2086 struct pfsync_subheader subh; 2087 struct pfsync_clr clr; 2088 } __packed r; 2089 2090 bzero(&r, sizeof(r)); 2091 2092 r.subh.action = PFSYNC_ACT_CLR; 2093 r.subh.count = htons(1); 2094 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2095 2096 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2097 r.clr.creatorid = creatorid; 2098 2099 pfsync_send_plus(&r, sizeof(r)); 2100 } 2101 2102 static void 2103 pfsync_q_ins(struct pf_kstate *st, int q, bool ref) 2104 { 2105 struct pfsync_softc *sc = V_pfsyncif; 2106 size_t nlen = pfsync_qs[q].len; 2107 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2108 2109 PFSYNC_BUCKET_LOCK_ASSERT(b); 2110 2111 KASSERT(st->sync_state == PFSYNC_S_NONE, 2112 ("%s: st->sync_state %u", __func__, st->sync_state)); 2113 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2114 b->b_len)); 2115 2116 if (TAILQ_EMPTY(&b->b_qs[q])) 2117 nlen += sizeof(struct pfsync_subheader); 2118 2119 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2120 pfsync_sendout(1, b->b_id); 2121 2122 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2123 } 2124 2125 b->b_len += nlen; 2126 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2127 st->sync_state = q; 2128 if (ref) 2129 pf_ref_state(st); 2130 } 2131 2132 static void 2133 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2134 { 2135 int q = st->sync_state; 2136 2137 PFSYNC_BUCKET_LOCK_ASSERT(b); 2138 KASSERT(st->sync_state != PFSYNC_S_NONE, 2139 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2140 2141 b->b_len -= pfsync_qs[q].len; 2142 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2143 st->sync_state = PFSYNC_S_NONE; 2144 if (unref) 2145 pf_release_state(st); 2146 2147 if (TAILQ_EMPTY(&b->b_qs[q])) 2148 b->b_len -= sizeof(struct pfsync_subheader); 2149 } 2150 2151 static void 2152 pfsync_bulk_start(void) 2153 { 2154 struct pfsync_softc *sc = V_pfsyncif; 2155 2156 if (V_pf_status.debug >= PF_DEBUG_MISC) 2157 printf("pfsync: received bulk update request\n"); 2158 2159 PFSYNC_BLOCK(sc); 2160 2161 sc->sc_ureq_received = time_uptime; 2162 sc->sc_bulk_hashid = 0; 2163 sc->sc_bulk_stateid = 0; 2164 pfsync_bulk_status(PFSYNC_BUS_START); 2165 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2166 PFSYNC_BUNLOCK(sc); 2167 } 2168 2169 static void 2170 pfsync_bulk_update(void *arg) 2171 { 2172 struct pfsync_softc *sc = arg; 2173 struct pf_kstate *s; 2174 int i; 2175 2176 PFSYNC_BLOCK_ASSERT(sc); 2177 CURVNET_SET(sc->sc_ifp->if_vnet); 2178 2179 /* 2180 * Start with last state from previous invocation. 2181 * It may had gone, in this case start from the 2182 * hash slot. 2183 */ 2184 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2185 2186 if (s != NULL) 2187 i = PF_IDHASH(s); 2188 else 2189 i = sc->sc_bulk_hashid; 2190 2191 for (; i <= pf_hashmask; i++) { 2192 struct pf_idhash *ih = &V_pf_idhash[i]; 2193 2194 if (s != NULL) 2195 PF_HASHROW_ASSERT(ih); 2196 else { 2197 PF_HASHROW_LOCK(ih); 2198 s = LIST_FIRST(&ih->states); 2199 } 2200 2201 for (; s; s = LIST_NEXT(s, entry)) { 2202 if (s->sync_state == PFSYNC_S_NONE && 2203 s->timeout < PFTM_MAX && 2204 s->pfsync_time <= sc->sc_ureq_received) { 2205 if (pfsync_update_state_req(s)) { 2206 /* We've filled a packet. */ 2207 sc->sc_bulk_hashid = i; 2208 sc->sc_bulk_stateid = s->id; 2209 sc->sc_bulk_creatorid = s->creatorid; 2210 PF_HASHROW_UNLOCK(ih); 2211 callout_reset(&sc->sc_bulk_tmo, 1, 2212 pfsync_bulk_update, sc); 2213 goto full; 2214 } 2215 } 2216 } 2217 PF_HASHROW_UNLOCK(ih); 2218 } 2219 2220 /* We're done. */ 2221 pfsync_bulk_status(PFSYNC_BUS_END); 2222 full: 2223 CURVNET_RESTORE(); 2224 } 2225 2226 static void 2227 pfsync_bulk_status(u_int8_t status) 2228 { 2229 struct { 2230 struct pfsync_subheader subh; 2231 struct pfsync_bus bus; 2232 } __packed r; 2233 2234 struct pfsync_softc *sc = V_pfsyncif; 2235 2236 bzero(&r, sizeof(r)); 2237 2238 r.subh.action = PFSYNC_ACT_BUS; 2239 r.subh.count = htons(1); 2240 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2241 2242 r.bus.creatorid = V_pf_status.hostid; 2243 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2244 r.bus.status = status; 2245 2246 pfsync_send_plus(&r, sizeof(r)); 2247 } 2248 2249 static void 2250 pfsync_bulk_fail(void *arg) 2251 { 2252 struct pfsync_softc *sc = arg; 2253 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2254 2255 CURVNET_SET(sc->sc_ifp->if_vnet); 2256 2257 PFSYNC_BLOCK_ASSERT(sc); 2258 2259 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2260 /* Try again */ 2261 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2262 pfsync_bulk_fail, V_pfsyncif); 2263 PFSYNC_BUCKET_LOCK(b); 2264 pfsync_request_update(0, 0); 2265 PFSYNC_BUCKET_UNLOCK(b); 2266 } else { 2267 /* Pretend like the transfer was ok. */ 2268 sc->sc_ureq_sent = 0; 2269 sc->sc_bulk_tries = 0; 2270 PFSYNC_LOCK(sc); 2271 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2272 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2273 "pfsync bulk fail"); 2274 sc->sc_flags |= PFSYNCF_OK; 2275 PFSYNC_UNLOCK(sc); 2276 if (V_pf_status.debug >= PF_DEBUG_MISC) 2277 printf("pfsync: failed to receive bulk update\n"); 2278 } 2279 2280 CURVNET_RESTORE(); 2281 } 2282 2283 static void 2284 pfsync_send_plus(void *plus, size_t pluslen) 2285 { 2286 struct pfsync_softc *sc = V_pfsyncif; 2287 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2288 2289 PFSYNC_BUCKET_LOCK(b); 2290 2291 if (b->b_len + pluslen > sc->sc_ifp->if_mtu) 2292 pfsync_sendout(1, b->b_id); 2293 2294 b->b_plus = plus; 2295 b->b_len += (b->b_pluslen = pluslen); 2296 2297 pfsync_sendout(1, b->b_id); 2298 PFSYNC_BUCKET_UNLOCK(b); 2299 } 2300 2301 static void 2302 pfsync_timeout(void *arg) 2303 { 2304 struct pfsync_bucket *b = arg; 2305 2306 CURVNET_SET(b->b_sc->sc_ifp->if_vnet); 2307 PFSYNC_BUCKET_LOCK(b); 2308 pfsync_push(b); 2309 PFSYNC_BUCKET_UNLOCK(b); 2310 CURVNET_RESTORE(); 2311 } 2312 2313 static void 2314 pfsync_push(struct pfsync_bucket *b) 2315 { 2316 2317 PFSYNC_BUCKET_LOCK_ASSERT(b); 2318 2319 b->b_flags |= PFSYNCF_BUCKET_PUSH; 2320 swi_sched(V_pfsync_swi_cookie, 0); 2321 } 2322 2323 static void 2324 pfsync_push_all(struct pfsync_softc *sc) 2325 { 2326 int c; 2327 struct pfsync_bucket *b; 2328 2329 for (c = 0; c < pfsync_buckets; c++) { 2330 b = &sc->sc_buckets[c]; 2331 2332 PFSYNC_BUCKET_LOCK(b); 2333 pfsync_push(b); 2334 PFSYNC_BUCKET_UNLOCK(b); 2335 } 2336 } 2337 2338 static void 2339 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) 2340 { 2341 struct ip *ip; 2342 int af, error = 0; 2343 2344 ip = mtod(m, struct ip *); 2345 MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); 2346 2347 af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; 2348 2349 /* 2350 * We distinguish between a deferral packet and our 2351 * own pfsync packet based on M_SKIP_FIREWALL 2352 * flag. This is XXX. 2353 */ 2354 switch (af) { 2355 #ifdef INET 2356 case AF_INET: 2357 if (m->m_flags & M_SKIP_FIREWALL) { 2358 error = ip_output(m, NULL, NULL, 0, 2359 NULL, NULL); 2360 } else { 2361 error = ip_output(m, NULL, NULL, 2362 IP_RAWOUTPUT, &sc->sc_imo, NULL); 2363 } 2364 break; 2365 #endif 2366 #ifdef INET6 2367 case AF_INET6: 2368 if (m->m_flags & M_SKIP_FIREWALL) { 2369 error = ip6_output(m, NULL, NULL, 0, 2370 NULL, NULL, NULL); 2371 } else { 2372 MPASS(false); 2373 /* We don't support pfsync over IPv6. */ 2374 /*error = ip6_output(m, NULL, NULL, 2375 IP_RAWOUTPUT, &sc->sc_imo6, NULL);*/ 2376 } 2377 break; 2378 #endif 2379 } 2380 2381 if (error == 0) 2382 V_pfsyncstats.pfsyncs_opackets++; 2383 else 2384 V_pfsyncstats.pfsyncs_oerrors++; 2385 2386 } 2387 2388 static void 2389 pfsyncintr(void *arg) 2390 { 2391 struct epoch_tracker et; 2392 struct pfsync_softc *sc = arg; 2393 struct pfsync_bucket *b; 2394 struct mbuf *m, *n; 2395 int c; 2396 2397 NET_EPOCH_ENTER(et); 2398 CURVNET_SET(sc->sc_ifp->if_vnet); 2399 2400 for (c = 0; c < pfsync_buckets; c++) { 2401 b = &sc->sc_buckets[c]; 2402 2403 PFSYNC_BUCKET_LOCK(b); 2404 if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { 2405 pfsync_sendout(0, b->b_id); 2406 b->b_flags &= ~PFSYNCF_BUCKET_PUSH; 2407 } 2408 _IF_DEQUEUE_ALL(&b->b_snd, m); 2409 PFSYNC_BUCKET_UNLOCK(b); 2410 2411 for (; m != NULL; m = n) { 2412 n = m->m_nextpkt; 2413 m->m_nextpkt = NULL; 2414 2415 pfsync_tx(sc, m); 2416 } 2417 } 2418 CURVNET_RESTORE(); 2419 NET_EPOCH_EXIT(et); 2420 } 2421 2422 static int 2423 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, 2424 struct in_mfilter *imf) 2425 { 2426 struct ip_moptions *imo = &sc->sc_imo; 2427 int error; 2428 2429 if (!(ifp->if_flags & IFF_MULTICAST)) 2430 return (EADDRNOTAVAIL); 2431 2432 switch (sc->sc_sync_peer.ss_family) { 2433 #ifdef INET 2434 case AF_INET: 2435 { 2436 ip_mfilter_init(&imo->imo_head); 2437 imo->imo_multicast_vif = -1; 2438 if ((error = in_joingroup(ifp, &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, 2439 &imf->imf_inm)) != 0) 2440 return (error); 2441 2442 ip_mfilter_insert(&imo->imo_head, imf); 2443 imo->imo_multicast_ifp = ifp; 2444 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2445 imo->imo_multicast_loop = 0; 2446 break; 2447 } 2448 #endif 2449 } 2450 2451 return (0); 2452 } 2453 2454 static void 2455 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2456 { 2457 struct ip_moptions *imo = &sc->sc_imo; 2458 struct in_mfilter *imf; 2459 2460 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2461 ip_mfilter_remove(&imo->imo_head, imf); 2462 in_leavegroup(imf->imf_inm, NULL); 2463 ip_mfilter_free(imf); 2464 } 2465 imo->imo_multicast_ifp = NULL; 2466 } 2467 2468 void 2469 pfsync_detach_ifnet(struct ifnet *ifp) 2470 { 2471 struct pfsync_softc *sc = V_pfsyncif; 2472 2473 if (sc == NULL) 2474 return; 2475 2476 PFSYNC_LOCK(sc); 2477 2478 if (sc->sc_sync_if == ifp) { 2479 /* We don't need mutlicast cleanup here, because the interface 2480 * is going away. We do need to ensure we don't try to do 2481 * cleanup later. 2482 */ 2483 ip_mfilter_init(&sc->sc_imo.imo_head); 2484 sc->sc_imo.imo_multicast_ifp = NULL; 2485 sc->sc_sync_if = NULL; 2486 } 2487 2488 PFSYNC_UNLOCK(sc); 2489 } 2490 2491 static int 2492 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 2493 { 2494 struct sockaddr_storage sa; 2495 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 2496 status->flags = pfsyncr->pfsyncr_defer; 2497 2498 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 2499 2500 memset(&sa, 0, sizeof(sa)); 2501 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 2502 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 2503 in->sin_family = AF_INET; 2504 in->sin_len = sizeof(*in); 2505 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 2506 } 2507 status->syncpeer = sa; 2508 2509 return 0; 2510 } 2511 2512 static int 2513 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 2514 { 2515 struct in_mfilter *imf = NULL; 2516 struct ifnet *sifp; 2517 struct ip *ip; 2518 int error; 2519 int c; 2520 2521 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 2522 return (EINVAL); 2523 2524 if (status->syncdev[0] == '\0') 2525 sifp = NULL; 2526 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 2527 return (EINVAL); 2528 2529 struct sockaddr_in *status_sin = 2530 (struct sockaddr_in *)&(status->syncpeer); 2531 if (sifp != NULL && (status_sin->sin_addr.s_addr == 0 || 2532 status_sin->sin_addr.s_addr == 2533 htonl(INADDR_PFSYNC_GROUP))) 2534 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 2535 2536 PFSYNC_LOCK(sc); 2537 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 2538 sc_sin->sin_family = AF_INET; 2539 sc_sin->sin_len = sizeof(*sc_sin); 2540 if (status_sin->sin_addr.s_addr == 0) { 2541 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 2542 } else { 2543 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 2544 } 2545 2546 sc->sc_maxupdates = status->maxupdates; 2547 if (status->flags & PFSYNCF_DEFER) { 2548 sc->sc_flags |= PFSYNCF_DEFER; 2549 V_pfsync_defer_ptr = pfsync_defer; 2550 } else { 2551 sc->sc_flags &= ~PFSYNCF_DEFER; 2552 V_pfsync_defer_ptr = NULL; 2553 } 2554 2555 if (sifp == NULL) { 2556 if (sc->sc_sync_if) 2557 if_rele(sc->sc_sync_if); 2558 sc->sc_sync_if = NULL; 2559 pfsync_multicast_cleanup(sc); 2560 PFSYNC_UNLOCK(sc); 2561 return (0); 2562 } 2563 2564 for (c = 0; c < pfsync_buckets; c++) { 2565 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 2566 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 2567 (sifp->if_mtu < sc->sc_ifp->if_mtu || 2568 (sc->sc_sync_if != NULL && 2569 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 2570 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 2571 pfsync_sendout(1, c); 2572 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 2573 } 2574 2575 pfsync_multicast_cleanup(sc); 2576 2577 if (sc_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 2578 error = pfsync_multicast_setup(sc, sifp, imf); 2579 if (error) { 2580 if_rele(sifp); 2581 ip_mfilter_free(imf); 2582 PFSYNC_UNLOCK(sc); 2583 return (error); 2584 } 2585 } 2586 if (sc->sc_sync_if) 2587 if_rele(sc->sc_sync_if); 2588 sc->sc_sync_if = sifp; 2589 2590 ip = &sc->sc_template.ipv4; 2591 bzero(ip, sizeof(*ip)); 2592 ip->ip_v = IPVERSION; 2593 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 2594 ip->ip_tos = IPTOS_LOWDELAY; 2595 /* len and id are set later. */ 2596 ip->ip_off = htons(IP_DF); 2597 ip->ip_ttl = PFSYNC_DFLTTL; 2598 ip->ip_p = IPPROTO_PFSYNC; 2599 ip->ip_src.s_addr = INADDR_ANY; 2600 ip->ip_dst.s_addr = sc_sin->sin_addr.s_addr; 2601 2602 /* Request a full state table update. */ 2603 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2604 (*carp_demote_adj_p)(V_pfsync_carp_adj, 2605 "pfsync bulk start"); 2606 sc->sc_flags &= ~PFSYNCF_OK; 2607 if (V_pf_status.debug >= PF_DEBUG_MISC) 2608 printf("pfsync: requesting bulk update\n"); 2609 PFSYNC_UNLOCK(sc); 2610 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 2611 pfsync_request_update(0, 0); 2612 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 2613 PFSYNC_BLOCK(sc); 2614 sc->sc_ureq_sent = time_uptime; 2615 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); 2616 PFSYNC_BUNLOCK(sc); 2617 return (0); 2618 } 2619 2620 static void 2621 pfsync_pointers_init(void) 2622 { 2623 2624 PF_RULES_WLOCK(); 2625 V_pfsync_state_import_ptr = pfsync_state_import; 2626 V_pfsync_insert_state_ptr = pfsync_insert_state; 2627 V_pfsync_update_state_ptr = pfsync_update_state; 2628 V_pfsync_delete_state_ptr = pfsync_delete_state; 2629 V_pfsync_clear_states_ptr = pfsync_clear_states; 2630 V_pfsync_defer_ptr = pfsync_defer; 2631 PF_RULES_WUNLOCK(); 2632 } 2633 2634 static void 2635 pfsync_pointers_uninit(void) 2636 { 2637 2638 PF_RULES_WLOCK(); 2639 V_pfsync_state_import_ptr = NULL; 2640 V_pfsync_insert_state_ptr = NULL; 2641 V_pfsync_update_state_ptr = NULL; 2642 V_pfsync_delete_state_ptr = NULL; 2643 V_pfsync_clear_states_ptr = NULL; 2644 V_pfsync_defer_ptr = NULL; 2645 PF_RULES_WUNLOCK(); 2646 } 2647 2648 static void 2649 vnet_pfsync_init(const void *unused __unused) 2650 { 2651 int error; 2652 2653 V_pfsync_cloner = if_clone_simple(pfsyncname, 2654 pfsync_clone_create, pfsync_clone_destroy, 1); 2655 error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, 2656 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 2657 if (error) { 2658 if_clone_detach(V_pfsync_cloner); 2659 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 2660 } 2661 2662 pfsync_pointers_init(); 2663 } 2664 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 2665 vnet_pfsync_init, NULL); 2666 2667 static void 2668 vnet_pfsync_uninit(const void *unused __unused) 2669 { 2670 int ret __diagused; 2671 2672 pfsync_pointers_uninit(); 2673 2674 if_clone_detach(V_pfsync_cloner); 2675 ret = swi_remove(V_pfsync_swi_cookie); 2676 MPASS(ret == 0); 2677 ret = intr_event_destroy(V_pfsync_swi_ie); 2678 MPASS(ret == 0); 2679 } 2680 2681 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 2682 vnet_pfsync_uninit, NULL); 2683 2684 static int 2685 pfsync_init(void) 2686 { 2687 #ifdef INET 2688 int error; 2689 2690 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 2691 2692 error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); 2693 if (error) 2694 return (error); 2695 #endif 2696 2697 return (0); 2698 } 2699 2700 static void 2701 pfsync_uninit(void) 2702 { 2703 pfsync_detach_ifnet_ptr = NULL; 2704 2705 #ifdef INET 2706 ipproto_unregister(IPPROTO_PFSYNC); 2707 #endif 2708 } 2709 2710 static int 2711 pfsync_modevent(module_t mod, int type, void *data) 2712 { 2713 int error = 0; 2714 2715 switch (type) { 2716 case MOD_LOAD: 2717 error = pfsync_init(); 2718 break; 2719 case MOD_UNLOAD: 2720 pfsync_uninit(); 2721 break; 2722 default: 2723 error = EINVAL; 2724 break; 2725 } 2726 2727 return (error); 2728 } 2729 2730 static moduledata_t pfsync_mod = { 2731 pfsyncname, 2732 pfsync_modevent, 2733 0 2734 }; 2735 2736 #define PFSYNC_MODVER 1 2737 2738 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 2739 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 2740 MODULE_VERSION(pfsync, PFSYNC_MODVER); 2741 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 2742