1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_pf.h" 68 69 #include <sys/param.h> 70 #include <sys/bus.h> 71 #include <sys/endian.h> 72 #include <sys/interrupt.h> 73 #include <sys/kernel.h> 74 #include <sys/lock.h> 75 #include <sys/mbuf.h> 76 #include <sys/module.h> 77 #include <sys/mutex.h> 78 #include <sys/nv.h> 79 #include <sys/priv.h> 80 #include <sys/smp.h> 81 #include <sys/socket.h> 82 #include <sys/sockio.h> 83 #include <sys/sysctl.h> 84 #include <sys/syslog.h> 85 86 #include <net/bpf.h> 87 #include <net/if.h> 88 #include <net/if_var.h> 89 #include <net/if_clone.h> 90 #include <net/if_private.h> 91 #include <net/if_types.h> 92 #include <net/vnet.h> 93 #include <net/pfvar.h> 94 #include <net/if_pfsync.h> 95 96 #include <netinet/if_ether.h> 97 #include <netinet/in.h> 98 #include <netinet/in_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip_carp.h> 101 #include <netinet/ip_var.h> 102 #include <netinet/tcp.h> 103 #include <netinet/tcp_fsm.h> 104 #include <netinet/tcp_seq.h> 105 106 #include <netinet/ip6.h> 107 #include <netinet6/ip6_var.h> 108 109 #include <netpfil/pf/pfsync_nv.h> 110 111 struct pfsync_bucket; 112 struct pfsync_softc; 113 114 union inet_template { 115 struct ip ipv4; 116 }; 117 118 #define PFSYNC_MINPKT ( \ 119 sizeof(union inet_template) + \ 120 sizeof(struct pfsync_header) + \ 121 sizeof(struct pfsync_subheader) ) 122 123 static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, 124 struct pfsync_state_peer *); 125 static int pfsync_in_clr(struct mbuf *, int, int, int); 126 static int pfsync_in_ins(struct mbuf *, int, int, int); 127 static int pfsync_in_iack(struct mbuf *, int, int, int); 128 static int pfsync_in_upd(struct mbuf *, int, int, int); 129 static int pfsync_in_upd_c(struct mbuf *, int, int, int); 130 static int pfsync_in_ureq(struct mbuf *, int, int, int); 131 static int pfsync_in_del_c(struct mbuf *, int, int, int); 132 static int pfsync_in_bus(struct mbuf *, int, int, int); 133 static int pfsync_in_tdb(struct mbuf *, int, int, int); 134 static int pfsync_in_eof(struct mbuf *, int, int, int); 135 static int pfsync_in_error(struct mbuf *, int, int, int); 136 137 static int (*pfsync_acts[])(struct mbuf *, int, int, int) = { 138 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 139 pfsync_in_ins, /* PFSYNC_ACT_INS */ 140 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 141 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 142 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 143 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 144 pfsync_in_error, /* PFSYNC_ACT_DEL */ 145 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 146 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 147 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 148 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 149 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 150 pfsync_in_eof /* PFSYNC_ACT_EOF */ 151 }; 152 153 struct pfsync_q { 154 void (*write)(struct pf_kstate *, void *); 155 size_t len; 156 u_int8_t action; 157 }; 158 159 /* we have one of these for every PFSYNC_S_ */ 160 static void pfsync_out_state(struct pf_kstate *, void *); 161 static void pfsync_out_iack(struct pf_kstate *, void *); 162 static void pfsync_out_upd_c(struct pf_kstate *, void *); 163 static void pfsync_out_del_c(struct pf_kstate *, void *); 164 165 static struct pfsync_q pfsync_qs[] = { 166 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 167 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 168 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, 169 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 170 { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 171 }; 172 173 static void pfsync_q_ins(struct pf_kstate *, int, bool); 174 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 175 176 static void pfsync_update_state(struct pf_kstate *); 177 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 178 179 struct pfsync_upd_req_item { 180 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 181 struct pfsync_upd_req ur_msg; 182 }; 183 184 struct pfsync_deferral { 185 struct pfsync_softc *pd_sc; 186 TAILQ_ENTRY(pfsync_deferral) pd_entry; 187 struct callout pd_tmo; 188 189 struct pf_kstate *pd_st; 190 struct mbuf *pd_m; 191 }; 192 193 struct pfsync_bucket 194 { 195 int b_id; 196 struct pfsync_softc *b_sc; 197 struct mtx b_mtx; 198 struct callout b_tmo; 199 int b_flags; 200 #define PFSYNCF_BUCKET_PUSH 0x00000001 201 202 size_t b_len; 203 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_S_COUNT]; 204 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 205 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 206 u_int b_deferred; 207 void *b_plus; 208 size_t b_pluslen; 209 210 struct ifaltq b_snd; 211 }; 212 213 struct pfsync_softc { 214 /* Configuration */ 215 struct ifnet *sc_ifp; 216 struct ifnet *sc_sync_if; 217 struct ip_moptions sc_imo; 218 struct sockaddr_storage sc_sync_peer; 219 uint32_t sc_flags; 220 uint8_t sc_maxupdates; 221 union inet_template sc_template; 222 struct mtx sc_mtx; 223 224 /* Queued data */ 225 struct pfsync_bucket *sc_buckets; 226 227 /* Bulk update info */ 228 struct mtx sc_bulk_mtx; 229 uint32_t sc_ureq_sent; 230 int sc_bulk_tries; 231 uint32_t sc_ureq_received; 232 int sc_bulk_hashid; 233 uint64_t sc_bulk_stateid; 234 uint32_t sc_bulk_creatorid; 235 struct callout sc_bulk_tmo; 236 struct callout sc_bulkfail_tmo; 237 }; 238 239 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 240 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 241 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 242 243 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 244 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 245 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 246 247 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 248 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 249 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 250 251 static const char pfsyncname[] = "pfsync"; 252 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 253 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 254 #define V_pfsyncif VNET(pfsyncif) 255 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 256 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 257 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 258 #define V_pfsync_swi_ie VNET(pfsync_swi_ie) 259 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 260 #define V_pfsyncstats VNET(pfsyncstats) 261 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 262 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 263 264 static void pfsync_timeout(void *); 265 static void pfsync_push(struct pfsync_bucket *); 266 static void pfsync_push_all(struct pfsync_softc *); 267 static void pfsyncintr(void *); 268 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 269 struct in_mfilter *imf); 270 static void pfsync_multicast_cleanup(struct pfsync_softc *); 271 static void pfsync_pointers_init(void); 272 static void pfsync_pointers_uninit(void); 273 static int pfsync_init(void); 274 static void pfsync_uninit(void); 275 276 static unsigned long pfsync_buckets; 277 278 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 279 "PFSYNC"); 280 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 281 &VNET_NAME(pfsyncstats), pfsyncstats, 282 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 283 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 284 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 285 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 286 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 287 288 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 289 static void pfsync_clone_destroy(struct ifnet *); 290 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 291 struct pf_state_peer *); 292 static int pfsyncoutput(struct ifnet *, struct mbuf *, 293 const struct sockaddr *, struct route *); 294 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 295 296 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 297 static void pfsync_undefer(struct pfsync_deferral *, int); 298 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 299 static void pfsync_undefer_state(struct pf_kstate *, int); 300 static void pfsync_defer_tmo(void *); 301 302 static void pfsync_request_update(u_int32_t, u_int64_t); 303 static bool pfsync_update_state_req(struct pf_kstate *); 304 305 static void pfsync_drop(struct pfsync_softc *); 306 static void pfsync_sendout(int, int); 307 static void pfsync_send_plus(void *, size_t); 308 309 static void pfsync_bulk_start(void); 310 static void pfsync_bulk_status(u_int8_t); 311 static void pfsync_bulk_update(void *); 312 static void pfsync_bulk_fail(void *); 313 314 static void pfsync_detach_ifnet(struct ifnet *); 315 316 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 317 struct pfsync_kstatus *); 318 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 319 struct pfsync_softc *); 320 321 #ifdef IPSEC 322 static void pfsync_update_net_tdb(struct pfsync_tdb *); 323 #endif 324 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 325 struct pf_kstate *); 326 327 #define PFSYNC_MAX_BULKTRIES 12 328 #define PFSYNC_DEFER_TIMEOUT ((20 * hz) / 1000) 329 330 VNET_DEFINE(struct if_clone *, pfsync_cloner); 331 #define V_pfsync_cloner VNET(pfsync_cloner) 332 333 static int 334 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 335 { 336 struct pfsync_softc *sc; 337 struct ifnet *ifp; 338 struct pfsync_bucket *b; 339 int c, q; 340 341 if (unit != 0) 342 return (EINVAL); 343 344 if (! pfsync_buckets) 345 pfsync_buckets = mp_ncpus * 2; 346 347 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 348 sc->sc_flags |= PFSYNCF_OK; 349 sc->sc_maxupdates = 128; 350 351 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 352 if (ifp == NULL) { 353 free(sc, M_PFSYNC); 354 return (ENOSPC); 355 } 356 if_initname(ifp, pfsyncname, unit); 357 ifp->if_softc = sc; 358 ifp->if_ioctl = pfsyncioctl; 359 ifp->if_output = pfsyncoutput; 360 ifp->if_type = IFT_PFSYNC; 361 ifp->if_hdrlen = sizeof(struct pfsync_header); 362 ifp->if_mtu = ETHERMTU; 363 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 364 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 365 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 366 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 367 368 if_attach(ifp); 369 370 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 371 372 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 373 M_PFSYNC, M_ZERO | M_WAITOK); 374 for (c = 0; c < pfsync_buckets; c++) { 375 b = &sc->sc_buckets[c]; 376 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 377 378 b->b_id = c; 379 b->b_sc = sc; 380 b->b_len = PFSYNC_MINPKT; 381 382 for (q = 0; q < PFSYNC_S_COUNT; q++) 383 TAILQ_INIT(&b->b_qs[q]); 384 385 TAILQ_INIT(&b->b_upd_req_list); 386 TAILQ_INIT(&b->b_deferrals); 387 388 callout_init(&b->b_tmo, 1); 389 390 b->b_snd.ifq_maxlen = ifqmaxlen; 391 } 392 393 V_pfsyncif = sc; 394 395 return (0); 396 } 397 398 static void 399 pfsync_clone_destroy(struct ifnet *ifp) 400 { 401 struct pfsync_softc *sc = ifp->if_softc; 402 struct pfsync_bucket *b; 403 int c, ret; 404 405 for (c = 0; c < pfsync_buckets; c++) { 406 b = &sc->sc_buckets[c]; 407 /* 408 * At this stage, everything should have already been 409 * cleared by pfsync_uninit(), and we have only to 410 * drain callouts. 411 */ 412 PFSYNC_BUCKET_LOCK(b); 413 while (b->b_deferred > 0) { 414 struct pfsync_deferral *pd = 415 TAILQ_FIRST(&b->b_deferrals); 416 417 ret = callout_stop(&pd->pd_tmo); 418 PFSYNC_BUCKET_UNLOCK(b); 419 if (ret > 0) { 420 pfsync_undefer(pd, 1); 421 } else { 422 callout_drain(&pd->pd_tmo); 423 } 424 PFSYNC_BUCKET_LOCK(b); 425 } 426 MPASS(b->b_deferred == 0); 427 MPASS(TAILQ_EMPTY(&b->b_deferrals)); 428 PFSYNC_BUCKET_UNLOCK(b); 429 430 callout_drain(&b->b_tmo); 431 } 432 433 callout_drain(&sc->sc_bulkfail_tmo); 434 callout_drain(&sc->sc_bulk_tmo); 435 436 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 437 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 438 bpfdetach(ifp); 439 if_detach(ifp); 440 441 pfsync_drop(sc); 442 443 if_free(ifp); 444 pfsync_multicast_cleanup(sc); 445 mtx_destroy(&sc->sc_mtx); 446 mtx_destroy(&sc->sc_bulk_mtx); 447 448 free(sc->sc_buckets, M_PFSYNC); 449 free(sc, M_PFSYNC); 450 451 V_pfsyncif = NULL; 452 } 453 454 static int 455 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 456 struct pf_state_peer *d) 457 { 458 if (s->scrub.scrub_flag && d->scrub == NULL) { 459 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 460 if (d->scrub == NULL) 461 return (ENOMEM); 462 } 463 464 return (0); 465 } 466 467 static int 468 pfsync_state_import(struct pfsync_state *sp, int flags) 469 { 470 struct pfsync_softc *sc = V_pfsyncif; 471 #ifndef __NO_STRICT_ALIGNMENT 472 struct pfsync_state_key key[2]; 473 #endif 474 struct pfsync_state_key *kw, *ks; 475 struct pf_kstate *st = NULL; 476 struct pf_state_key *skw = NULL, *sks = NULL; 477 struct pf_krule *r = NULL; 478 struct pfi_kkif *kif; 479 int error; 480 481 PF_RULES_RASSERT(); 482 483 if (sp->creatorid == 0) { 484 if (V_pf_status.debug >= PF_DEBUG_MISC) 485 printf("%s: invalid creator id: %08x\n", __func__, 486 ntohl(sp->creatorid)); 487 return (EINVAL); 488 } 489 490 if ((kif = pfi_kkif_find(sp->ifname)) == NULL) { 491 if (V_pf_status.debug >= PF_DEBUG_MISC) 492 printf("%s: unknown interface: %s\n", __func__, 493 sp->ifname); 494 if (flags & PFSYNC_SI_IOCTL) 495 return (EINVAL); 496 return (0); /* skip this state */ 497 } 498 499 /* 500 * If the ruleset checksums match or the state is coming from the ioctl, 501 * it's safe to associate the state with the rule of that number. 502 */ 503 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 504 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 505 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 506 r = pf_main_ruleset.rules[ 507 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 508 else 509 r = &V_pf_default_rule; 510 511 if ((r->max_states && 512 counter_u64_fetch(r->states_cur) >= r->max_states)) 513 goto cleanup; 514 515 /* 516 * XXXGL: consider M_WAITOK in ioctl path after. 517 */ 518 st = pf_alloc_state(M_NOWAIT); 519 if (__predict_false(st == NULL)) 520 goto cleanup; 521 522 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 523 goto cleanup; 524 525 #ifndef __NO_STRICT_ALIGNMENT 526 bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); 527 kw = &key[PF_SK_WIRE]; 528 ks = &key[PF_SK_STACK]; 529 #else 530 kw = &sp->key[PF_SK_WIRE]; 531 ks = &sp->key[PF_SK_STACK]; 532 #endif 533 534 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || 535 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || 536 kw->port[0] != ks->port[0] || 537 kw->port[1] != ks->port[1]) { 538 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 539 if (sks == NULL) 540 goto cleanup; 541 } else 542 sks = skw; 543 544 /* allocate memory for scrub info */ 545 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 546 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 547 goto cleanup; 548 549 /* Copy to state key(s). */ 550 skw->addr[0] = kw->addr[0]; 551 skw->addr[1] = kw->addr[1]; 552 skw->port[0] = kw->port[0]; 553 skw->port[1] = kw->port[1]; 554 skw->proto = sp->proto; 555 skw->af = sp->af; 556 if (sks != skw) { 557 sks->addr[0] = ks->addr[0]; 558 sks->addr[1] = ks->addr[1]; 559 sks->port[0] = ks->port[0]; 560 sks->port[1] = ks->port[1]; 561 sks->proto = sp->proto; 562 sks->af = sp->af; 563 } 564 565 /* copy to state */ 566 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 567 st->creation = time_uptime - ntohl(sp->creation); 568 st->expire = time_uptime; 569 if (sp->expire) { 570 uint32_t timeout; 571 572 timeout = r->timeout[sp->timeout]; 573 if (!timeout) 574 timeout = V_pf_default_rule.timeout[sp->timeout]; 575 576 /* sp->expire may have been adaptively scaled by export. */ 577 st->expire -= timeout - ntohl(sp->expire); 578 } 579 580 st->direction = sp->direction; 581 st->log = sp->log; 582 st->timeout = sp->timeout; 583 /* 8 from old peers, 16 bits from new peers */ 584 st->state_flags = sp->state_flags_compat | ntohs(sp->state_flags); 585 586 if (r == &V_pf_default_rule) { 587 /* ToS and Prio are not sent over struct pfsync_state */ 588 st->state_flags &= ~PFSTATE_SETMASK; 589 } else { 590 /* Most actions are applied form state, not from rule. Until 591 * pfsync can forward all those actions and their parameters we 592 * must relay on restoring them from the found rule. 593 * It's a copy of pf_rule_to_actions() */ 594 st->qid = r->qid; 595 st->pqid = r->pqid; 596 st->rtableid = r->rtableid; 597 if (r->scrub_flags & PFSTATE_SETTOS) 598 st->set_tos = r->set_tos; 599 st->min_ttl = r->min_ttl; 600 st->max_mss = r->max_mss; 601 st->state_flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 602 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 603 st->dnpipe = r->dnpipe; 604 st->dnrpipe = r->dnrpipe; 605 /* FIXME: dnflags are not part of state, can't update them */ 606 } 607 608 st->id = sp->id; 609 st->creatorid = sp->creatorid; 610 pf_state_peer_ntoh(&sp->src, &st->src); 611 pf_state_peer_ntoh(&sp->dst, &st->dst); 612 613 st->rule.ptr = r; 614 st->nat_rule.ptr = NULL; 615 st->anchor.ptr = NULL; 616 st->rt_kif = NULL; 617 618 st->pfsync_time = time_uptime; 619 st->sync_state = PFSYNC_S_NONE; 620 621 if (!(flags & PFSYNC_SI_IOCTL)) 622 st->state_flags |= PFSTATE_NOSYNC; 623 624 if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) 625 goto cleanup_state; 626 627 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 628 counter_u64_add(r->states_cur, 1); 629 counter_u64_add(r->states_tot, 1); 630 631 if (!(flags & PFSYNC_SI_IOCTL)) { 632 st->state_flags &= ~PFSTATE_NOSYNC; 633 if (st->state_flags & PFSTATE_ACK) { 634 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 635 PFSYNC_BUCKET_LOCK(b); 636 pfsync_q_ins(st, PFSYNC_S_IACK, true); 637 PFSYNC_BUCKET_UNLOCK(b); 638 639 pfsync_push_all(sc); 640 } 641 } 642 st->state_flags &= ~PFSTATE_ACK; 643 PF_STATE_UNLOCK(st); 644 645 return (0); 646 647 cleanup: 648 error = ENOMEM; 649 if (skw == sks) 650 sks = NULL; 651 uma_zfree(V_pf_state_key_z, skw); 652 uma_zfree(V_pf_state_key_z, sks); 653 654 cleanup_state: /* pf_state_insert() frees the state keys. */ 655 if (st) { 656 st->timeout = PFTM_UNLINKED; /* appease an assert */ 657 pf_free_state(st); 658 } 659 return (error); 660 } 661 662 #ifdef INET 663 static int 664 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 665 { 666 struct pfsync_softc *sc = V_pfsyncif; 667 struct mbuf *m = *mp; 668 struct ip *ip = mtod(m, struct ip *); 669 struct pfsync_header *ph; 670 struct pfsync_subheader subh; 671 672 int offset, len, flags = 0; 673 int rv; 674 uint16_t count; 675 676 PF_RULES_RLOCK_TRACKER; 677 678 *mp = NULL; 679 V_pfsyncstats.pfsyncs_ipackets++; 680 681 /* Verify that we have a sync interface configured. */ 682 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 683 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 684 goto done; 685 686 /* verify that the packet came in on the right interface */ 687 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 688 V_pfsyncstats.pfsyncs_badif++; 689 goto done; 690 } 691 692 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 693 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 694 /* verify that the IP TTL is 255. */ 695 if (ip->ip_ttl != PFSYNC_DFLTTL) { 696 V_pfsyncstats.pfsyncs_badttl++; 697 goto done; 698 } 699 700 offset = ip->ip_hl << 2; 701 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 702 V_pfsyncstats.pfsyncs_hdrops++; 703 goto done; 704 } 705 706 if (offset + sizeof(*ph) > m->m_len) { 707 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 708 V_pfsyncstats.pfsyncs_hdrops++; 709 return (IPPROTO_DONE); 710 } 711 ip = mtod(m, struct ip *); 712 } 713 ph = (struct pfsync_header *)((char *)ip + offset); 714 715 /* verify the version */ 716 if (ph->version != PFSYNC_VERSION) { 717 V_pfsyncstats.pfsyncs_badver++; 718 goto done; 719 } 720 721 len = ntohs(ph->len) + offset; 722 if (m->m_pkthdr.len < len) { 723 V_pfsyncstats.pfsyncs_badlen++; 724 goto done; 725 } 726 727 /* 728 * Trusting pf_chksum during packet processing, as well as seeking 729 * in interface name tree, require holding PF_RULES_RLOCK(). 730 */ 731 PF_RULES_RLOCK(); 732 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 733 flags = PFSYNC_SI_CKSUM; 734 735 offset += sizeof(*ph); 736 while (offset <= len - sizeof(subh)) { 737 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 738 offset += sizeof(subh); 739 740 if (subh.action >= PFSYNC_ACT_MAX) { 741 V_pfsyncstats.pfsyncs_badact++; 742 PF_RULES_RUNLOCK(); 743 goto done; 744 } 745 746 count = ntohs(subh.count); 747 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 748 rv = (*pfsync_acts[subh.action])(m, offset, count, flags); 749 if (rv == -1) { 750 PF_RULES_RUNLOCK(); 751 return (IPPROTO_DONE); 752 } 753 754 offset += rv; 755 } 756 PF_RULES_RUNLOCK(); 757 758 done: 759 m_freem(m); 760 return (IPPROTO_DONE); 761 } 762 #endif 763 764 static int 765 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags) 766 { 767 struct pfsync_clr *clr; 768 struct mbuf *mp; 769 int len = sizeof(*clr) * count; 770 int i, offp; 771 u_int32_t creatorid; 772 773 mp = m_pulldown(m, offset, len, &offp); 774 if (mp == NULL) { 775 V_pfsyncstats.pfsyncs_badlen++; 776 return (-1); 777 } 778 clr = (struct pfsync_clr *)(mp->m_data + offp); 779 780 for (i = 0; i < count; i++) { 781 creatorid = clr[i].creatorid; 782 783 if (clr[i].ifname[0] != '\0' && 784 pfi_kkif_find(clr[i].ifname) == NULL) 785 continue; 786 787 for (int i = 0; i <= pf_hashmask; i++) { 788 struct pf_idhash *ih = &V_pf_idhash[i]; 789 struct pf_kstate *s; 790 relock: 791 PF_HASHROW_LOCK(ih); 792 LIST_FOREACH(s, &ih->states, entry) { 793 if (s->creatorid == creatorid) { 794 s->state_flags |= PFSTATE_NOSYNC; 795 pf_unlink_state(s); 796 goto relock; 797 } 798 } 799 PF_HASHROW_UNLOCK(ih); 800 } 801 } 802 803 return (len); 804 } 805 806 static int 807 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags) 808 { 809 struct mbuf *mp; 810 struct pfsync_state *sa, *sp; 811 int len = sizeof(*sp) * count; 812 int i, offp; 813 814 mp = m_pulldown(m, offset, len, &offp); 815 if (mp == NULL) { 816 V_pfsyncstats.pfsyncs_badlen++; 817 return (-1); 818 } 819 sa = (struct pfsync_state *)(mp->m_data + offp); 820 821 for (i = 0; i < count; i++) { 822 sp = &sa[i]; 823 824 /* Check for invalid values. */ 825 if (sp->timeout >= PFTM_MAX || 826 sp->src.state > PF_TCPS_PROXY_DST || 827 sp->dst.state > PF_TCPS_PROXY_DST || 828 sp->direction > PF_OUT || 829 (sp->af != AF_INET && sp->af != AF_INET6)) { 830 if (V_pf_status.debug >= PF_DEBUG_MISC) 831 printf("%s: invalid value\n", __func__); 832 V_pfsyncstats.pfsyncs_badval++; 833 continue; 834 } 835 836 if (pfsync_state_import(sp, flags) == ENOMEM) 837 /* Drop out, but process the rest of the actions. */ 838 break; 839 } 840 841 return (len); 842 } 843 844 static int 845 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags) 846 { 847 struct pfsync_ins_ack *ia, *iaa; 848 struct pf_kstate *st; 849 850 struct mbuf *mp; 851 int len = count * sizeof(*ia); 852 int offp, i; 853 854 mp = m_pulldown(m, offset, len, &offp); 855 if (mp == NULL) { 856 V_pfsyncstats.pfsyncs_badlen++; 857 return (-1); 858 } 859 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 860 861 for (i = 0; i < count; i++) { 862 ia = &iaa[i]; 863 864 st = pf_find_state_byid(ia->id, ia->creatorid); 865 if (st == NULL) 866 continue; 867 868 if (st->state_flags & PFSTATE_ACK) { 869 pfsync_undefer_state(st, 0); 870 } 871 PF_STATE_UNLOCK(st); 872 } 873 /* 874 * XXX this is not yet implemented, but we know the size of the 875 * message so we can skip it. 876 */ 877 878 return (count * sizeof(struct pfsync_ins_ack)); 879 } 880 881 static int 882 pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, 883 struct pfsync_state_peer *dst) 884 { 885 int sync = 0; 886 887 PF_STATE_LOCK_ASSERT(st); 888 889 /* 890 * The state should never go backwards except 891 * for syn-proxy states. Neither should the 892 * sequence window slide backwards. 893 */ 894 if ((st->src.state > src->state && 895 (st->src.state < PF_TCPS_PROXY_SRC || 896 src->state >= PF_TCPS_PROXY_SRC)) || 897 898 (st->src.state == src->state && 899 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 900 sync++; 901 else 902 pf_state_peer_ntoh(src, &st->src); 903 904 if ((st->dst.state > dst->state) || 905 906 (st->dst.state >= TCPS_SYN_SENT && 907 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 908 sync++; 909 else 910 pf_state_peer_ntoh(dst, &st->dst); 911 912 return (sync); 913 } 914 915 static int 916 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags) 917 { 918 struct pfsync_softc *sc = V_pfsyncif; 919 struct pfsync_state *sa, *sp; 920 struct pf_kstate *st; 921 int sync; 922 923 struct mbuf *mp; 924 int len = count * sizeof(*sp); 925 int offp, i; 926 927 mp = m_pulldown(m, offset, len, &offp); 928 if (mp == NULL) { 929 V_pfsyncstats.pfsyncs_badlen++; 930 return (-1); 931 } 932 sa = (struct pfsync_state *)(mp->m_data + offp); 933 934 for (i = 0; i < count; i++) { 935 sp = &sa[i]; 936 937 /* check for invalid values */ 938 if (sp->timeout >= PFTM_MAX || 939 sp->src.state > PF_TCPS_PROXY_DST || 940 sp->dst.state > PF_TCPS_PROXY_DST) { 941 if (V_pf_status.debug >= PF_DEBUG_MISC) { 942 printf("pfsync_input: PFSYNC_ACT_UPD: " 943 "invalid value\n"); 944 } 945 V_pfsyncstats.pfsyncs_badval++; 946 continue; 947 } 948 949 st = pf_find_state_byid(sp->id, sp->creatorid); 950 if (st == NULL) { 951 /* insert the update */ 952 if (pfsync_state_import(sp, flags)) 953 V_pfsyncstats.pfsyncs_badstate++; 954 continue; 955 } 956 957 if (st->state_flags & PFSTATE_ACK) { 958 pfsync_undefer_state(st, 1); 959 } 960 961 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 962 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 963 else { 964 sync = 0; 965 966 /* 967 * Non-TCP protocol state machine always go 968 * forwards 969 */ 970 if (st->src.state > sp->src.state) 971 sync++; 972 else 973 pf_state_peer_ntoh(&sp->src, &st->src); 974 if (st->dst.state > sp->dst.state) 975 sync++; 976 else 977 pf_state_peer_ntoh(&sp->dst, &st->dst); 978 } 979 if (sync < 2) { 980 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 981 pf_state_peer_ntoh(&sp->dst, &st->dst); 982 st->expire = time_uptime; 983 st->timeout = sp->timeout; 984 } 985 st->pfsync_time = time_uptime; 986 987 if (sync) { 988 V_pfsyncstats.pfsyncs_stale++; 989 990 pfsync_update_state(st); 991 PF_STATE_UNLOCK(st); 992 pfsync_push_all(sc); 993 continue; 994 } 995 PF_STATE_UNLOCK(st); 996 } 997 998 return (len); 999 } 1000 1001 static int 1002 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags) 1003 { 1004 struct pfsync_softc *sc = V_pfsyncif; 1005 struct pfsync_upd_c *ua, *up; 1006 struct pf_kstate *st; 1007 int len = count * sizeof(*up); 1008 int sync; 1009 struct mbuf *mp; 1010 int offp, i; 1011 1012 mp = m_pulldown(m, offset, len, &offp); 1013 if (mp == NULL) { 1014 V_pfsyncstats.pfsyncs_badlen++; 1015 return (-1); 1016 } 1017 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 1018 1019 for (i = 0; i < count; i++) { 1020 up = &ua[i]; 1021 1022 /* check for invalid values */ 1023 if (up->timeout >= PFTM_MAX || 1024 up->src.state > PF_TCPS_PROXY_DST || 1025 up->dst.state > PF_TCPS_PROXY_DST) { 1026 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1027 printf("pfsync_input: " 1028 "PFSYNC_ACT_UPD_C: " 1029 "invalid value\n"); 1030 } 1031 V_pfsyncstats.pfsyncs_badval++; 1032 continue; 1033 } 1034 1035 st = pf_find_state_byid(up->id, up->creatorid); 1036 if (st == NULL) { 1037 /* We don't have this state. Ask for it. */ 1038 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1039 pfsync_request_update(up->creatorid, up->id); 1040 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1041 continue; 1042 } 1043 1044 if (st->state_flags & PFSTATE_ACK) { 1045 pfsync_undefer_state(st, 1); 1046 } 1047 1048 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1049 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1050 else { 1051 sync = 0; 1052 1053 /* 1054 * Non-TCP protocol state machine always go 1055 * forwards 1056 */ 1057 if (st->src.state > up->src.state) 1058 sync++; 1059 else 1060 pf_state_peer_ntoh(&up->src, &st->src); 1061 if (st->dst.state > up->dst.state) 1062 sync++; 1063 else 1064 pf_state_peer_ntoh(&up->dst, &st->dst); 1065 } 1066 if (sync < 2) { 1067 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1068 pf_state_peer_ntoh(&up->dst, &st->dst); 1069 st->expire = time_uptime; 1070 st->timeout = up->timeout; 1071 } 1072 st->pfsync_time = time_uptime; 1073 1074 if (sync) { 1075 V_pfsyncstats.pfsyncs_stale++; 1076 1077 pfsync_update_state(st); 1078 PF_STATE_UNLOCK(st); 1079 pfsync_push_all(sc); 1080 continue; 1081 } 1082 PF_STATE_UNLOCK(st); 1083 } 1084 1085 return (len); 1086 } 1087 1088 static int 1089 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags) 1090 { 1091 struct pfsync_upd_req *ur, *ura; 1092 struct mbuf *mp; 1093 int len = count * sizeof(*ur); 1094 int i, offp; 1095 1096 struct pf_kstate *st; 1097 1098 mp = m_pulldown(m, offset, len, &offp); 1099 if (mp == NULL) { 1100 V_pfsyncstats.pfsyncs_badlen++; 1101 return (-1); 1102 } 1103 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1104 1105 for (i = 0; i < count; i++) { 1106 ur = &ura[i]; 1107 1108 if (ur->id == 0 && ur->creatorid == 0) 1109 pfsync_bulk_start(); 1110 else { 1111 st = pf_find_state_byid(ur->id, ur->creatorid); 1112 if (st == NULL) { 1113 V_pfsyncstats.pfsyncs_badstate++; 1114 continue; 1115 } 1116 if (st->state_flags & PFSTATE_NOSYNC) { 1117 PF_STATE_UNLOCK(st); 1118 continue; 1119 } 1120 1121 pfsync_update_state_req(st); 1122 PF_STATE_UNLOCK(st); 1123 } 1124 } 1125 1126 return (len); 1127 } 1128 1129 static int 1130 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags) 1131 { 1132 struct mbuf *mp; 1133 struct pfsync_del_c *sa, *sp; 1134 struct pf_kstate *st; 1135 int len = count * sizeof(*sp); 1136 int offp, i; 1137 1138 mp = m_pulldown(m, offset, len, &offp); 1139 if (mp == NULL) { 1140 V_pfsyncstats.pfsyncs_badlen++; 1141 return (-1); 1142 } 1143 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1144 1145 for (i = 0; i < count; i++) { 1146 sp = &sa[i]; 1147 1148 st = pf_find_state_byid(sp->id, sp->creatorid); 1149 if (st == NULL) { 1150 V_pfsyncstats.pfsyncs_badstate++; 1151 continue; 1152 } 1153 1154 st->state_flags |= PFSTATE_NOSYNC; 1155 pf_unlink_state(st); 1156 } 1157 1158 return (len); 1159 } 1160 1161 static int 1162 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags) 1163 { 1164 struct pfsync_softc *sc = V_pfsyncif; 1165 struct pfsync_bus *bus; 1166 struct mbuf *mp; 1167 int len = count * sizeof(*bus); 1168 int offp; 1169 1170 PFSYNC_BLOCK(sc); 1171 1172 /* If we're not waiting for a bulk update, who cares. */ 1173 if (sc->sc_ureq_sent == 0) { 1174 PFSYNC_BUNLOCK(sc); 1175 return (len); 1176 } 1177 1178 mp = m_pulldown(m, offset, len, &offp); 1179 if (mp == NULL) { 1180 PFSYNC_BUNLOCK(sc); 1181 V_pfsyncstats.pfsyncs_badlen++; 1182 return (-1); 1183 } 1184 bus = (struct pfsync_bus *)(mp->m_data + offp); 1185 1186 switch (bus->status) { 1187 case PFSYNC_BUS_START: 1188 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1189 V_pf_limits[PF_LIMIT_STATES].limit / 1190 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1191 sizeof(struct pfsync_state)), 1192 pfsync_bulk_fail, sc); 1193 if (V_pf_status.debug >= PF_DEBUG_MISC) 1194 printf("pfsync: received bulk update start\n"); 1195 break; 1196 1197 case PFSYNC_BUS_END: 1198 if (time_uptime - ntohl(bus->endtime) >= 1199 sc->sc_ureq_sent) { 1200 /* that's it, we're happy */ 1201 sc->sc_ureq_sent = 0; 1202 sc->sc_bulk_tries = 0; 1203 callout_stop(&sc->sc_bulkfail_tmo); 1204 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1205 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1206 "pfsync bulk done"); 1207 sc->sc_flags |= PFSYNCF_OK; 1208 if (V_pf_status.debug >= PF_DEBUG_MISC) 1209 printf("pfsync: received valid " 1210 "bulk update end\n"); 1211 } else { 1212 if (V_pf_status.debug >= PF_DEBUG_MISC) 1213 printf("pfsync: received invalid " 1214 "bulk update end: bad timestamp\n"); 1215 } 1216 break; 1217 } 1218 PFSYNC_BUNLOCK(sc); 1219 1220 return (len); 1221 } 1222 1223 static int 1224 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags) 1225 { 1226 int len = count * sizeof(struct pfsync_tdb); 1227 1228 #if defined(IPSEC) 1229 struct pfsync_tdb *tp; 1230 struct mbuf *mp; 1231 int offp; 1232 int i; 1233 int s; 1234 1235 mp = m_pulldown(m, offset, len, &offp); 1236 if (mp == NULL) { 1237 V_pfsyncstats.pfsyncs_badlen++; 1238 return (-1); 1239 } 1240 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1241 1242 for (i = 0; i < count; i++) 1243 pfsync_update_net_tdb(&tp[i]); 1244 #endif 1245 1246 return (len); 1247 } 1248 1249 #if defined(IPSEC) 1250 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1251 static void 1252 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1253 { 1254 struct tdb *tdb; 1255 int s; 1256 1257 /* check for invalid values */ 1258 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1259 (pt->dst.sa.sa_family != AF_INET && 1260 pt->dst.sa.sa_family != AF_INET6)) 1261 goto bad; 1262 1263 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1264 if (tdb) { 1265 pt->rpl = ntohl(pt->rpl); 1266 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1267 1268 /* Neither replay nor byte counter should ever decrease. */ 1269 if (pt->rpl < tdb->tdb_rpl || 1270 pt->cur_bytes < tdb->tdb_cur_bytes) { 1271 goto bad; 1272 } 1273 1274 tdb->tdb_rpl = pt->rpl; 1275 tdb->tdb_cur_bytes = pt->cur_bytes; 1276 } 1277 return; 1278 1279 bad: 1280 if (V_pf_status.debug >= PF_DEBUG_MISC) 1281 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1282 "invalid value\n"); 1283 V_pfsyncstats.pfsyncs_badstate++; 1284 return; 1285 } 1286 #endif 1287 1288 static int 1289 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags) 1290 { 1291 /* check if we are at the right place in the packet */ 1292 if (offset != m->m_pkthdr.len) 1293 V_pfsyncstats.pfsyncs_badlen++; 1294 1295 /* we're done. free and let the caller return */ 1296 m_freem(m); 1297 return (-1); 1298 } 1299 1300 static int 1301 pfsync_in_error(struct mbuf *m, int offset, int count, int flags) 1302 { 1303 V_pfsyncstats.pfsyncs_badact++; 1304 1305 m_freem(m); 1306 return (-1); 1307 } 1308 1309 static int 1310 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1311 struct route *rt) 1312 { 1313 m_freem(m); 1314 return (0); 1315 } 1316 1317 /* ARGSUSED */ 1318 static int 1319 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1320 { 1321 struct pfsync_softc *sc = ifp->if_softc; 1322 struct ifreq *ifr = (struct ifreq *)data; 1323 struct pfsyncreq pfsyncr; 1324 size_t nvbuflen; 1325 int error; 1326 int c; 1327 1328 switch (cmd) { 1329 case SIOCSIFFLAGS: 1330 PFSYNC_LOCK(sc); 1331 if (ifp->if_flags & IFF_UP) { 1332 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1333 PFSYNC_UNLOCK(sc); 1334 pfsync_pointers_init(); 1335 } else { 1336 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1337 PFSYNC_UNLOCK(sc); 1338 pfsync_pointers_uninit(); 1339 } 1340 break; 1341 case SIOCSIFMTU: 1342 if (!sc->sc_sync_if || 1343 ifr->ifr_mtu <= PFSYNC_MINPKT || 1344 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1345 return (EINVAL); 1346 if (ifr->ifr_mtu < ifp->if_mtu) { 1347 for (c = 0; c < pfsync_buckets; c++) { 1348 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1349 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1350 pfsync_sendout(1, c); 1351 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1352 } 1353 } 1354 ifp->if_mtu = ifr->ifr_mtu; 1355 break; 1356 case SIOCGETPFSYNC: 1357 bzero(&pfsyncr, sizeof(pfsyncr)); 1358 PFSYNC_LOCK(sc); 1359 if (sc->sc_sync_if) { 1360 strlcpy(pfsyncr.pfsyncr_syncdev, 1361 sc->sc_sync_if->if_xname, IFNAMSIZ); 1362 } 1363 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1364 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1365 pfsyncr.pfsyncr_defer = sc->sc_flags; 1366 PFSYNC_UNLOCK(sc); 1367 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1368 sizeof(pfsyncr))); 1369 1370 case SIOCGETPFSYNCNV: 1371 { 1372 nvlist_t *nvl_syncpeer; 1373 nvlist_t *nvl = nvlist_create(0); 1374 1375 if (nvl == NULL) 1376 return (ENOMEM); 1377 1378 if (sc->sc_sync_if) 1379 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1380 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1381 nvlist_add_number(nvl, "flags", sc->sc_flags); 1382 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1383 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1384 1385 void *packed = NULL; 1386 packed = nvlist_pack(nvl, &nvbuflen); 1387 if (packed == NULL) { 1388 free(packed, M_NVLIST); 1389 nvlist_destroy(nvl); 1390 return (ENOMEM); 1391 } 1392 1393 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1394 ifr->ifr_cap_nv.length = nvbuflen; 1395 ifr->ifr_cap_nv.buffer = NULL; 1396 free(packed, M_NVLIST); 1397 nvlist_destroy(nvl); 1398 return (EFBIG); 1399 } 1400 1401 ifr->ifr_cap_nv.length = nvbuflen; 1402 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1403 1404 nvlist_destroy(nvl); 1405 nvlist_destroy(nvl_syncpeer); 1406 free(packed, M_NVLIST); 1407 break; 1408 } 1409 1410 case SIOCSETPFSYNC: 1411 { 1412 struct pfsync_kstatus status; 1413 1414 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1415 return (error); 1416 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1417 sizeof(pfsyncr)))) 1418 return (error); 1419 1420 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1421 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1422 1423 error = pfsync_kstatus_to_softc(&status, sc); 1424 return (error); 1425 } 1426 case SIOCSETPFSYNCNV: 1427 { 1428 struct pfsync_kstatus status; 1429 void *data; 1430 nvlist_t *nvl; 1431 1432 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1433 return (error); 1434 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1435 return (EINVAL); 1436 1437 data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); 1438 1439 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1440 ifr->ifr_cap_nv.length)) != 0) { 1441 free(data, M_TEMP); 1442 return (error); 1443 } 1444 1445 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1446 free(data, M_TEMP); 1447 return (EINVAL); 1448 } 1449 1450 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1451 pfsync_nvstatus_to_kstatus(nvl, &status); 1452 1453 nvlist_destroy(nvl); 1454 free(data, M_TEMP); 1455 1456 error = pfsync_kstatus_to_softc(&status, sc); 1457 return (error); 1458 } 1459 default: 1460 return (ENOTTY); 1461 } 1462 1463 return (0); 1464 } 1465 1466 static void 1467 pfsync_out_state(struct pf_kstate *st, void *buf) 1468 { 1469 struct pfsync_state *sp = buf; 1470 1471 pfsync_state_export(sp, st); 1472 } 1473 1474 static void 1475 pfsync_out_iack(struct pf_kstate *st, void *buf) 1476 { 1477 struct pfsync_ins_ack *iack = buf; 1478 1479 iack->id = st->id; 1480 iack->creatorid = st->creatorid; 1481 } 1482 1483 static void 1484 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1485 { 1486 struct pfsync_upd_c *up = buf; 1487 1488 bzero(up, sizeof(*up)); 1489 up->id = st->id; 1490 pf_state_peer_hton(&st->src, &up->src); 1491 pf_state_peer_hton(&st->dst, &up->dst); 1492 up->creatorid = st->creatorid; 1493 up->timeout = st->timeout; 1494 } 1495 1496 static void 1497 pfsync_out_del_c(struct pf_kstate *st, void *buf) 1498 { 1499 struct pfsync_del_c *dp = buf; 1500 1501 dp->id = st->id; 1502 dp->creatorid = st->creatorid; 1503 st->state_flags |= PFSTATE_NOSYNC; 1504 } 1505 1506 static void 1507 pfsync_drop(struct pfsync_softc *sc) 1508 { 1509 struct pf_kstate *st, *next; 1510 struct pfsync_upd_req_item *ur; 1511 struct pfsync_bucket *b; 1512 int c, q; 1513 1514 for (c = 0; c < pfsync_buckets; c++) { 1515 b = &sc->sc_buckets[c]; 1516 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1517 if (TAILQ_EMPTY(&b->b_qs[q])) 1518 continue; 1519 1520 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1521 KASSERT(st->sync_state == q, 1522 ("%s: st->sync_state == q", 1523 __func__)); 1524 st->sync_state = PFSYNC_S_NONE; 1525 pf_release_state(st); 1526 } 1527 TAILQ_INIT(&b->b_qs[q]); 1528 } 1529 1530 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1531 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1532 free(ur, M_PFSYNC); 1533 } 1534 1535 b->b_len = PFSYNC_MINPKT; 1536 b->b_plus = NULL; 1537 } 1538 } 1539 1540 static void 1541 pfsync_sendout(int schedswi, int c) 1542 { 1543 struct pfsync_softc *sc = V_pfsyncif; 1544 struct ifnet *ifp = sc->sc_ifp; 1545 struct mbuf *m; 1546 struct pfsync_header *ph; 1547 struct pfsync_subheader *subh; 1548 struct pf_kstate *st, *st_next; 1549 struct pfsync_upd_req_item *ur; 1550 struct pfsync_bucket *b = &sc->sc_buckets[c]; 1551 int aflen, offset; 1552 int q, count = 0; 1553 1554 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1555 KASSERT(b->b_len > PFSYNC_MINPKT, 1556 ("%s: sc_len %zu", __func__, b->b_len)); 1557 PFSYNC_BUCKET_LOCK_ASSERT(b); 1558 1559 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1560 pfsync_drop(sc); 1561 return; 1562 } 1563 1564 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1565 if (m == NULL) { 1566 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1567 V_pfsyncstats.pfsyncs_onomem++; 1568 return; 1569 } 1570 m->m_data += max_linkhdr; 1571 m->m_len = m->m_pkthdr.len = b->b_len; 1572 1573 /* build the ip header */ 1574 switch (sc->sc_sync_peer.ss_family) { 1575 #ifdef INET 1576 case AF_INET: 1577 { 1578 struct ip *ip; 1579 1580 ip = mtod(m, struct ip *); 1581 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 1582 aflen = offset = sizeof(*ip); 1583 1584 ip->ip_len = htons(m->m_pkthdr.len); 1585 ip_fillid(ip); 1586 break; 1587 } 1588 #endif 1589 default: 1590 m_freem(m); 1591 return; 1592 } 1593 1594 1595 /* build the pfsync header */ 1596 ph = (struct pfsync_header *)(m->m_data + offset); 1597 bzero(ph, sizeof(*ph)); 1598 offset += sizeof(*ph); 1599 1600 ph->version = PFSYNC_VERSION; 1601 ph->len = htons(b->b_len - aflen); 1602 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1603 1604 /* walk the queues */ 1605 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1606 if (TAILQ_EMPTY(&b->b_qs[q])) 1607 continue; 1608 1609 subh = (struct pfsync_subheader *)(m->m_data + offset); 1610 offset += sizeof(*subh); 1611 1612 count = 0; 1613 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 1614 KASSERT(st->sync_state == q, 1615 ("%s: st->sync_state == q", 1616 __func__)); 1617 /* 1618 * XXXGL: some of write methods do unlocked reads 1619 * of state data :( 1620 */ 1621 pfsync_qs[q].write(st, m->m_data + offset); 1622 offset += pfsync_qs[q].len; 1623 st->sync_state = PFSYNC_S_NONE; 1624 pf_release_state(st); 1625 count++; 1626 } 1627 TAILQ_INIT(&b->b_qs[q]); 1628 1629 bzero(subh, sizeof(*subh)); 1630 subh->action = pfsync_qs[q].action; 1631 subh->count = htons(count); 1632 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1633 } 1634 1635 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 1636 subh = (struct pfsync_subheader *)(m->m_data + offset); 1637 offset += sizeof(*subh); 1638 1639 count = 0; 1640 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1641 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1642 1643 bcopy(&ur->ur_msg, m->m_data + offset, 1644 sizeof(ur->ur_msg)); 1645 offset += sizeof(ur->ur_msg); 1646 free(ur, M_PFSYNC); 1647 count++; 1648 } 1649 1650 bzero(subh, sizeof(*subh)); 1651 subh->action = PFSYNC_ACT_UPD_REQ; 1652 subh->count = htons(count); 1653 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1654 } 1655 1656 /* has someone built a custom region for us to add? */ 1657 if (b->b_plus != NULL) { 1658 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 1659 offset += b->b_pluslen; 1660 1661 b->b_plus = NULL; 1662 } 1663 1664 subh = (struct pfsync_subheader *)(m->m_data + offset); 1665 offset += sizeof(*subh); 1666 1667 bzero(subh, sizeof(*subh)); 1668 subh->action = PFSYNC_ACT_EOF; 1669 subh->count = htons(1); 1670 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1671 1672 /* we're done, let's put it on the wire */ 1673 if (ifp->if_bpf) { 1674 m->m_data += aflen; 1675 m->m_len = m->m_pkthdr.len = b->b_len - aflen; 1676 BPF_MTAP(ifp, m); 1677 m->m_data -= aflen; 1678 m->m_len = m->m_pkthdr.len = b->b_len; 1679 } 1680 1681 if (sc->sc_sync_if == NULL) { 1682 b->b_len = PFSYNC_MINPKT; 1683 m_freem(m); 1684 return; 1685 } 1686 1687 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 1688 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 1689 b->b_len = PFSYNC_MINPKT; 1690 1691 if (!_IF_QFULL(&b->b_snd)) 1692 _IF_ENQUEUE(&b->b_snd, m); 1693 else { 1694 m_freem(m); 1695 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 1696 } 1697 if (schedswi) 1698 swi_sched(V_pfsync_swi_cookie, 0); 1699 } 1700 1701 static void 1702 pfsync_insert_state(struct pf_kstate *st) 1703 { 1704 struct pfsync_softc *sc = V_pfsyncif; 1705 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1706 1707 if (st->state_flags & PFSTATE_NOSYNC) 1708 return; 1709 1710 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1711 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1712 st->state_flags |= PFSTATE_NOSYNC; 1713 return; 1714 } 1715 1716 KASSERT(st->sync_state == PFSYNC_S_NONE, 1717 ("%s: st->sync_state %u", __func__, st->sync_state)); 1718 1719 PFSYNC_BUCKET_LOCK(b); 1720 if (b->b_len == PFSYNC_MINPKT) 1721 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1722 1723 pfsync_q_ins(st, PFSYNC_S_INS, true); 1724 PFSYNC_BUCKET_UNLOCK(b); 1725 1726 st->sync_updates = 0; 1727 } 1728 1729 static int 1730 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 1731 { 1732 struct pfsync_softc *sc = V_pfsyncif; 1733 struct pfsync_deferral *pd; 1734 struct pfsync_bucket *b; 1735 1736 if (m->m_flags & (M_BCAST|M_MCAST)) 1737 return (0); 1738 1739 if (sc == NULL) 1740 return (0); 1741 1742 b = pfsync_get_bucket(sc, st); 1743 1744 PFSYNC_LOCK(sc); 1745 1746 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 1747 !(sc->sc_flags & PFSYNCF_DEFER)) { 1748 PFSYNC_UNLOCK(sc); 1749 return (0); 1750 } 1751 1752 PFSYNC_BUCKET_LOCK(b); 1753 PFSYNC_UNLOCK(sc); 1754 1755 if (b->b_deferred >= 128) 1756 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 1757 1758 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1759 if (pd == NULL) { 1760 PFSYNC_BUCKET_UNLOCK(b); 1761 return (0); 1762 } 1763 b->b_deferred++; 1764 1765 m->m_flags |= M_SKIP_FIREWALL; 1766 st->state_flags |= PFSTATE_ACK; 1767 1768 pd->pd_sc = sc; 1769 pd->pd_st = st; 1770 pf_ref_state(st); 1771 pd->pd_m = m; 1772 1773 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 1774 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 1775 callout_reset(&pd->pd_tmo, PFSYNC_DEFER_TIMEOUT, pfsync_defer_tmo, pd); 1776 1777 pfsync_push(b); 1778 PFSYNC_BUCKET_UNLOCK(b); 1779 1780 return (1); 1781 } 1782 1783 static void 1784 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1785 { 1786 struct pfsync_softc *sc = pd->pd_sc; 1787 struct mbuf *m = pd->pd_m; 1788 struct pf_kstate *st = pd->pd_st; 1789 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1790 1791 PFSYNC_BUCKET_LOCK_ASSERT(b); 1792 1793 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1794 b->b_deferred--; 1795 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1796 free(pd, M_PFSYNC); 1797 pf_release_state(st); 1798 1799 if (drop) 1800 m_freem(m); 1801 else { 1802 _IF_ENQUEUE(&b->b_snd, m); 1803 pfsync_push(b); 1804 } 1805 } 1806 1807 static void 1808 pfsync_defer_tmo(void *arg) 1809 { 1810 struct epoch_tracker et; 1811 struct pfsync_deferral *pd = arg; 1812 struct pfsync_softc *sc = pd->pd_sc; 1813 struct mbuf *m = pd->pd_m; 1814 struct pf_kstate *st = pd->pd_st; 1815 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1816 1817 PFSYNC_BUCKET_LOCK_ASSERT(b); 1818 1819 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1820 b->b_deferred--; 1821 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1822 PFSYNC_BUCKET_UNLOCK(b); 1823 free(pd, M_PFSYNC); 1824 1825 if (sc->sc_sync_if == NULL) { 1826 pf_release_state(st); 1827 m_freem(m); 1828 return; 1829 } 1830 1831 NET_EPOCH_ENTER(et); 1832 CURVNET_SET(sc->sc_sync_if->if_vnet); 1833 1834 pfsync_tx(sc, m); 1835 1836 pf_release_state(st); 1837 1838 CURVNET_RESTORE(); 1839 NET_EPOCH_EXIT(et); 1840 } 1841 1842 static void 1843 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 1844 { 1845 struct pfsync_softc *sc = V_pfsyncif; 1846 struct pfsync_deferral *pd; 1847 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1848 1849 PFSYNC_BUCKET_LOCK_ASSERT(b); 1850 1851 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 1852 if (pd->pd_st == st) { 1853 if (callout_stop(&pd->pd_tmo) > 0) 1854 pfsync_undefer(pd, drop); 1855 1856 return; 1857 } 1858 } 1859 1860 panic("%s: unable to find deferred state", __func__); 1861 } 1862 1863 static void 1864 pfsync_undefer_state(struct pf_kstate *st, int drop) 1865 { 1866 struct pfsync_softc *sc = V_pfsyncif; 1867 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1868 1869 PFSYNC_BUCKET_LOCK(b); 1870 pfsync_undefer_state_locked(st, drop); 1871 PFSYNC_BUCKET_UNLOCK(b); 1872 } 1873 1874 static struct pfsync_bucket* 1875 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 1876 { 1877 int c = PF_IDHASH(st) % pfsync_buckets; 1878 return &sc->sc_buckets[c]; 1879 } 1880 1881 static void 1882 pfsync_update_state(struct pf_kstate *st) 1883 { 1884 struct pfsync_softc *sc = V_pfsyncif; 1885 bool sync = false, ref = true; 1886 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1887 1888 PF_STATE_LOCK_ASSERT(st); 1889 PFSYNC_BUCKET_LOCK(b); 1890 1891 if (st->state_flags & PFSTATE_ACK) 1892 pfsync_undefer_state_locked(st, 0); 1893 if (st->state_flags & PFSTATE_NOSYNC) { 1894 if (st->sync_state != PFSYNC_S_NONE) 1895 pfsync_q_del(st, true, b); 1896 PFSYNC_BUCKET_UNLOCK(b); 1897 return; 1898 } 1899 1900 if (b->b_len == PFSYNC_MINPKT) 1901 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1902 1903 switch (st->sync_state) { 1904 case PFSYNC_S_UPD_C: 1905 case PFSYNC_S_UPD: 1906 case PFSYNC_S_INS: 1907 /* we're already handling it */ 1908 1909 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1910 st->sync_updates++; 1911 if (st->sync_updates >= sc->sc_maxupdates) 1912 sync = true; 1913 } 1914 break; 1915 1916 case PFSYNC_S_IACK: 1917 pfsync_q_del(st, false, b); 1918 ref = false; 1919 /* FALLTHROUGH */ 1920 1921 case PFSYNC_S_NONE: 1922 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 1923 st->sync_updates = 0; 1924 break; 1925 1926 default: 1927 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1928 } 1929 1930 if (sync || (time_uptime - st->pfsync_time) < 2) 1931 pfsync_push(b); 1932 1933 PFSYNC_BUCKET_UNLOCK(b); 1934 } 1935 1936 static void 1937 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1938 { 1939 struct pfsync_softc *sc = V_pfsyncif; 1940 struct pfsync_bucket *b = &sc->sc_buckets[0]; 1941 struct pfsync_upd_req_item *item; 1942 size_t nlen = sizeof(struct pfsync_upd_req); 1943 1944 PFSYNC_BUCKET_LOCK_ASSERT(b); 1945 1946 /* 1947 * This code does a bit to prevent multiple update requests for the 1948 * same state being generated. It searches current subheader queue, 1949 * but it doesn't lookup into queue of already packed datagrams. 1950 */ 1951 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 1952 if (item->ur_msg.id == id && 1953 item->ur_msg.creatorid == creatorid) 1954 return; 1955 1956 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1957 if (item == NULL) 1958 return; /* XXX stats */ 1959 1960 item->ur_msg.id = id; 1961 item->ur_msg.creatorid = creatorid; 1962 1963 if (TAILQ_EMPTY(&b->b_upd_req_list)) 1964 nlen += sizeof(struct pfsync_subheader); 1965 1966 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 1967 pfsync_sendout(0, 0); 1968 1969 nlen = sizeof(struct pfsync_subheader) + 1970 sizeof(struct pfsync_upd_req); 1971 } 1972 1973 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 1974 b->b_len += nlen; 1975 1976 pfsync_push(b); 1977 } 1978 1979 static bool 1980 pfsync_update_state_req(struct pf_kstate *st) 1981 { 1982 struct pfsync_softc *sc = V_pfsyncif; 1983 bool ref = true, full = false; 1984 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1985 1986 PF_STATE_LOCK_ASSERT(st); 1987 PFSYNC_BUCKET_LOCK(b); 1988 1989 if (st->state_flags & PFSTATE_NOSYNC) { 1990 if (st->sync_state != PFSYNC_S_NONE) 1991 pfsync_q_del(st, true, b); 1992 PFSYNC_BUCKET_UNLOCK(b); 1993 return (full); 1994 } 1995 1996 switch (st->sync_state) { 1997 case PFSYNC_S_UPD_C: 1998 case PFSYNC_S_IACK: 1999 pfsync_q_del(st, false, b); 2000 ref = false; 2001 /* FALLTHROUGH */ 2002 2003 case PFSYNC_S_NONE: 2004 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2005 pfsync_push(b); 2006 break; 2007 2008 case PFSYNC_S_INS: 2009 case PFSYNC_S_UPD: 2010 case PFSYNC_S_DEL_C: 2011 /* we're already handling it */ 2012 break; 2013 2014 default: 2015 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2016 } 2017 2018 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state)) 2019 full = true; 2020 2021 PFSYNC_BUCKET_UNLOCK(b); 2022 2023 return (full); 2024 } 2025 2026 static void 2027 pfsync_delete_state(struct pf_kstate *st) 2028 { 2029 struct pfsync_softc *sc = V_pfsyncif; 2030 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2031 bool ref = true; 2032 2033 PFSYNC_BUCKET_LOCK(b); 2034 if (st->state_flags & PFSTATE_ACK) 2035 pfsync_undefer_state_locked(st, 1); 2036 if (st->state_flags & PFSTATE_NOSYNC) { 2037 if (st->sync_state != PFSYNC_S_NONE) 2038 pfsync_q_del(st, true, b); 2039 PFSYNC_BUCKET_UNLOCK(b); 2040 return; 2041 } 2042 2043 if (b->b_len == PFSYNC_MINPKT) 2044 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2045 2046 switch (st->sync_state) { 2047 case PFSYNC_S_INS: 2048 /* We never got to tell the world so just forget about it. */ 2049 pfsync_q_del(st, true, b); 2050 break; 2051 2052 case PFSYNC_S_UPD_C: 2053 case PFSYNC_S_UPD: 2054 case PFSYNC_S_IACK: 2055 pfsync_q_del(st, false, b); 2056 ref = false; 2057 /* FALLTHROUGH */ 2058 2059 case PFSYNC_S_NONE: 2060 pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); 2061 break; 2062 2063 default: 2064 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2065 } 2066 2067 PFSYNC_BUCKET_UNLOCK(b); 2068 } 2069 2070 static void 2071 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2072 { 2073 struct { 2074 struct pfsync_subheader subh; 2075 struct pfsync_clr clr; 2076 } __packed r; 2077 2078 bzero(&r, sizeof(r)); 2079 2080 r.subh.action = PFSYNC_ACT_CLR; 2081 r.subh.count = htons(1); 2082 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2083 2084 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2085 r.clr.creatorid = creatorid; 2086 2087 pfsync_send_plus(&r, sizeof(r)); 2088 } 2089 2090 static void 2091 pfsync_q_ins(struct pf_kstate *st, int q, bool ref) 2092 { 2093 struct pfsync_softc *sc = V_pfsyncif; 2094 size_t nlen = pfsync_qs[q].len; 2095 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2096 2097 PFSYNC_BUCKET_LOCK_ASSERT(b); 2098 2099 KASSERT(st->sync_state == PFSYNC_S_NONE, 2100 ("%s: st->sync_state %u", __func__, st->sync_state)); 2101 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2102 b->b_len)); 2103 2104 if (TAILQ_EMPTY(&b->b_qs[q])) 2105 nlen += sizeof(struct pfsync_subheader); 2106 2107 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2108 pfsync_sendout(1, b->b_id); 2109 2110 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2111 } 2112 2113 b->b_len += nlen; 2114 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2115 st->sync_state = q; 2116 if (ref) 2117 pf_ref_state(st); 2118 } 2119 2120 static void 2121 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2122 { 2123 int q = st->sync_state; 2124 2125 PFSYNC_BUCKET_LOCK_ASSERT(b); 2126 KASSERT(st->sync_state != PFSYNC_S_NONE, 2127 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2128 2129 b->b_len -= pfsync_qs[q].len; 2130 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2131 st->sync_state = PFSYNC_S_NONE; 2132 if (unref) 2133 pf_release_state(st); 2134 2135 if (TAILQ_EMPTY(&b->b_qs[q])) 2136 b->b_len -= sizeof(struct pfsync_subheader); 2137 } 2138 2139 static void 2140 pfsync_bulk_start(void) 2141 { 2142 struct pfsync_softc *sc = V_pfsyncif; 2143 2144 if (V_pf_status.debug >= PF_DEBUG_MISC) 2145 printf("pfsync: received bulk update request\n"); 2146 2147 PFSYNC_BLOCK(sc); 2148 2149 sc->sc_ureq_received = time_uptime; 2150 sc->sc_bulk_hashid = 0; 2151 sc->sc_bulk_stateid = 0; 2152 pfsync_bulk_status(PFSYNC_BUS_START); 2153 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2154 PFSYNC_BUNLOCK(sc); 2155 } 2156 2157 static void 2158 pfsync_bulk_update(void *arg) 2159 { 2160 struct pfsync_softc *sc = arg; 2161 struct pf_kstate *s; 2162 int i; 2163 2164 PFSYNC_BLOCK_ASSERT(sc); 2165 CURVNET_SET(sc->sc_ifp->if_vnet); 2166 2167 /* 2168 * Start with last state from previous invocation. 2169 * It may had gone, in this case start from the 2170 * hash slot. 2171 */ 2172 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2173 2174 if (s != NULL) 2175 i = PF_IDHASH(s); 2176 else 2177 i = sc->sc_bulk_hashid; 2178 2179 for (; i <= pf_hashmask; i++) { 2180 struct pf_idhash *ih = &V_pf_idhash[i]; 2181 2182 if (s != NULL) 2183 PF_HASHROW_ASSERT(ih); 2184 else { 2185 PF_HASHROW_LOCK(ih); 2186 s = LIST_FIRST(&ih->states); 2187 } 2188 2189 for (; s; s = LIST_NEXT(s, entry)) { 2190 if (s->sync_state == PFSYNC_S_NONE && 2191 s->timeout < PFTM_MAX && 2192 s->pfsync_time <= sc->sc_ureq_received) { 2193 if (pfsync_update_state_req(s)) { 2194 /* We've filled a packet. */ 2195 sc->sc_bulk_hashid = i; 2196 sc->sc_bulk_stateid = s->id; 2197 sc->sc_bulk_creatorid = s->creatorid; 2198 PF_HASHROW_UNLOCK(ih); 2199 callout_reset(&sc->sc_bulk_tmo, 1, 2200 pfsync_bulk_update, sc); 2201 goto full; 2202 } 2203 } 2204 } 2205 PF_HASHROW_UNLOCK(ih); 2206 } 2207 2208 /* We're done. */ 2209 pfsync_bulk_status(PFSYNC_BUS_END); 2210 full: 2211 CURVNET_RESTORE(); 2212 } 2213 2214 static void 2215 pfsync_bulk_status(u_int8_t status) 2216 { 2217 struct { 2218 struct pfsync_subheader subh; 2219 struct pfsync_bus bus; 2220 } __packed r; 2221 2222 struct pfsync_softc *sc = V_pfsyncif; 2223 2224 bzero(&r, sizeof(r)); 2225 2226 r.subh.action = PFSYNC_ACT_BUS; 2227 r.subh.count = htons(1); 2228 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2229 2230 r.bus.creatorid = V_pf_status.hostid; 2231 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2232 r.bus.status = status; 2233 2234 pfsync_send_plus(&r, sizeof(r)); 2235 } 2236 2237 static void 2238 pfsync_bulk_fail(void *arg) 2239 { 2240 struct pfsync_softc *sc = arg; 2241 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2242 2243 CURVNET_SET(sc->sc_ifp->if_vnet); 2244 2245 PFSYNC_BLOCK_ASSERT(sc); 2246 2247 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2248 /* Try again */ 2249 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2250 pfsync_bulk_fail, V_pfsyncif); 2251 PFSYNC_BUCKET_LOCK(b); 2252 pfsync_request_update(0, 0); 2253 PFSYNC_BUCKET_UNLOCK(b); 2254 } else { 2255 /* Pretend like the transfer was ok. */ 2256 sc->sc_ureq_sent = 0; 2257 sc->sc_bulk_tries = 0; 2258 PFSYNC_LOCK(sc); 2259 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2260 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2261 "pfsync bulk fail"); 2262 sc->sc_flags |= PFSYNCF_OK; 2263 PFSYNC_UNLOCK(sc); 2264 if (V_pf_status.debug >= PF_DEBUG_MISC) 2265 printf("pfsync: failed to receive bulk update\n"); 2266 } 2267 2268 CURVNET_RESTORE(); 2269 } 2270 2271 static void 2272 pfsync_send_plus(void *plus, size_t pluslen) 2273 { 2274 struct pfsync_softc *sc = V_pfsyncif; 2275 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2276 2277 PFSYNC_BUCKET_LOCK(b); 2278 2279 if (b->b_len + pluslen > sc->sc_ifp->if_mtu) 2280 pfsync_sendout(1, b->b_id); 2281 2282 b->b_plus = plus; 2283 b->b_len += (b->b_pluslen = pluslen); 2284 2285 pfsync_sendout(1, b->b_id); 2286 PFSYNC_BUCKET_UNLOCK(b); 2287 } 2288 2289 static void 2290 pfsync_timeout(void *arg) 2291 { 2292 struct pfsync_bucket *b = arg; 2293 2294 CURVNET_SET(b->b_sc->sc_ifp->if_vnet); 2295 PFSYNC_BUCKET_LOCK(b); 2296 pfsync_push(b); 2297 PFSYNC_BUCKET_UNLOCK(b); 2298 CURVNET_RESTORE(); 2299 } 2300 2301 static void 2302 pfsync_push(struct pfsync_bucket *b) 2303 { 2304 2305 PFSYNC_BUCKET_LOCK_ASSERT(b); 2306 2307 b->b_flags |= PFSYNCF_BUCKET_PUSH; 2308 swi_sched(V_pfsync_swi_cookie, 0); 2309 } 2310 2311 static void 2312 pfsync_push_all(struct pfsync_softc *sc) 2313 { 2314 int c; 2315 struct pfsync_bucket *b; 2316 2317 for (c = 0; c < pfsync_buckets; c++) { 2318 b = &sc->sc_buckets[c]; 2319 2320 PFSYNC_BUCKET_LOCK(b); 2321 pfsync_push(b); 2322 PFSYNC_BUCKET_UNLOCK(b); 2323 } 2324 } 2325 2326 static void 2327 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) 2328 { 2329 struct ip *ip; 2330 int af, error = 0; 2331 2332 ip = mtod(m, struct ip *); 2333 MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); 2334 2335 af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; 2336 2337 /* 2338 * We distinguish between a deferral packet and our 2339 * own pfsync packet based on M_SKIP_FIREWALL 2340 * flag. This is XXX. 2341 */ 2342 switch (af) { 2343 #ifdef INET 2344 case AF_INET: 2345 if (m->m_flags & M_SKIP_FIREWALL) { 2346 error = ip_output(m, NULL, NULL, 0, 2347 NULL, NULL); 2348 } else { 2349 error = ip_output(m, NULL, NULL, 2350 IP_RAWOUTPUT, &sc->sc_imo, NULL); 2351 } 2352 break; 2353 #endif 2354 #ifdef INET6 2355 case AF_INET6: 2356 if (m->m_flags & M_SKIP_FIREWALL) { 2357 error = ip6_output(m, NULL, NULL, 0, 2358 NULL, NULL, NULL); 2359 } else { 2360 MPASS(false); 2361 /* We don't support pfsync over IPv6. */ 2362 /*error = ip6_output(m, NULL, NULL, 2363 IP_RAWOUTPUT, &sc->sc_imo6, NULL);*/ 2364 } 2365 break; 2366 #endif 2367 } 2368 2369 if (error == 0) 2370 V_pfsyncstats.pfsyncs_opackets++; 2371 else 2372 V_pfsyncstats.pfsyncs_oerrors++; 2373 2374 } 2375 2376 static void 2377 pfsyncintr(void *arg) 2378 { 2379 struct epoch_tracker et; 2380 struct pfsync_softc *sc = arg; 2381 struct pfsync_bucket *b; 2382 struct mbuf *m, *n; 2383 int c; 2384 2385 NET_EPOCH_ENTER(et); 2386 CURVNET_SET(sc->sc_ifp->if_vnet); 2387 2388 for (c = 0; c < pfsync_buckets; c++) { 2389 b = &sc->sc_buckets[c]; 2390 2391 PFSYNC_BUCKET_LOCK(b); 2392 if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { 2393 pfsync_sendout(0, b->b_id); 2394 b->b_flags &= ~PFSYNCF_BUCKET_PUSH; 2395 } 2396 _IF_DEQUEUE_ALL(&b->b_snd, m); 2397 PFSYNC_BUCKET_UNLOCK(b); 2398 2399 for (; m != NULL; m = n) { 2400 n = m->m_nextpkt; 2401 m->m_nextpkt = NULL; 2402 2403 pfsync_tx(sc, m); 2404 } 2405 } 2406 CURVNET_RESTORE(); 2407 NET_EPOCH_EXIT(et); 2408 } 2409 2410 static int 2411 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, 2412 struct in_mfilter *imf) 2413 { 2414 struct ip_moptions *imo = &sc->sc_imo; 2415 int error; 2416 2417 if (!(ifp->if_flags & IFF_MULTICAST)) 2418 return (EADDRNOTAVAIL); 2419 2420 switch (sc->sc_sync_peer.ss_family) { 2421 #ifdef INET 2422 case AF_INET: 2423 { 2424 ip_mfilter_init(&imo->imo_head); 2425 imo->imo_multicast_vif = -1; 2426 if ((error = in_joingroup(ifp, &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, 2427 &imf->imf_inm)) != 0) 2428 return (error); 2429 2430 ip_mfilter_insert(&imo->imo_head, imf); 2431 imo->imo_multicast_ifp = ifp; 2432 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2433 imo->imo_multicast_loop = 0; 2434 break; 2435 } 2436 #endif 2437 } 2438 2439 return (0); 2440 } 2441 2442 static void 2443 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2444 { 2445 struct ip_moptions *imo = &sc->sc_imo; 2446 struct in_mfilter *imf; 2447 2448 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2449 ip_mfilter_remove(&imo->imo_head, imf); 2450 in_leavegroup(imf->imf_inm, NULL); 2451 ip_mfilter_free(imf); 2452 } 2453 imo->imo_multicast_ifp = NULL; 2454 } 2455 2456 void 2457 pfsync_detach_ifnet(struct ifnet *ifp) 2458 { 2459 struct pfsync_softc *sc = V_pfsyncif; 2460 2461 if (sc == NULL) 2462 return; 2463 2464 PFSYNC_LOCK(sc); 2465 2466 if (sc->sc_sync_if == ifp) { 2467 /* We don't need mutlicast cleanup here, because the interface 2468 * is going away. We do need to ensure we don't try to do 2469 * cleanup later. 2470 */ 2471 ip_mfilter_init(&sc->sc_imo.imo_head); 2472 sc->sc_imo.imo_multicast_ifp = NULL; 2473 sc->sc_sync_if = NULL; 2474 } 2475 2476 PFSYNC_UNLOCK(sc); 2477 } 2478 2479 static int 2480 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 2481 { 2482 struct sockaddr_storage sa; 2483 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 2484 status->flags = pfsyncr->pfsyncr_defer; 2485 2486 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 2487 2488 memset(&sa, 0, sizeof(sa)); 2489 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 2490 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 2491 in->sin_family = AF_INET; 2492 in->sin_len = sizeof(*in); 2493 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 2494 } 2495 status->syncpeer = sa; 2496 2497 return 0; 2498 } 2499 2500 static int 2501 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 2502 { 2503 struct in_mfilter *imf = NULL; 2504 struct ifnet *sifp; 2505 struct ip *ip; 2506 int error; 2507 int c; 2508 2509 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 2510 return (EINVAL); 2511 2512 if (status->syncdev[0] == '\0') 2513 sifp = NULL; 2514 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 2515 return (EINVAL); 2516 2517 struct sockaddr_in *status_sin = 2518 (struct sockaddr_in *)&(status->syncpeer); 2519 if (sifp != NULL && (status_sin->sin_addr.s_addr == 0 || 2520 status_sin->sin_addr.s_addr == 2521 htonl(INADDR_PFSYNC_GROUP))) 2522 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 2523 2524 PFSYNC_LOCK(sc); 2525 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 2526 sc_sin->sin_family = AF_INET; 2527 sc_sin->sin_len = sizeof(*sc_sin); 2528 if (status_sin->sin_addr.s_addr == 0) { 2529 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 2530 } else { 2531 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 2532 } 2533 2534 sc->sc_maxupdates = status->maxupdates; 2535 if (status->flags & PFSYNCF_DEFER) { 2536 sc->sc_flags |= PFSYNCF_DEFER; 2537 V_pfsync_defer_ptr = pfsync_defer; 2538 } else { 2539 sc->sc_flags &= ~PFSYNCF_DEFER; 2540 V_pfsync_defer_ptr = NULL; 2541 } 2542 2543 if (sifp == NULL) { 2544 if (sc->sc_sync_if) 2545 if_rele(sc->sc_sync_if); 2546 sc->sc_sync_if = NULL; 2547 pfsync_multicast_cleanup(sc); 2548 PFSYNC_UNLOCK(sc); 2549 return (0); 2550 } 2551 2552 for (c = 0; c < pfsync_buckets; c++) { 2553 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 2554 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 2555 (sifp->if_mtu < sc->sc_ifp->if_mtu || 2556 (sc->sc_sync_if != NULL && 2557 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 2558 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 2559 pfsync_sendout(1, c); 2560 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 2561 } 2562 2563 pfsync_multicast_cleanup(sc); 2564 2565 if (sc_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 2566 error = pfsync_multicast_setup(sc, sifp, imf); 2567 if (error) { 2568 if_rele(sifp); 2569 ip_mfilter_free(imf); 2570 PFSYNC_UNLOCK(sc); 2571 return (error); 2572 } 2573 } 2574 if (sc->sc_sync_if) 2575 if_rele(sc->sc_sync_if); 2576 sc->sc_sync_if = sifp; 2577 2578 ip = &sc->sc_template.ipv4; 2579 bzero(ip, sizeof(*ip)); 2580 ip->ip_v = IPVERSION; 2581 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 2582 ip->ip_tos = IPTOS_LOWDELAY; 2583 /* len and id are set later. */ 2584 ip->ip_off = htons(IP_DF); 2585 ip->ip_ttl = PFSYNC_DFLTTL; 2586 ip->ip_p = IPPROTO_PFSYNC; 2587 ip->ip_src.s_addr = INADDR_ANY; 2588 ip->ip_dst.s_addr = sc_sin->sin_addr.s_addr; 2589 2590 /* Request a full state table update. */ 2591 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2592 (*carp_demote_adj_p)(V_pfsync_carp_adj, 2593 "pfsync bulk start"); 2594 sc->sc_flags &= ~PFSYNCF_OK; 2595 if (V_pf_status.debug >= PF_DEBUG_MISC) 2596 printf("pfsync: requesting bulk update\n"); 2597 PFSYNC_UNLOCK(sc); 2598 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 2599 pfsync_request_update(0, 0); 2600 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 2601 PFSYNC_BLOCK(sc); 2602 sc->sc_ureq_sent = time_uptime; 2603 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); 2604 PFSYNC_BUNLOCK(sc); 2605 return (0); 2606 } 2607 2608 static void 2609 pfsync_pointers_init(void) 2610 { 2611 2612 PF_RULES_WLOCK(); 2613 V_pfsync_state_import_ptr = pfsync_state_import; 2614 V_pfsync_insert_state_ptr = pfsync_insert_state; 2615 V_pfsync_update_state_ptr = pfsync_update_state; 2616 V_pfsync_delete_state_ptr = pfsync_delete_state; 2617 V_pfsync_clear_states_ptr = pfsync_clear_states; 2618 V_pfsync_defer_ptr = pfsync_defer; 2619 PF_RULES_WUNLOCK(); 2620 } 2621 2622 static void 2623 pfsync_pointers_uninit(void) 2624 { 2625 2626 PF_RULES_WLOCK(); 2627 V_pfsync_state_import_ptr = NULL; 2628 V_pfsync_insert_state_ptr = NULL; 2629 V_pfsync_update_state_ptr = NULL; 2630 V_pfsync_delete_state_ptr = NULL; 2631 V_pfsync_clear_states_ptr = NULL; 2632 V_pfsync_defer_ptr = NULL; 2633 PF_RULES_WUNLOCK(); 2634 } 2635 2636 static void 2637 vnet_pfsync_init(const void *unused __unused) 2638 { 2639 int error; 2640 2641 V_pfsync_cloner = if_clone_simple(pfsyncname, 2642 pfsync_clone_create, pfsync_clone_destroy, 1); 2643 error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, 2644 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 2645 if (error) { 2646 if_clone_detach(V_pfsync_cloner); 2647 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 2648 } 2649 2650 pfsync_pointers_init(); 2651 } 2652 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 2653 vnet_pfsync_init, NULL); 2654 2655 static void 2656 vnet_pfsync_uninit(const void *unused __unused) 2657 { 2658 int ret __diagused; 2659 2660 pfsync_pointers_uninit(); 2661 2662 if_clone_detach(V_pfsync_cloner); 2663 ret = swi_remove(V_pfsync_swi_cookie); 2664 MPASS(ret == 0); 2665 ret = intr_event_destroy(V_pfsync_swi_ie); 2666 MPASS(ret == 0); 2667 } 2668 2669 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 2670 vnet_pfsync_uninit, NULL); 2671 2672 static int 2673 pfsync_init(void) 2674 { 2675 #ifdef INET 2676 int error; 2677 2678 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 2679 2680 error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); 2681 if (error) 2682 return (error); 2683 #endif 2684 2685 return (0); 2686 } 2687 2688 static void 2689 pfsync_uninit(void) 2690 { 2691 pfsync_detach_ifnet_ptr = NULL; 2692 2693 #ifdef INET 2694 ipproto_unregister(IPPROTO_PFSYNC); 2695 #endif 2696 } 2697 2698 static int 2699 pfsync_modevent(module_t mod, int type, void *data) 2700 { 2701 int error = 0; 2702 2703 switch (type) { 2704 case MOD_LOAD: 2705 error = pfsync_init(); 2706 break; 2707 case MOD_UNLOAD: 2708 pfsync_uninit(); 2709 break; 2710 default: 2711 error = EINVAL; 2712 break; 2713 } 2714 2715 return (error); 2716 } 2717 2718 static moduledata_t pfsync_mod = { 2719 pfsyncname, 2720 pfsync_modevent, 2721 0 2722 }; 2723 2724 #define PFSYNC_MODVER 1 2725 2726 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 2727 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 2728 MODULE_VERSION(pfsync, PFSYNC_MODVER); 2729 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 2730