/*-
 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC)
 *
 * Copyright (c) 2002 Michael Shalayeff
 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_pf.h" 68 69 #include <sys/param.h> 70 #include <sys/bus.h> 71 #include <sys/endian.h> 72 #include <sys/interrupt.h> 73 #include <sys/kernel.h> 74 #include <sys/lock.h> 75 #include <sys/mbuf.h> 76 #include <sys/module.h> 77 #include <sys/mutex.h> 78 #include <sys/nv.h> 79 #include <sys/priv.h> 80 #include <sys/smp.h> 81 #include <sys/socket.h> 82 #include <sys/sockio.h> 83 #include <sys/sysctl.h> 84 #include <sys/syslog.h> 85 86 #include <net/bpf.h> 87 #include <net/if.h> 88 #include <net/if_var.h> 89 #include <net/if_clone.h> 90 #include <net/if_private.h> 91 #include <net/if_types.h> 92 #include <net/vnet.h> 93 #include <net/pfvar.h> 94 #include <net/if_pfsync.h> 95 96 #include <netinet/if_ether.h> 97 #include <netinet/in.h> 98 #include <netinet/in_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip_carp.h> 101 #include <netinet/ip_var.h> 102 #include <netinet/tcp.h> 103 #include <netinet/tcp_fsm.h> 104 #include <netinet/tcp_seq.h> 105 106 #include <netpfil/pf/pfsync_nv.h> 107 108 struct pfsync_bucket; 109 110 union inet_template { 111 struct ip ipv4; 112 }; 113 114 #define PFSYNC_MINPKT ( \ 115 sizeof(union inet_template) + \ 116 sizeof(struct pfsync_header) + \ 117 sizeof(struct pfsync_subheader) ) 118 119 static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, 120 struct pfsync_state_peer *); 121 static int pfsync_in_clr(struct mbuf *, int, int, int); 122 static int pfsync_in_ins(struct mbuf *, int, int, int); 123 static int pfsync_in_iack(struct mbuf *, int, int, int); 124 static int pfsync_in_upd(struct mbuf *, int, int, int); 125 static int pfsync_in_upd_c(struct mbuf *, int, int, int); 126 static int pfsync_in_ureq(struct mbuf *, int, int, int); 127 static int pfsync_in_del(struct mbuf *, int, int, int); 128 static int pfsync_in_del_c(struct mbuf *, int, int, int); 129 static int pfsync_in_bus(struct mbuf *, int, int, int); 130 static int pfsync_in_tdb(struct mbuf *, int, int, int); 131 static int pfsync_in_eof(struct mbuf *, int, int, int); 132 static int pfsync_in_error(struct mbuf *, int, int, int); 133 134 static int (*pfsync_acts[])(struct mbuf *, int, int, int) = { 135 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 136 pfsync_in_ins, /* PFSYNC_ACT_INS */ 137 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 138 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 139 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 140 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 141 pfsync_in_del, /* PFSYNC_ACT_DEL */ 142 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 143 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 144 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 145 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 146 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 147 pfsync_in_eof /* PFSYNC_ACT_EOF */ 148 }; 149 150 struct pfsync_q { 151 void (*write)(struct pf_kstate *, void 
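	/*
	 * Each pfsync_q describes one transmit queue: write() serializes a
	 * single state into the packet being built, len is the size of that
	 * on-wire record, and action is the PFSYNC_ACT_* code placed in the
	 * subheader.  pfsync_qs[] below is indexed by the PFSYNC_S_* queue a
	 * state currently sits on, and pfsync_sendout() walks those queues
	 * calling the write() method for every queued state; e.g. a state on
	 * PFSYNC_S_INS is emitted as a full struct pfsync_state under a
	 * PFSYNC_ACT_INS subheader.
	 */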
*);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
static void	pfsync_out_state(struct pf_kstate *, void *);
static void	pfsync_out_iack(struct pf_kstate *, void *);
static void	pfsync_out_upd_c(struct pf_kstate *, void *);
static void	pfsync_out_del(struct pf_kstate *, void *);

static struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
};

static void	pfsync_q_ins(struct pf_kstate *, int, bool);
static void	pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *);

static void	pfsync_update_state(struct pf_kstate *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};

struct pfsync_deferral {
	struct pfsync_softc		*pd_sc;
	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
	u_int				pd_refs;
	struct callout			pd_tmo;

	struct pf_kstate		*pd_st;
	struct mbuf			*pd_m;
};

struct pfsync_softc;

struct pfsync_bucket
{
	int			b_id;
	struct pfsync_softc	*b_sc;
	struct mtx		b_mtx;
	struct callout		b_tmo;
	int			b_flags;
#define	PFSYNCF_BUCKET_PUSH	0x00000001

	size_t			b_len;
	TAILQ_HEAD(, pf_kstate)			b_qs[PFSYNC_S_COUNT];
	TAILQ_HEAD(, pfsync_upd_req_item)	b_upd_req_list;
	TAILQ_HEAD(, pfsync_deferral)		b_deferrals;
	u_int			b_deferred;
	void			*b_plus;
	size_t			b_pluslen;

	struct ifaltq		b_snd;
};

struct pfsync_softc {
	/* Configuration */
	struct ifnet		*sc_ifp;
	struct ifnet		*sc_sync_if;
	struct ip_moptions	sc_imo;
	struct sockaddr_storage	sc_sync_peer;
	uint32_t		sc_flags;
	uint8_t			sc_maxupdates;
	union inet_template	sc_template;
	struct mtx		sc_mtx;

	/* Queued data */
	struct pfsync_bucket	*sc_buckets;

	/* Bulk update info */
	struct mtx		sc_bulk_mtx;
	uint32_t		sc_ureq_sent;
	int			sc_bulk_tries;
	uint32_t		sc_ureq_received;
	int			sc_bulk_hashid;
	uint64_t		sc_bulk_stateid;
	uint32_t		sc_bulk_creatorid;
	struct callout		sc_bulk_tmo;
	struct callout		sc_bulkfail_tmo;
};

#define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
#define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
#define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)

#define	PFSYNC_BUCKET_LOCK(b)		mtx_lock(&(b)->b_mtx)
#define	PFSYNC_BUCKET_UNLOCK(b)		mtx_unlock(&(b)->b_mtx)
#define	PFSYNC_BUCKET_LOCK_ASSERT(b)	mtx_assert(&(b)->b_mtx, MA_OWNED)

#define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)

static const char pfsyncname[] = "pfsync";
static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL;
#define	V_pfsyncif		VNET(pfsyncif)
VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL;
#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie);
#define	V_pfsync_swi_ie		VNET(pfsync_swi_ie)
VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats);
#define
V_pfsyncstats VNET(pfsyncstats) 260 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 261 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 262 263 static void pfsync_timeout(void *); 264 static void pfsync_push(struct pfsync_bucket *); 265 static void pfsync_push_all(struct pfsync_softc *); 266 static void pfsyncintr(void *); 267 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 268 struct in_mfilter *imf); 269 static void pfsync_multicast_cleanup(struct pfsync_softc *); 270 static void pfsync_pointers_init(void); 271 static void pfsync_pointers_uninit(void); 272 static int pfsync_init(void); 273 static void pfsync_uninit(void); 274 275 static unsigned long pfsync_buckets; 276 277 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 278 "PFSYNC"); 279 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 280 &VNET_NAME(pfsyncstats), pfsyncstats, 281 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 282 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 283 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 284 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 285 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 286 287 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 288 static void pfsync_clone_destroy(struct ifnet *); 289 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 290 struct pf_state_peer *); 291 static int pfsyncoutput(struct ifnet *, struct mbuf *, 292 const struct sockaddr *, struct route *); 293 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 294 295 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 296 static void pfsync_undefer(struct pfsync_deferral *, int); 297 static void pfsync_undefer_state(struct pf_kstate *, int); 298 static void pfsync_defer_tmo(void *); 299 300 static void pfsync_request_update(u_int32_t, u_int64_t); 301 static bool pfsync_update_state_req(struct pf_kstate *); 302 303 static void pfsync_drop(struct pfsync_softc *); 304 static void pfsync_sendout(int, int); 305 static void pfsync_send_plus(void *, size_t); 306 307 static void pfsync_bulk_start(void); 308 static void pfsync_bulk_status(u_int8_t); 309 static void pfsync_bulk_update(void *); 310 static void pfsync_bulk_fail(void *); 311 312 static void pfsync_detach_ifnet(struct ifnet *); 313 314 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 315 struct pfsync_kstatus *); 316 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 317 struct pfsync_softc *); 318 319 #ifdef IPSEC 320 static void pfsync_update_net_tdb(struct pfsync_tdb *); 321 #endif 322 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 323 struct pf_kstate *); 324 325 #define PFSYNC_MAX_BULKTRIES 12 326 #define PFSYNC_DEFER_TIMEOUT ((20 * hz) / 1000) 327 328 VNET_DEFINE(struct if_clone *, pfsync_cloner); 329 #define V_pfsync_cloner VNET(pfsync_cloner) 330 331 static int 332 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 333 { 334 struct pfsync_softc *sc; 335 struct ifnet *ifp; 336 struct pfsync_bucket *b; 337 int c, q; 338 339 if (unit != 0) 340 return (EINVAL); 341 342 if (! 
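	/*
	 * If the net.pfsync.pfsync_buckets tunable was left at zero, default
	 * to two buckets per CPU (e.g. an 8-core machine gets 16).  States
	 * are spread over the buckets by pfsync_get_bucket(), which hashes
	 * with PF_IDHASH(st) % pfsync_buckets, so concurrent state updates
	 * contend on per-bucket mutexes instead of a single softc lock.
	 */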
pfsync_buckets) 343 pfsync_buckets = mp_ncpus * 2; 344 345 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 346 sc->sc_flags |= PFSYNCF_OK; 347 sc->sc_maxupdates = 128; 348 349 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 350 if (ifp == NULL) { 351 free(sc, M_PFSYNC); 352 return (ENOSPC); 353 } 354 if_initname(ifp, pfsyncname, unit); 355 ifp->if_softc = sc; 356 ifp->if_ioctl = pfsyncioctl; 357 ifp->if_output = pfsyncoutput; 358 ifp->if_type = IFT_PFSYNC; 359 ifp->if_hdrlen = sizeof(struct pfsync_header); 360 ifp->if_mtu = ETHERMTU; 361 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 362 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 363 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 364 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 365 366 if_attach(ifp); 367 368 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 369 370 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 371 M_PFSYNC, M_ZERO | M_WAITOK); 372 for (c = 0; c < pfsync_buckets; c++) { 373 b = &sc->sc_buckets[c]; 374 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 375 376 b->b_id = c; 377 b->b_sc = sc; 378 b->b_len = PFSYNC_MINPKT; 379 380 for (q = 0; q < PFSYNC_S_COUNT; q++) 381 TAILQ_INIT(&b->b_qs[q]); 382 383 TAILQ_INIT(&b->b_upd_req_list); 384 TAILQ_INIT(&b->b_deferrals); 385 386 callout_init(&b->b_tmo, 1); 387 388 b->b_snd.ifq_maxlen = ifqmaxlen; 389 } 390 391 V_pfsyncif = sc; 392 393 return (0); 394 } 395 396 static void 397 pfsync_clone_destroy(struct ifnet *ifp) 398 { 399 struct pfsync_softc *sc = ifp->if_softc; 400 struct pfsync_bucket *b; 401 int c; 402 403 for (c = 0; c < pfsync_buckets; c++) { 404 b = &sc->sc_buckets[c]; 405 /* 406 * At this stage, everything should have already been 407 * cleared by pfsync_uninit(), and we have only to 408 * drain callouts. 
409 */ 410 while (b->b_deferred > 0) { 411 struct pfsync_deferral *pd = 412 TAILQ_FIRST(&b->b_deferrals); 413 414 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 415 b->b_deferred--; 416 if (callout_stop(&pd->pd_tmo) > 0) { 417 pf_release_state(pd->pd_st); 418 m_freem(pd->pd_m); 419 free(pd, M_PFSYNC); 420 } else { 421 pd->pd_refs++; 422 callout_drain(&pd->pd_tmo); 423 free(pd, M_PFSYNC); 424 } 425 } 426 427 callout_drain(&b->b_tmo); 428 } 429 430 callout_drain(&sc->sc_bulkfail_tmo); 431 callout_drain(&sc->sc_bulk_tmo); 432 433 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 434 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 435 bpfdetach(ifp); 436 if_detach(ifp); 437 438 pfsync_drop(sc); 439 440 if_free(ifp); 441 pfsync_multicast_cleanup(sc); 442 mtx_destroy(&sc->sc_mtx); 443 mtx_destroy(&sc->sc_bulk_mtx); 444 445 free(sc->sc_buckets, M_PFSYNC); 446 free(sc, M_PFSYNC); 447 448 V_pfsyncif = NULL; 449 } 450 451 static int 452 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 453 struct pf_state_peer *d) 454 { 455 if (s->scrub.scrub_flag && d->scrub == NULL) { 456 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 457 if (d->scrub == NULL) 458 return (ENOMEM); 459 } 460 461 return (0); 462 } 463 464 static int 465 pfsync_state_import(struct pfsync_state *sp, int flags) 466 { 467 struct pfsync_softc *sc = V_pfsyncif; 468 #ifndef __NO_STRICT_ALIGNMENT 469 struct pfsync_state_key key[2]; 470 #endif 471 struct pfsync_state_key *kw, *ks; 472 struct pf_kstate *st = NULL; 473 struct pf_state_key *skw = NULL, *sks = NULL; 474 struct pf_krule *r = NULL; 475 struct pfi_kkif *kif; 476 int error; 477 478 PF_RULES_RASSERT(); 479 480 if (sp->creatorid == 0) { 481 if (V_pf_status.debug >= PF_DEBUG_MISC) 482 printf("%s: invalid creator id: %08x\n", __func__, 483 ntohl(sp->creatorid)); 484 return (EINVAL); 485 } 486 487 if ((kif = pfi_kkif_find(sp->ifname)) == NULL) { 488 if (V_pf_status.debug >= PF_DEBUG_MISC) 489 printf("%s: unknown interface: %s\n", __func__, 490 sp->ifname); 491 if (flags & PFSYNC_SI_IOCTL) 492 return (EINVAL); 493 return (0); /* skip this state */ 494 } 495 496 /* 497 * If the ruleset checksums match or the state is coming from the ioctl, 498 * it's safe to associate the state with the rule of that number. 499 */ 500 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 501 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 502 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 503 r = pf_main_ruleset.rules[ 504 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 505 else 506 r = &V_pf_default_rule; 507 508 if ((r->max_states && 509 counter_u64_fetch(r->states_cur) >= r->max_states)) 510 goto cleanup; 511 512 /* 513 * XXXGL: consider M_WAITOK in ioctl path after. 
514 */ 515 st = pf_alloc_state(M_NOWAIT); 516 if (__predict_false(st == NULL)) 517 goto cleanup; 518 519 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 520 goto cleanup; 521 522 #ifndef __NO_STRICT_ALIGNMENT 523 bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); 524 kw = &key[PF_SK_WIRE]; 525 ks = &key[PF_SK_STACK]; 526 #else 527 kw = &sp->key[PF_SK_WIRE]; 528 ks = &sp->key[PF_SK_STACK]; 529 #endif 530 531 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || 532 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || 533 kw->port[0] != ks->port[0] || 534 kw->port[1] != ks->port[1]) { 535 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 536 if (sks == NULL) 537 goto cleanup; 538 } else 539 sks = skw; 540 541 /* allocate memory for scrub info */ 542 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 543 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 544 goto cleanup; 545 546 /* Copy to state key(s). */ 547 skw->addr[0] = kw->addr[0]; 548 skw->addr[1] = kw->addr[1]; 549 skw->port[0] = kw->port[0]; 550 skw->port[1] = kw->port[1]; 551 skw->proto = sp->proto; 552 skw->af = sp->af; 553 if (sks != skw) { 554 sks->addr[0] = ks->addr[0]; 555 sks->addr[1] = ks->addr[1]; 556 sks->port[0] = ks->port[0]; 557 sks->port[1] = ks->port[1]; 558 sks->proto = sp->proto; 559 sks->af = sp->af; 560 } 561 562 /* copy to state */ 563 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 564 st->creation = time_uptime - ntohl(sp->creation); 565 st->expire = time_uptime; 566 if (sp->expire) { 567 uint32_t timeout; 568 569 timeout = r->timeout[sp->timeout]; 570 if (!timeout) 571 timeout = V_pf_default_rule.timeout[sp->timeout]; 572 573 /* sp->expire may have been adaptively scaled by export. */ 574 st->expire -= timeout - ntohl(sp->expire); 575 } 576 577 st->direction = sp->direction; 578 st->log = sp->log; 579 st->timeout = sp->timeout; 580 st->state_flags = sp->state_flags; 581 582 st->id = sp->id; 583 st->creatorid = sp->creatorid; 584 pf_state_peer_ntoh(&sp->src, &st->src); 585 pf_state_peer_ntoh(&sp->dst, &st->dst); 586 587 st->rule.ptr = r; 588 st->nat_rule.ptr = NULL; 589 st->anchor.ptr = NULL; 590 st->rt_kif = NULL; 591 592 st->pfsync_time = time_uptime; 593 st->sync_state = PFSYNC_S_NONE; 594 595 if (!(flags & PFSYNC_SI_IOCTL)) 596 st->state_flags |= PFSTATE_NOSYNC; 597 598 if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) 599 goto cleanup_state; 600 601 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 602 counter_u64_add(r->states_cur, 1); 603 counter_u64_add(r->states_tot, 1); 604 605 if (!(flags & PFSYNC_SI_IOCTL)) { 606 st->state_flags &= ~PFSTATE_NOSYNC; 607 if (st->state_flags & PFSTATE_ACK) { 608 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 609 PFSYNC_BUCKET_LOCK(b); 610 pfsync_q_ins(st, PFSYNC_S_IACK, true); 611 PFSYNC_BUCKET_UNLOCK(b); 612 613 pfsync_push_all(sc); 614 } 615 } 616 st->state_flags &= ~PFSTATE_ACK; 617 PF_STATE_UNLOCK(st); 618 619 return (0); 620 621 cleanup: 622 error = ENOMEM; 623 if (skw == sks) 624 sks = NULL; 625 if (skw != NULL) 626 uma_zfree(V_pf_state_key_z, skw); 627 if (sks != NULL) 628 uma_zfree(V_pf_state_key_z, sks); 629 630 cleanup_state: /* pf_state_insert() frees the state keys. 
*/ 631 if (st) { 632 st->timeout = PFTM_UNLINKED; /* appease an assert */ 633 pf_free_state(st); 634 } 635 return (error); 636 } 637 638 #ifdef INET 639 static int 640 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 641 { 642 struct pfsync_softc *sc = V_pfsyncif; 643 struct mbuf *m = *mp; 644 struct ip *ip = mtod(m, struct ip *); 645 struct pfsync_header *ph; 646 struct pfsync_subheader subh; 647 648 int offset, len, flags = 0; 649 int rv; 650 uint16_t count; 651 652 PF_RULES_RLOCK_TRACKER; 653 654 *mp = NULL; 655 V_pfsyncstats.pfsyncs_ipackets++; 656 657 /* Verify that we have a sync interface configured. */ 658 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 659 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 660 goto done; 661 662 /* verify that the packet came in on the right interface */ 663 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 664 V_pfsyncstats.pfsyncs_badif++; 665 goto done; 666 } 667 668 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 669 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 670 /* verify that the IP TTL is 255. */ 671 if (ip->ip_ttl != PFSYNC_DFLTTL) { 672 V_pfsyncstats.pfsyncs_badttl++; 673 goto done; 674 } 675 676 offset = ip->ip_hl << 2; 677 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 678 V_pfsyncstats.pfsyncs_hdrops++; 679 goto done; 680 } 681 682 if (offset + sizeof(*ph) > m->m_len) { 683 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 684 V_pfsyncstats.pfsyncs_hdrops++; 685 return (IPPROTO_DONE); 686 } 687 ip = mtod(m, struct ip *); 688 } 689 ph = (struct pfsync_header *)((char *)ip + offset); 690 691 /* verify the version */ 692 if (ph->version != PFSYNC_VERSION) { 693 V_pfsyncstats.pfsyncs_badver++; 694 goto done; 695 } 696 697 len = ntohs(ph->len) + offset; 698 if (m->m_pkthdr.len < len) { 699 V_pfsyncstats.pfsyncs_badlen++; 700 goto done; 701 } 702 703 /* 704 * Trusting pf_chksum during packet processing, as well as seeking 705 * in interface name tree, require holding PF_RULES_RLOCK(). 
706 */ 707 PF_RULES_RLOCK(); 708 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 709 flags = PFSYNC_SI_CKSUM; 710 711 offset += sizeof(*ph); 712 while (offset <= len - sizeof(subh)) { 713 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 714 offset += sizeof(subh); 715 716 if (subh.action >= PFSYNC_ACT_MAX) { 717 V_pfsyncstats.pfsyncs_badact++; 718 PF_RULES_RUNLOCK(); 719 goto done; 720 } 721 722 count = ntohs(subh.count); 723 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 724 rv = (*pfsync_acts[subh.action])(m, offset, count, flags); 725 if (rv == -1) { 726 PF_RULES_RUNLOCK(); 727 return (IPPROTO_DONE); 728 } 729 730 offset += rv; 731 } 732 PF_RULES_RUNLOCK(); 733 734 done: 735 m_freem(m); 736 return (IPPROTO_DONE); 737 } 738 #endif 739 740 static int 741 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags) 742 { 743 struct pfsync_clr *clr; 744 struct mbuf *mp; 745 int len = sizeof(*clr) * count; 746 int i, offp; 747 u_int32_t creatorid; 748 749 mp = m_pulldown(m, offset, len, &offp); 750 if (mp == NULL) { 751 V_pfsyncstats.pfsyncs_badlen++; 752 return (-1); 753 } 754 clr = (struct pfsync_clr *)(mp->m_data + offp); 755 756 for (i = 0; i < count; i++) { 757 creatorid = clr[i].creatorid; 758 759 if (clr[i].ifname[0] != '\0' && 760 pfi_kkif_find(clr[i].ifname) == NULL) 761 continue; 762 763 for (int i = 0; i <= pf_hashmask; i++) { 764 struct pf_idhash *ih = &V_pf_idhash[i]; 765 struct pf_kstate *s; 766 relock: 767 PF_HASHROW_LOCK(ih); 768 LIST_FOREACH(s, &ih->states, entry) { 769 if (s->creatorid == creatorid) { 770 s->state_flags |= PFSTATE_NOSYNC; 771 pf_unlink_state(s); 772 goto relock; 773 } 774 } 775 PF_HASHROW_UNLOCK(ih); 776 } 777 } 778 779 return (len); 780 } 781 782 static int 783 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags) 784 { 785 struct mbuf *mp; 786 struct pfsync_state *sa, *sp; 787 int len = sizeof(*sp) * count; 788 int i, offp; 789 790 mp = m_pulldown(m, offset, len, &offp); 791 if (mp == NULL) { 792 V_pfsyncstats.pfsyncs_badlen++; 793 return (-1); 794 } 795 sa = (struct pfsync_state *)(mp->m_data + offp); 796 797 for (i = 0; i < count; i++) { 798 sp = &sa[i]; 799 800 /* Check for invalid values. */ 801 if (sp->timeout >= PFTM_MAX || 802 sp->src.state > PF_TCPS_PROXY_DST || 803 sp->dst.state > PF_TCPS_PROXY_DST || 804 sp->direction > PF_OUT || 805 (sp->af != AF_INET && sp->af != AF_INET6)) { 806 if (V_pf_status.debug >= PF_DEBUG_MISC) 807 printf("%s: invalid value\n", __func__); 808 V_pfsyncstats.pfsyncs_badval++; 809 continue; 810 } 811 812 if (pfsync_state_import(sp, flags) == ENOMEM) 813 /* Drop out, but process the rest of the actions. */ 814 break; 815 } 816 817 return (len); 818 } 819 820 static int 821 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags) 822 { 823 struct pfsync_ins_ack *ia, *iaa; 824 struct pf_kstate *st; 825 826 struct mbuf *mp; 827 int len = count * sizeof(*ia); 828 int offp, i; 829 830 mp = m_pulldown(m, offset, len, &offp); 831 if (mp == NULL) { 832 V_pfsyncstats.pfsyncs_badlen++; 833 return (-1); 834 } 835 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 836 837 for (i = 0; i < count; i++) { 838 ia = &iaa[i]; 839 840 st = pf_find_state_byid(ia->id, ia->creatorid); 841 if (st == NULL) 842 continue; 843 844 if (st->state_flags & PFSTATE_ACK) { 845 pfsync_undefer_state(st, 0); 846 } 847 PF_STATE_UNLOCK(st); 848 } 849 /* 850 * XXX this is not yet implemented, but we know the size of the 851 * message so we can skip it. 
852 */ 853 854 return (count * sizeof(struct pfsync_ins_ack)); 855 } 856 857 static int 858 pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, 859 struct pfsync_state_peer *dst) 860 { 861 int sync = 0; 862 863 PF_STATE_LOCK_ASSERT(st); 864 865 /* 866 * The state should never go backwards except 867 * for syn-proxy states. Neither should the 868 * sequence window slide backwards. 869 */ 870 if ((st->src.state > src->state && 871 (st->src.state < PF_TCPS_PROXY_SRC || 872 src->state >= PF_TCPS_PROXY_SRC)) || 873 874 (st->src.state == src->state && 875 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 876 sync++; 877 else 878 pf_state_peer_ntoh(src, &st->src); 879 880 if ((st->dst.state > dst->state) || 881 882 (st->dst.state >= TCPS_SYN_SENT && 883 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 884 sync++; 885 else 886 pf_state_peer_ntoh(dst, &st->dst); 887 888 return (sync); 889 } 890 891 static int 892 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags) 893 { 894 struct pfsync_softc *sc = V_pfsyncif; 895 struct pfsync_state *sa, *sp; 896 struct pf_kstate *st; 897 int sync; 898 899 struct mbuf *mp; 900 int len = count * sizeof(*sp); 901 int offp, i; 902 903 mp = m_pulldown(m, offset, len, &offp); 904 if (mp == NULL) { 905 V_pfsyncstats.pfsyncs_badlen++; 906 return (-1); 907 } 908 sa = (struct pfsync_state *)(mp->m_data + offp); 909 910 for (i = 0; i < count; i++) { 911 sp = &sa[i]; 912 913 /* check for invalid values */ 914 if (sp->timeout >= PFTM_MAX || 915 sp->src.state > PF_TCPS_PROXY_DST || 916 sp->dst.state > PF_TCPS_PROXY_DST) { 917 if (V_pf_status.debug >= PF_DEBUG_MISC) { 918 printf("pfsync_input: PFSYNC_ACT_UPD: " 919 "invalid value\n"); 920 } 921 V_pfsyncstats.pfsyncs_badval++; 922 continue; 923 } 924 925 st = pf_find_state_byid(sp->id, sp->creatorid); 926 if (st == NULL) { 927 /* insert the update */ 928 if (pfsync_state_import(sp, flags)) 929 V_pfsyncstats.pfsyncs_badstate++; 930 continue; 931 } 932 933 if (st->state_flags & PFSTATE_ACK) { 934 pfsync_undefer_state(st, 1); 935 } 936 937 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 938 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 939 else { 940 sync = 0; 941 942 /* 943 * Non-TCP protocol state machine always go 944 * forwards 945 */ 946 if (st->src.state > sp->src.state) 947 sync++; 948 else 949 pf_state_peer_ntoh(&sp->src, &st->src); 950 if (st->dst.state > sp->dst.state) 951 sync++; 952 else 953 pf_state_peer_ntoh(&sp->dst, &st->dst); 954 } 955 if (sync < 2) { 956 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 957 pf_state_peer_ntoh(&sp->dst, &st->dst); 958 st->expire = time_uptime; 959 st->timeout = sp->timeout; 960 } 961 st->pfsync_time = time_uptime; 962 963 if (sync) { 964 V_pfsyncstats.pfsyncs_stale++; 965 966 pfsync_update_state(st); 967 PF_STATE_UNLOCK(st); 968 pfsync_push_all(sc); 969 continue; 970 } 971 PF_STATE_UNLOCK(st); 972 } 973 974 return (len); 975 } 976 977 static int 978 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags) 979 { 980 struct pfsync_softc *sc = V_pfsyncif; 981 struct pfsync_upd_c *ua, *up; 982 struct pf_kstate *st; 983 int len = count * sizeof(*up); 984 int sync; 985 struct mbuf *mp; 986 int offp, i; 987 988 mp = m_pulldown(m, offset, len, &offp); 989 if (mp == NULL) { 990 V_pfsyncstats.pfsyncs_badlen++; 991 return (-1); 992 } 993 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 994 995 for (i = 0; i < count; i++) { 996 up = &ua[i]; 997 998 /* check for invalid values */ 999 if (up->timeout >= PFTM_MAX || 1000 up->src.state > PF_TCPS_PROXY_DST || 1001 
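		/*
		 * Both the full and compressed update paths follow the same
		 * convention as pfsync_upd_tcp() above: "sync" counts how
		 * many peers in the received update would move our state
		 * backwards.  When sync < 2 at least one direction is usable,
		 * so the expire time and timeout are refreshed; any nonzero
		 * sync means our local copy is newer, the update is counted
		 * in pfsyncs_stale and a fresh update of our own state is
		 * queued and pushed back to the peer.
		 */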
up->dst.state > PF_TCPS_PROXY_DST) { 1002 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1003 printf("pfsync_input: " 1004 "PFSYNC_ACT_UPD_C: " 1005 "invalid value\n"); 1006 } 1007 V_pfsyncstats.pfsyncs_badval++; 1008 continue; 1009 } 1010 1011 st = pf_find_state_byid(up->id, up->creatorid); 1012 if (st == NULL) { 1013 /* We don't have this state. Ask for it. */ 1014 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1015 pfsync_request_update(up->creatorid, up->id); 1016 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1017 continue; 1018 } 1019 1020 if (st->state_flags & PFSTATE_ACK) { 1021 pfsync_undefer_state(st, 1); 1022 } 1023 1024 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1025 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1026 else { 1027 sync = 0; 1028 1029 /* 1030 * Non-TCP protocol state machine always go 1031 * forwards 1032 */ 1033 if (st->src.state > up->src.state) 1034 sync++; 1035 else 1036 pf_state_peer_ntoh(&up->src, &st->src); 1037 if (st->dst.state > up->dst.state) 1038 sync++; 1039 else 1040 pf_state_peer_ntoh(&up->dst, &st->dst); 1041 } 1042 if (sync < 2) { 1043 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1044 pf_state_peer_ntoh(&up->dst, &st->dst); 1045 st->expire = time_uptime; 1046 st->timeout = up->timeout; 1047 } 1048 st->pfsync_time = time_uptime; 1049 1050 if (sync) { 1051 V_pfsyncstats.pfsyncs_stale++; 1052 1053 pfsync_update_state(st); 1054 PF_STATE_UNLOCK(st); 1055 pfsync_push_all(sc); 1056 continue; 1057 } 1058 PF_STATE_UNLOCK(st); 1059 } 1060 1061 return (len); 1062 } 1063 1064 static int 1065 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags) 1066 { 1067 struct pfsync_upd_req *ur, *ura; 1068 struct mbuf *mp; 1069 int len = count * sizeof(*ur); 1070 int i, offp; 1071 1072 struct pf_kstate *st; 1073 1074 mp = m_pulldown(m, offset, len, &offp); 1075 if (mp == NULL) { 1076 V_pfsyncstats.pfsyncs_badlen++; 1077 return (-1); 1078 } 1079 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1080 1081 for (i = 0; i < count; i++) { 1082 ur = &ura[i]; 1083 1084 if (ur->id == 0 && ur->creatorid == 0) 1085 pfsync_bulk_start(); 1086 else { 1087 st = pf_find_state_byid(ur->id, ur->creatorid); 1088 if (st == NULL) { 1089 V_pfsyncstats.pfsyncs_badstate++; 1090 continue; 1091 } 1092 if (st->state_flags & PFSTATE_NOSYNC) { 1093 PF_STATE_UNLOCK(st); 1094 continue; 1095 } 1096 1097 pfsync_update_state_req(st); 1098 PF_STATE_UNLOCK(st); 1099 } 1100 } 1101 1102 return (len); 1103 } 1104 1105 static int 1106 pfsync_in_del(struct mbuf *m, int offset, int count, int flags) 1107 { 1108 struct mbuf *mp; 1109 struct pfsync_state *sa, *sp; 1110 struct pf_kstate *st; 1111 int len = count * sizeof(*sp); 1112 int offp, i; 1113 1114 mp = m_pulldown(m, offset, len, &offp); 1115 if (mp == NULL) { 1116 V_pfsyncstats.pfsyncs_badlen++; 1117 return (-1); 1118 } 1119 sa = (struct pfsync_state *)(mp->m_data + offp); 1120 1121 for (i = 0; i < count; i++) { 1122 sp = &sa[i]; 1123 1124 st = pf_find_state_byid(sp->id, sp->creatorid); 1125 if (st == NULL) { 1126 V_pfsyncstats.pfsyncs_badstate++; 1127 continue; 1128 } 1129 st->state_flags |= PFSTATE_NOSYNC; 1130 pf_unlink_state(st); 1131 } 1132 1133 return (len); 1134 } 1135 1136 static int 1137 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags) 1138 { 1139 struct mbuf *mp; 1140 struct pfsync_del_c *sa, *sp; 1141 struct pf_kstate *st; 1142 int len = count * sizeof(*sp); 1143 int offp, i; 1144 1145 mp = m_pulldown(m, offset, len, &offp); 1146 if (mp == NULL) { 1147 V_pfsyncstats.pfsyncs_badlen++; 1148 return (-1); 1149 } 1150 sa = (struct 
pfsync_del_c *)(mp->m_data + offp); 1151 1152 for (i = 0; i < count; i++) { 1153 sp = &sa[i]; 1154 1155 st = pf_find_state_byid(sp->id, sp->creatorid); 1156 if (st == NULL) { 1157 V_pfsyncstats.pfsyncs_badstate++; 1158 continue; 1159 } 1160 1161 st->state_flags |= PFSTATE_NOSYNC; 1162 pf_unlink_state(st); 1163 } 1164 1165 return (len); 1166 } 1167 1168 static int 1169 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags) 1170 { 1171 struct pfsync_softc *sc = V_pfsyncif; 1172 struct pfsync_bus *bus; 1173 struct mbuf *mp; 1174 int len = count * sizeof(*bus); 1175 int offp; 1176 1177 PFSYNC_BLOCK(sc); 1178 1179 /* If we're not waiting for a bulk update, who cares. */ 1180 if (sc->sc_ureq_sent == 0) { 1181 PFSYNC_BUNLOCK(sc); 1182 return (len); 1183 } 1184 1185 mp = m_pulldown(m, offset, len, &offp); 1186 if (mp == NULL) { 1187 PFSYNC_BUNLOCK(sc); 1188 V_pfsyncstats.pfsyncs_badlen++; 1189 return (-1); 1190 } 1191 bus = (struct pfsync_bus *)(mp->m_data + offp); 1192 1193 switch (bus->status) { 1194 case PFSYNC_BUS_START: 1195 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1196 V_pf_limits[PF_LIMIT_STATES].limit / 1197 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1198 sizeof(struct pfsync_state)), 1199 pfsync_bulk_fail, sc); 1200 if (V_pf_status.debug >= PF_DEBUG_MISC) 1201 printf("pfsync: received bulk update start\n"); 1202 break; 1203 1204 case PFSYNC_BUS_END: 1205 if (time_uptime - ntohl(bus->endtime) >= 1206 sc->sc_ureq_sent) { 1207 /* that's it, we're happy */ 1208 sc->sc_ureq_sent = 0; 1209 sc->sc_bulk_tries = 0; 1210 callout_stop(&sc->sc_bulkfail_tmo); 1211 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1212 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1213 "pfsync bulk done"); 1214 sc->sc_flags |= PFSYNCF_OK; 1215 if (V_pf_status.debug >= PF_DEBUG_MISC) 1216 printf("pfsync: received valid " 1217 "bulk update end\n"); 1218 } else { 1219 if (V_pf_status.debug >= PF_DEBUG_MISC) 1220 printf("pfsync: received invalid " 1221 "bulk update end: bad timestamp\n"); 1222 } 1223 break; 1224 } 1225 PFSYNC_BUNLOCK(sc); 1226 1227 return (len); 1228 } 1229 1230 static int 1231 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags) 1232 { 1233 int len = count * sizeof(struct pfsync_tdb); 1234 1235 #if defined(IPSEC) 1236 struct pfsync_tdb *tp; 1237 struct mbuf *mp; 1238 int offp; 1239 int i; 1240 int s; 1241 1242 mp = m_pulldown(m, offset, len, &offp); 1243 if (mp == NULL) { 1244 V_pfsyncstats.pfsyncs_badlen++; 1245 return (-1); 1246 } 1247 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1248 1249 for (i = 0; i < count; i++) 1250 pfsync_update_net_tdb(&tp[i]); 1251 #endif 1252 1253 return (len); 1254 } 1255 1256 #if defined(IPSEC) 1257 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1258 static void 1259 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1260 { 1261 struct tdb *tdb; 1262 int s; 1263 1264 /* check for invalid values */ 1265 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1266 (pt->dst.sa.sa_family != AF_INET && 1267 pt->dst.sa.sa_family != AF_INET6)) 1268 goto bad; 1269 1270 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1271 if (tdb) { 1272 pt->rpl = ntohl(pt->rpl); 1273 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1274 1275 /* Neither replay nor byte counter should ever decrease. 
*/ 1276 if (pt->rpl < tdb->tdb_rpl || 1277 pt->cur_bytes < tdb->tdb_cur_bytes) { 1278 goto bad; 1279 } 1280 1281 tdb->tdb_rpl = pt->rpl; 1282 tdb->tdb_cur_bytes = pt->cur_bytes; 1283 } 1284 return; 1285 1286 bad: 1287 if (V_pf_status.debug >= PF_DEBUG_MISC) 1288 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1289 "invalid value\n"); 1290 V_pfsyncstats.pfsyncs_badstate++; 1291 return; 1292 } 1293 #endif 1294 1295 static int 1296 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags) 1297 { 1298 /* check if we are at the right place in the packet */ 1299 if (offset != m->m_pkthdr.len) 1300 V_pfsyncstats.pfsyncs_badlen++; 1301 1302 /* we're done. free and let the caller return */ 1303 m_freem(m); 1304 return (-1); 1305 } 1306 1307 static int 1308 pfsync_in_error(struct mbuf *m, int offset, int count, int flags) 1309 { 1310 V_pfsyncstats.pfsyncs_badact++; 1311 1312 m_freem(m); 1313 return (-1); 1314 } 1315 1316 static int 1317 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1318 struct route *rt) 1319 { 1320 m_freem(m); 1321 return (0); 1322 } 1323 1324 /* ARGSUSED */ 1325 static int 1326 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1327 { 1328 struct pfsync_softc *sc = ifp->if_softc; 1329 struct ifreq *ifr = (struct ifreq *)data; 1330 struct pfsyncreq pfsyncr; 1331 size_t nvbuflen; 1332 int error; 1333 int c; 1334 1335 switch (cmd) { 1336 case SIOCSIFFLAGS: 1337 PFSYNC_LOCK(sc); 1338 if (ifp->if_flags & IFF_UP) { 1339 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1340 PFSYNC_UNLOCK(sc); 1341 pfsync_pointers_init(); 1342 } else { 1343 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1344 PFSYNC_UNLOCK(sc); 1345 pfsync_pointers_uninit(); 1346 } 1347 break; 1348 case SIOCSIFMTU: 1349 if (!sc->sc_sync_if || 1350 ifr->ifr_mtu <= PFSYNC_MINPKT || 1351 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1352 return (EINVAL); 1353 if (ifr->ifr_mtu < ifp->if_mtu) { 1354 for (c = 0; c < pfsync_buckets; c++) { 1355 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1356 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1357 pfsync_sendout(1, c); 1358 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1359 } 1360 } 1361 ifp->if_mtu = ifr->ifr_mtu; 1362 break; 1363 case SIOCGETPFSYNC: 1364 bzero(&pfsyncr, sizeof(pfsyncr)); 1365 PFSYNC_LOCK(sc); 1366 if (sc->sc_sync_if) { 1367 strlcpy(pfsyncr.pfsyncr_syncdev, 1368 sc->sc_sync_if->if_xname, IFNAMSIZ); 1369 } 1370 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1371 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1372 pfsyncr.pfsyncr_defer = sc->sc_flags; 1373 PFSYNC_UNLOCK(sc); 1374 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1375 sizeof(pfsyncr))); 1376 1377 case SIOCGETPFSYNCNV: 1378 { 1379 nvlist_t *nvl_syncpeer; 1380 nvlist_t *nvl = nvlist_create(0); 1381 1382 if (nvl == NULL) 1383 return (ENOMEM); 1384 1385 if (sc->sc_sync_if) 1386 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1387 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1388 nvlist_add_number(nvl, "flags", sc->sc_flags); 1389 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1390 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1391 1392 void *packed = NULL; 1393 packed = nvlist_pack(nvl, &nvbuflen); 1394 if (packed == NULL) { 1395 free(packed, M_NVLIST); 1396 nvlist_destroy(nvl); 1397 return (ENOMEM); 1398 } 1399 1400 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1401 ifr->ifr_cap_nv.length = nvbuflen; 1402 ifr->ifr_cap_nv.buffer = NULL; 1403 free(packed, M_NVLIST); 1404 nvlist_destroy(nvl); 1405 return 
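			/*
			 * The packed nvlist does not fit in the caller's
			 * buffer: report the required size back through
			 * ifr_cap_nv.length and fail with EFBIG so that
			 * userland can allocate a large enough buffer and
			 * retry the SIOCGETPFSYNCNV ioctl.
			 */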
(EFBIG); 1406 } 1407 1408 ifr->ifr_cap_nv.length = nvbuflen; 1409 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1410 1411 nvlist_destroy(nvl); 1412 nvlist_destroy(nvl_syncpeer); 1413 free(packed, M_NVLIST); 1414 break; 1415 } 1416 1417 case SIOCSETPFSYNC: 1418 { 1419 struct pfsync_kstatus status; 1420 1421 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1422 return (error); 1423 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1424 sizeof(pfsyncr)))) 1425 return (error); 1426 1427 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1428 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1429 1430 error = pfsync_kstatus_to_softc(&status, sc); 1431 return (error); 1432 } 1433 case SIOCSETPFSYNCNV: 1434 { 1435 struct pfsync_kstatus status; 1436 void *data; 1437 nvlist_t *nvl; 1438 1439 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1440 return (error); 1441 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1442 return (EINVAL); 1443 1444 data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); 1445 1446 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1447 ifr->ifr_cap_nv.length)) != 0) { 1448 free(data, M_TEMP); 1449 return (error); 1450 } 1451 1452 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1453 free(data, M_TEMP); 1454 return (EINVAL); 1455 } 1456 1457 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1458 pfsync_nvstatus_to_kstatus(nvl, &status); 1459 1460 nvlist_destroy(nvl); 1461 free(data, M_TEMP); 1462 1463 error = pfsync_kstatus_to_softc(&status, sc); 1464 return (error); 1465 } 1466 default: 1467 return (ENOTTY); 1468 } 1469 1470 return (0); 1471 } 1472 1473 static void 1474 pfsync_out_state(struct pf_kstate *st, void *buf) 1475 { 1476 struct pfsync_state *sp = buf; 1477 1478 pfsync_state_export(sp, st); 1479 } 1480 1481 static void 1482 pfsync_out_iack(struct pf_kstate *st, void *buf) 1483 { 1484 struct pfsync_ins_ack *iack = buf; 1485 1486 iack->id = st->id; 1487 iack->creatorid = st->creatorid; 1488 } 1489 1490 static void 1491 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1492 { 1493 struct pfsync_upd_c *up = buf; 1494 1495 bzero(up, sizeof(*up)); 1496 up->id = st->id; 1497 pf_state_peer_hton(&st->src, &up->src); 1498 pf_state_peer_hton(&st->dst, &up->dst); 1499 up->creatorid = st->creatorid; 1500 up->timeout = st->timeout; 1501 } 1502 1503 static void 1504 pfsync_out_del(struct pf_kstate *st, void *buf) 1505 { 1506 struct pfsync_del_c *dp = buf; 1507 1508 dp->id = st->id; 1509 dp->creatorid = st->creatorid; 1510 st->state_flags |= PFSTATE_NOSYNC; 1511 } 1512 1513 static void 1514 pfsync_drop(struct pfsync_softc *sc) 1515 { 1516 struct pf_kstate *st, *next; 1517 struct pfsync_upd_req_item *ur; 1518 struct pfsync_bucket *b; 1519 int c, q; 1520 1521 for (c = 0; c < pfsync_buckets; c++) { 1522 b = &sc->sc_buckets[c]; 1523 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1524 if (TAILQ_EMPTY(&b->b_qs[q])) 1525 continue; 1526 1527 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1528 KASSERT(st->sync_state == q, 1529 ("%s: st->sync_state == q", 1530 __func__)); 1531 st->sync_state = PFSYNC_S_NONE; 1532 pf_release_state(st); 1533 } 1534 TAILQ_INIT(&b->b_qs[q]); 1535 } 1536 1537 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1538 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1539 free(ur, M_PFSYNC); 1540 } 1541 1542 b->b_len = PFSYNC_MINPKT; 1543 b->b_plus = NULL; 1544 } 1545 } 1546 1547 static void 1548 pfsync_sendout(int schedswi, int c) 1549 { 1550 struct pfsync_softc *sc = V_pfsyncif; 
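	/*
	 * Layout of the datagram assembled below (IPv4 case):
	 *
	 *	struct ip		(copied from sc_template)
	 *	struct pfsync_header	(version, len, pf ruleset checksum)
	 *	for each queue with work:
	 *		struct pfsync_subheader	(action, count)
	 *		'count' fixed-size records written by pfsync_qs[q].write()
	 *	pending update requests and the optional b_plus region
	 *	struct pfsync_subheader	PFSYNC_ACT_EOF
	 *
	 * b_len was kept equal to this total as items were queued, so the
	 * finished packet is guaranteed to fit within the interface MTU.
	 */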
1551 struct ifnet *ifp = sc->sc_ifp; 1552 struct mbuf *m; 1553 struct pfsync_header *ph; 1554 struct pfsync_subheader *subh; 1555 struct pf_kstate *st, *st_next; 1556 struct pfsync_upd_req_item *ur; 1557 struct pfsync_bucket *b = &sc->sc_buckets[c]; 1558 int aflen, offset; 1559 int q, count = 0; 1560 1561 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1562 KASSERT(b->b_len > PFSYNC_MINPKT, 1563 ("%s: sc_len %zu", __func__, b->b_len)); 1564 PFSYNC_BUCKET_LOCK_ASSERT(b); 1565 1566 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1567 pfsync_drop(sc); 1568 return; 1569 } 1570 1571 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1572 if (m == NULL) { 1573 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1574 V_pfsyncstats.pfsyncs_onomem++; 1575 return; 1576 } 1577 m->m_data += max_linkhdr; 1578 m->m_len = m->m_pkthdr.len = b->b_len; 1579 1580 /* build the ip header */ 1581 switch (sc->sc_sync_peer.ss_family) { 1582 #ifdef INET 1583 case AF_INET: 1584 { 1585 struct ip *ip; 1586 1587 ip = mtod(m, struct ip *); 1588 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 1589 aflen = offset = sizeof(*ip); 1590 1591 ip->ip_len = htons(m->m_pkthdr.len); 1592 ip_fillid(ip); 1593 break; 1594 } 1595 #endif 1596 default: 1597 m_freem(m); 1598 return; 1599 } 1600 1601 1602 /* build the pfsync header */ 1603 ph = (struct pfsync_header *)(m->m_data + offset); 1604 bzero(ph, sizeof(*ph)); 1605 offset += sizeof(*ph); 1606 1607 ph->version = PFSYNC_VERSION; 1608 ph->len = htons(b->b_len - aflen); 1609 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1610 1611 /* walk the queues */ 1612 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1613 if (TAILQ_EMPTY(&b->b_qs[q])) 1614 continue; 1615 1616 subh = (struct pfsync_subheader *)(m->m_data + offset); 1617 offset += sizeof(*subh); 1618 1619 count = 0; 1620 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 1621 KASSERT(st->sync_state == q, 1622 ("%s: st->sync_state == q", 1623 __func__)); 1624 /* 1625 * XXXGL: some of write methods do unlocked reads 1626 * of state data :( 1627 */ 1628 pfsync_qs[q].write(st, m->m_data + offset); 1629 offset += pfsync_qs[q].len; 1630 st->sync_state = PFSYNC_S_NONE; 1631 pf_release_state(st); 1632 count++; 1633 } 1634 TAILQ_INIT(&b->b_qs[q]); 1635 1636 bzero(subh, sizeof(*subh)); 1637 subh->action = pfsync_qs[q].action; 1638 subh->count = htons(count); 1639 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1640 } 1641 1642 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 1643 subh = (struct pfsync_subheader *)(m->m_data + offset); 1644 offset += sizeof(*subh); 1645 1646 count = 0; 1647 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1648 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1649 1650 bcopy(&ur->ur_msg, m->m_data + offset, 1651 sizeof(ur->ur_msg)); 1652 offset += sizeof(ur->ur_msg); 1653 free(ur, M_PFSYNC); 1654 count++; 1655 } 1656 1657 bzero(subh, sizeof(*subh)); 1658 subh->action = PFSYNC_ACT_UPD_REQ; 1659 subh->count = htons(count); 1660 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1661 } 1662 1663 /* has someone built a custom region for us to add? 
*/ 1664 if (b->b_plus != NULL) { 1665 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 1666 offset += b->b_pluslen; 1667 1668 b->b_plus = NULL; 1669 } 1670 1671 subh = (struct pfsync_subheader *)(m->m_data + offset); 1672 offset += sizeof(*subh); 1673 1674 bzero(subh, sizeof(*subh)); 1675 subh->action = PFSYNC_ACT_EOF; 1676 subh->count = htons(1); 1677 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1678 1679 /* we're done, let's put it on the wire */ 1680 if (ifp->if_bpf) { 1681 m->m_data += aflen; 1682 m->m_len = m->m_pkthdr.len = b->b_len - aflen; 1683 BPF_MTAP(ifp, m); 1684 m->m_data -= aflen; 1685 m->m_len = m->m_pkthdr.len = b->b_len; 1686 } 1687 1688 if (sc->sc_sync_if == NULL) { 1689 b->b_len = PFSYNC_MINPKT; 1690 m_freem(m); 1691 return; 1692 } 1693 1694 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 1695 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 1696 b->b_len = PFSYNC_MINPKT; 1697 1698 if (!_IF_QFULL(&b->b_snd)) 1699 _IF_ENQUEUE(&b->b_snd, m); 1700 else { 1701 m_freem(m); 1702 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 1703 } 1704 if (schedswi) 1705 swi_sched(V_pfsync_swi_cookie, 0); 1706 } 1707 1708 static void 1709 pfsync_insert_state(struct pf_kstate *st) 1710 { 1711 struct pfsync_softc *sc = V_pfsyncif; 1712 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1713 1714 if (st->state_flags & PFSTATE_NOSYNC) 1715 return; 1716 1717 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1718 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1719 st->state_flags |= PFSTATE_NOSYNC; 1720 return; 1721 } 1722 1723 KASSERT(st->sync_state == PFSYNC_S_NONE, 1724 ("%s: st->sync_state %u", __func__, st->sync_state)); 1725 1726 PFSYNC_BUCKET_LOCK(b); 1727 if (b->b_len == PFSYNC_MINPKT) 1728 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1729 1730 pfsync_q_ins(st, PFSYNC_S_INS, true); 1731 PFSYNC_BUCKET_UNLOCK(b); 1732 1733 st->sync_updates = 0; 1734 } 1735 1736 static int 1737 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 1738 { 1739 struct pfsync_softc *sc = V_pfsyncif; 1740 struct pfsync_deferral *pd; 1741 struct pfsync_bucket *b; 1742 1743 if (m->m_flags & (M_BCAST|M_MCAST)) 1744 return (0); 1745 1746 if (sc == NULL) 1747 return (0); 1748 1749 b = pfsync_get_bucket(sc, st); 1750 1751 PFSYNC_LOCK(sc); 1752 1753 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 1754 !(sc->sc_flags & PFSYNCF_DEFER)) { 1755 PFSYNC_UNLOCK(sc); 1756 return (0); 1757 } 1758 1759 PFSYNC_BUCKET_LOCK(b); 1760 PFSYNC_UNLOCK(sc); 1761 1762 if (b->b_deferred >= 128) 1763 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 1764 1765 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1766 if (pd == NULL) { 1767 PFSYNC_BUCKET_UNLOCK(b); 1768 return (0); 1769 } 1770 b->b_deferred++; 1771 1772 m->m_flags |= M_SKIP_FIREWALL; 1773 st->state_flags |= PFSTATE_ACK; 1774 1775 pd->pd_sc = sc; 1776 pd->pd_refs = 0; 1777 pd->pd_st = st; 1778 pf_ref_state(st); 1779 pd->pd_m = m; 1780 1781 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 1782 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 1783 callout_reset(&pd->pd_tmo, PFSYNC_DEFER_TIMEOUT, pfsync_defer_tmo, pd); 1784 1785 pfsync_push(b); 1786 PFSYNC_BUCKET_UNLOCK(b); 1787 1788 return (1); 1789 } 1790 1791 static void 1792 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1793 { 1794 struct pfsync_softc *sc = pd->pd_sc; 1795 struct mbuf *m = pd->pd_m; 1796 struct pf_kstate *st = pd->pd_st; 1797 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1798 1799 PFSYNC_BUCKET_LOCK_ASSERT(b); 1800 1801 TAILQ_REMOVE(&b->b_deferrals, 
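	    /*
	     * Deferral life cycle: pfsync_defer() parked the state-creating
	     * packet (marked M_SKIP_FIREWALL) and set PFSTATE_ACK on the
	     * state, keeping at most 128 deferrals per bucket.  The packet is
	     * released either here, once the peer acknowledges the insert
	     * (drop == 0 puts it back on the bucket's send queue), or from
	     * pfsync_defer_tmo() when PFSYNC_DEFER_TIMEOUT expires, that is
	     * (20 * hz) / 1000 ticks, roughly 20 ms (20 ticks at hz = 1000),
	     * in which case it is transmitted directly via ip_output().
	     */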
pd, pd_entry); 1802 b->b_deferred--; 1803 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1804 free(pd, M_PFSYNC); 1805 pf_release_state(st); 1806 1807 if (drop) 1808 m_freem(m); 1809 else { 1810 _IF_ENQUEUE(&b->b_snd, m); 1811 pfsync_push(b); 1812 } 1813 } 1814 1815 static void 1816 pfsync_defer_tmo(void *arg) 1817 { 1818 struct epoch_tracker et; 1819 struct pfsync_deferral *pd = arg; 1820 struct pfsync_softc *sc = pd->pd_sc; 1821 struct mbuf *m = pd->pd_m; 1822 struct pf_kstate *st = pd->pd_st; 1823 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1824 1825 PFSYNC_BUCKET_LOCK_ASSERT(b); 1826 1827 if (sc->sc_sync_if == NULL) 1828 return; 1829 1830 NET_EPOCH_ENTER(et); 1831 CURVNET_SET(sc->sc_sync_if->if_vnet); 1832 1833 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1834 b->b_deferred--; 1835 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1836 if (pd->pd_refs == 0) 1837 free(pd, M_PFSYNC); 1838 PFSYNC_BUCKET_UNLOCK(b); 1839 1840 switch (sc->sc_sync_peer.ss_family) { 1841 #ifdef INET 1842 case AF_INET: 1843 ip_output(m, NULL, NULL, 0, NULL, NULL); 1844 break; 1845 #endif 1846 } 1847 1848 pf_release_state(st); 1849 1850 CURVNET_RESTORE(); 1851 NET_EPOCH_EXIT(et); 1852 } 1853 1854 static void 1855 pfsync_undefer_state(struct pf_kstate *st, int drop) 1856 { 1857 struct pfsync_softc *sc = V_pfsyncif; 1858 struct pfsync_deferral *pd; 1859 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1860 1861 PFSYNC_BUCKET_LOCK(b); 1862 1863 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 1864 if (pd->pd_st == st) { 1865 if (callout_stop(&pd->pd_tmo) > 0) 1866 pfsync_undefer(pd, drop); 1867 1868 PFSYNC_BUCKET_UNLOCK(b); 1869 return; 1870 } 1871 } 1872 PFSYNC_BUCKET_UNLOCK(b); 1873 1874 panic("%s: unable to find deferred state", __func__); 1875 } 1876 1877 static struct pfsync_bucket* 1878 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 1879 { 1880 int c = PF_IDHASH(st) % pfsync_buckets; 1881 return &sc->sc_buckets[c]; 1882 } 1883 1884 static void 1885 pfsync_update_state(struct pf_kstate *st) 1886 { 1887 struct pfsync_softc *sc = V_pfsyncif; 1888 bool sync = false, ref = true; 1889 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1890 1891 PF_STATE_LOCK_ASSERT(st); 1892 PFSYNC_BUCKET_LOCK(b); 1893 1894 if (st->state_flags & PFSTATE_ACK) 1895 pfsync_undefer_state(st, 0); 1896 if (st->state_flags & PFSTATE_NOSYNC) { 1897 if (st->sync_state != PFSYNC_S_NONE) 1898 pfsync_q_del(st, true, b); 1899 PFSYNC_BUCKET_UNLOCK(b); 1900 return; 1901 } 1902 1903 if (b->b_len == PFSYNC_MINPKT) 1904 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1905 1906 switch (st->sync_state) { 1907 case PFSYNC_S_UPD_C: 1908 case PFSYNC_S_UPD: 1909 case PFSYNC_S_INS: 1910 /* we're already handling it */ 1911 1912 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1913 st->sync_updates++; 1914 if (st->sync_updates >= sc->sc_maxupdates) 1915 sync = true; 1916 } 1917 break; 1918 1919 case PFSYNC_S_IACK: 1920 pfsync_q_del(st, false, b); 1921 ref = false; 1922 /* FALLTHROUGH */ 1923 1924 case PFSYNC_S_NONE: 1925 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 1926 st->sync_updates = 0; 1927 break; 1928 1929 default: 1930 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1931 } 1932 1933 if (sync || (time_uptime - st->pfsync_time) < 2) 1934 pfsync_push(b); 1935 1936 PFSYNC_BUCKET_UNLOCK(b); 1937 } 1938 1939 static void 1940 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1941 { 1942 struct pfsync_softc *sc = V_pfsyncif; 1943 struct pfsync_bucket *b = &sc->sc_buckets[0]; 1944 
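	/*
	 * Update requests are always assembled on bucket 0, so callers such
	 * as pfsync_in_upd_c() and pfsync_bulk_fail() take the bucket 0 lock
	 * before calling here.  nlen accounts for an extra subheader when
	 * this is the first request in the batch, and if the pending packet
	 * would no longer fit in the interface MTU it is flushed with
	 * pfsync_sendout() before the request is queued.
	 */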
struct pfsync_upd_req_item *item; 1945 size_t nlen = sizeof(struct pfsync_upd_req); 1946 1947 PFSYNC_BUCKET_LOCK_ASSERT(b); 1948 1949 /* 1950 * This code does a bit to prevent multiple update requests for the 1951 * same state being generated. It searches current subheader queue, 1952 * but it doesn't lookup into queue of already packed datagrams. 1953 */ 1954 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 1955 if (item->ur_msg.id == id && 1956 item->ur_msg.creatorid == creatorid) 1957 return; 1958 1959 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1960 if (item == NULL) 1961 return; /* XXX stats */ 1962 1963 item->ur_msg.id = id; 1964 item->ur_msg.creatorid = creatorid; 1965 1966 if (TAILQ_EMPTY(&b->b_upd_req_list)) 1967 nlen += sizeof(struct pfsync_subheader); 1968 1969 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 1970 pfsync_sendout(0, 0); 1971 1972 nlen = sizeof(struct pfsync_subheader) + 1973 sizeof(struct pfsync_upd_req); 1974 } 1975 1976 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 1977 b->b_len += nlen; 1978 1979 pfsync_push(b); 1980 } 1981 1982 static bool 1983 pfsync_update_state_req(struct pf_kstate *st) 1984 { 1985 struct pfsync_softc *sc = V_pfsyncif; 1986 bool ref = true, full = false; 1987 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1988 1989 PF_STATE_LOCK_ASSERT(st); 1990 PFSYNC_BUCKET_LOCK(b); 1991 1992 if (st->state_flags & PFSTATE_NOSYNC) { 1993 if (st->sync_state != PFSYNC_S_NONE) 1994 pfsync_q_del(st, true, b); 1995 PFSYNC_BUCKET_UNLOCK(b); 1996 return (full); 1997 } 1998 1999 switch (st->sync_state) { 2000 case PFSYNC_S_UPD_C: 2001 case PFSYNC_S_IACK: 2002 pfsync_q_del(st, false, b); 2003 ref = false; 2004 /* FALLTHROUGH */ 2005 2006 case PFSYNC_S_NONE: 2007 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2008 pfsync_push(b); 2009 break; 2010 2011 case PFSYNC_S_INS: 2012 case PFSYNC_S_UPD: 2013 case PFSYNC_S_DEL: 2014 /* we're already handling it */ 2015 break; 2016 2017 default: 2018 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2019 } 2020 2021 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state)) 2022 full = true; 2023 2024 PFSYNC_BUCKET_UNLOCK(b); 2025 2026 return (full); 2027 } 2028 2029 static void 2030 pfsync_delete_state(struct pf_kstate *st) 2031 { 2032 struct pfsync_softc *sc = V_pfsyncif; 2033 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2034 bool ref = true; 2035 2036 PFSYNC_BUCKET_LOCK(b); 2037 if (st->state_flags & PFSTATE_ACK) 2038 pfsync_undefer_state(st, 1); 2039 if (st->state_flags & PFSTATE_NOSYNC) { 2040 if (st->sync_state != PFSYNC_S_NONE) 2041 pfsync_q_del(st, true, b); 2042 PFSYNC_BUCKET_UNLOCK(b); 2043 return; 2044 } 2045 2046 if (b->b_len == PFSYNC_MINPKT) 2047 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2048 2049 switch (st->sync_state) { 2050 case PFSYNC_S_INS: 2051 /* We never got to tell the world so just forget about it. 
*/ 2052 pfsync_q_del(st, true, b); 2053 break; 2054 2055 case PFSYNC_S_UPD_C: 2056 case PFSYNC_S_UPD: 2057 case PFSYNC_S_IACK: 2058 pfsync_q_del(st, false, b); 2059 ref = false; 2060 /* FALLTHROUGH */ 2061 2062 case PFSYNC_S_NONE: 2063 pfsync_q_ins(st, PFSYNC_S_DEL, ref); 2064 break; 2065 2066 default: 2067 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2068 } 2069 2070 PFSYNC_BUCKET_UNLOCK(b); 2071 } 2072 2073 static void 2074 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2075 { 2076 struct { 2077 struct pfsync_subheader subh; 2078 struct pfsync_clr clr; 2079 } __packed r; 2080 2081 bzero(&r, sizeof(r)); 2082 2083 r.subh.action = PFSYNC_ACT_CLR; 2084 r.subh.count = htons(1); 2085 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2086 2087 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2088 r.clr.creatorid = creatorid; 2089 2090 pfsync_send_plus(&r, sizeof(r)); 2091 } 2092 2093 static void 2094 pfsync_q_ins(struct pf_kstate *st, int q, bool ref) 2095 { 2096 struct pfsync_softc *sc = V_pfsyncif; 2097 size_t nlen = pfsync_qs[q].len; 2098 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2099 2100 PFSYNC_BUCKET_LOCK_ASSERT(b); 2101 2102 KASSERT(st->sync_state == PFSYNC_S_NONE, 2103 ("%s: st->sync_state %u", __func__, st->sync_state)); 2104 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2105 b->b_len)); 2106 2107 if (TAILQ_EMPTY(&b->b_qs[q])) 2108 nlen += sizeof(struct pfsync_subheader); 2109 2110 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2111 pfsync_sendout(1, b->b_id); 2112 2113 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2114 } 2115 2116 b->b_len += nlen; 2117 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2118 st->sync_state = q; 2119 if (ref) 2120 pf_ref_state(st); 2121 } 2122 2123 static void 2124 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2125 { 2126 int q = st->sync_state; 2127 2128 PFSYNC_BUCKET_LOCK_ASSERT(b); 2129 KASSERT(st->sync_state != PFSYNC_S_NONE, 2130 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2131 2132 b->b_len -= pfsync_qs[q].len; 2133 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2134 st->sync_state = PFSYNC_S_NONE; 2135 if (unref) 2136 pf_release_state(st); 2137 2138 if (TAILQ_EMPTY(&b->b_qs[q])) 2139 b->b_len -= sizeof(struct pfsync_subheader); 2140 } 2141 2142 static void 2143 pfsync_bulk_start(void) 2144 { 2145 struct pfsync_softc *sc = V_pfsyncif; 2146 2147 if (V_pf_status.debug >= PF_DEBUG_MISC) 2148 printf("pfsync: received bulk update request\n"); 2149 2150 PFSYNC_BLOCK(sc); 2151 2152 sc->sc_ureq_received = time_uptime; 2153 sc->sc_bulk_hashid = 0; 2154 sc->sc_bulk_stateid = 0; 2155 pfsync_bulk_status(PFSYNC_BUS_START); 2156 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2157 PFSYNC_BUNLOCK(sc); 2158 } 2159 2160 static void 2161 pfsync_bulk_update(void *arg) 2162 { 2163 struct pfsync_softc *sc = arg; 2164 struct pf_kstate *s; 2165 int i; 2166 2167 PFSYNC_BLOCK_ASSERT(sc); 2168 CURVNET_SET(sc->sc_ifp->if_vnet); 2169 2170 /* 2171 * Start with last state from previous invocation. 2172 * It may had gone, in this case start from the 2173 * hash slot. 
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		for (; s; s = LIST_NEXT(s, entry)) {
			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				if (pfsync_update_state_req(s)) {
					/* We've filled a packet. */
					sc->sc_bulk_hashid = i;
					sc->sc_bulk_stateid = s->id;
					sc->sc_bulk_creatorid = s->creatorid;
					PF_HASHROW_UNLOCK(ih);
					callout_reset(&sc->sc_bulk_tmo, 1,
					    pfsync_bulk_update, sc);
					goto full;
				}
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);
full:
	CURVNET_RESTORE();
}

static void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = V_pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;

	r.bus.creatorid = V_pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

static void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_BUCKET_LOCK(b);
		pfsync_request_update(0, 0);
		PFSYNC_BUCKET_UNLOCK(b);
	} else {
		/* Pretend the transfer was OK. */
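		/*
		 * Reset the bulk bookkeeping and lift the carp(4) demotion
		 * that was applied when the bulk update was requested.
		 */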
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}

static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	PFSYNC_BUCKET_LOCK(b);

	if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1, b->b_id);

	b->b_plus = plus;
	b->b_len += (b->b_pluslen = pluslen);

	pfsync_sendout(1, b->b_id);
	PFSYNC_BUCKET_UNLOCK(b);
}

static void
pfsync_timeout(void *arg)
{
	struct pfsync_bucket *b = arg;

	CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
	PFSYNC_BUCKET_LOCK(b);
	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);
	CURVNET_RESTORE();
}

static void
pfsync_push(struct pfsync_bucket *b)
{

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	b->b_flags |= PFSYNCF_BUCKET_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}

static void
pfsync_push_all(struct pfsync_softc *sc)
{
	int c;
	struct pfsync_bucket *b;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		pfsync_push(b);
		PFSYNC_BUCKET_UNLOCK(b);
	}
}

static void
pfsyncintr(void *arg)
{
	struct epoch_tracker et;
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b;
	struct mbuf *m, *n;
	int c, error;

	NET_EPOCH_ENTER(et);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
			pfsync_sendout(0, b->b_id);
			b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
		}
		_IF_DEQUEUE_ALL(&b->b_snd, m);
		PFSYNC_BUCKET_UNLOCK(b);

		for (; m != NULL; m = n) {
			n = m->m_nextpkt;
			m->m_nextpkt = NULL;

			/*
			 * We distinguish between a deferral packet and our
			 * own pfsync packet based on the M_SKIP_FIREWALL
			 * flag. This is XXX.
			 */
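			/*
			 * Deferred packets are sent out unmodified; our own
			 * pfsync packets go out via the raw IP output path
			 * using the multicast options in sc_imo.
			 */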
			switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
			case AF_INET:
				if (m->m_flags & M_SKIP_FIREWALL) {
					error = ip_output(m, NULL, NULL, 0,
					    NULL, NULL);
				} else {
					error = ip_output(m, NULL, NULL,
					    IP_RAWOUTPUT, &sc->sc_imo, NULL);
				}
				break;
#endif
			}

			if (error == 0)
				V_pfsyncstats.pfsyncs_opackets++;
			else
				V_pfsyncstats.pfsyncs_oerrors++;
		}
	}
	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}

static int
pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp,
    struct in_mfilter *imf)
{
	struct ip_moptions *imo = &sc->sc_imo;
	int error;

	if (!(ifp->if_flags & IFF_MULTICAST))
		return (EADDRNOTAVAIL);

	switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
	case AF_INET:
	{
		ip_mfilter_init(&imo->imo_head);
		imo->imo_multicast_vif = -1;
		if ((error = in_joingroup(ifp, &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL,
		    &imf->imf_inm)) != 0)
			return (error);

		ip_mfilter_insert(&imo->imo_head, imf);
		imo->imo_multicast_ifp = ifp;
		imo->imo_multicast_ttl = PFSYNC_DFLTTL;
		imo->imo_multicast_loop = 0;
		break;
	}
#endif
	}

	return (0);
}

static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;
	struct in_mfilter *imf;

	while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
		ip_mfilter_remove(&imo->imo_head, imf);
		in_leavegroup(imf->imf_inm, NULL);
		ip_mfilter_free(imf);
	}
	imo->imo_multicast_ifp = NULL;
}

void
pfsync_detach_ifnet(struct ifnet *ifp)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (sc == NULL)
		return;

	PFSYNC_LOCK(sc);

	if (sc->sc_sync_if == ifp) {
		/* We don't need multicast cleanup here, because the interface
		 * is going away. We do need to ensure we don't try to do
		 * cleanup later.
		 */
2445 */ 2446 ip_mfilter_init(&sc->sc_imo.imo_head); 2447 sc->sc_imo.imo_multicast_ifp = NULL; 2448 sc->sc_sync_if = NULL; 2449 } 2450 2451 PFSYNC_UNLOCK(sc); 2452 } 2453 2454 static int 2455 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 2456 { 2457 struct sockaddr_storage sa; 2458 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 2459 status->flags = pfsyncr->pfsyncr_defer; 2460 2461 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 2462 2463 memset(&sa, 0, sizeof(sa)); 2464 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 2465 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 2466 in->sin_family = AF_INET; 2467 in->sin_len = sizeof(*in); 2468 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 2469 } 2470 status->syncpeer = sa; 2471 2472 return 0; 2473 } 2474 2475 static int 2476 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 2477 { 2478 struct in_mfilter *imf = NULL; 2479 struct ifnet *sifp; 2480 struct ip *ip; 2481 int error; 2482 int c; 2483 2484 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 2485 return (EINVAL); 2486 2487 if (status->syncdev[0] == '\0') 2488 sifp = NULL; 2489 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 2490 return (EINVAL); 2491 2492 struct sockaddr_in *status_sin = 2493 (struct sockaddr_in *)&(status->syncpeer); 2494 if (sifp != NULL && (status_sin->sin_addr.s_addr == 0 || 2495 status_sin->sin_addr.s_addr == 2496 htonl(INADDR_PFSYNC_GROUP))) 2497 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 2498 2499 PFSYNC_LOCK(sc); 2500 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 2501 sc_sin->sin_family = AF_INET; 2502 sc_sin->sin_len = sizeof(*sc_sin); 2503 if (status_sin->sin_addr.s_addr == 0) { 2504 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 2505 } else { 2506 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 2507 } 2508 2509 sc->sc_maxupdates = status->maxupdates; 2510 if (status->flags & PFSYNCF_DEFER) { 2511 sc->sc_flags |= PFSYNCF_DEFER; 2512 V_pfsync_defer_ptr = pfsync_defer; 2513 } else { 2514 sc->sc_flags &= ~PFSYNCF_DEFER; 2515 V_pfsync_defer_ptr = NULL; 2516 } 2517 2518 if (sifp == NULL) { 2519 if (sc->sc_sync_if) 2520 if_rele(sc->sc_sync_if); 2521 sc->sc_sync_if = NULL; 2522 pfsync_multicast_cleanup(sc); 2523 PFSYNC_UNLOCK(sc); 2524 return (0); 2525 } 2526 2527 for (c = 0; c < pfsync_buckets; c++) { 2528 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 2529 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 2530 (sifp->if_mtu < sc->sc_ifp->if_mtu || 2531 (sc->sc_sync_if != NULL && 2532 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 2533 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 2534 pfsync_sendout(1, c); 2535 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 2536 } 2537 2538 pfsync_multicast_cleanup(sc); 2539 2540 if (sc_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 2541 error = pfsync_multicast_setup(sc, sifp, imf); 2542 if (error) { 2543 if_rele(sifp); 2544 ip_mfilter_free(imf); 2545 PFSYNC_UNLOCK(sc); 2546 return (error); 2547 } 2548 } 2549 if (sc->sc_sync_if) 2550 if_rele(sc->sc_sync_if); 2551 sc->sc_sync_if = sifp; 2552 2553 ip = &sc->sc_template.ipv4; 2554 bzero(ip, sizeof(*ip)); 2555 ip->ip_v = IPVERSION; 2556 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 2557 ip->ip_tos = IPTOS_LOWDELAY; 2558 /* len and id are set later. 
	ip->ip_off = htons(IP_DF);
	ip->ip_ttl = PFSYNC_DFLTTL;
	ip->ip_p = IPPROTO_PFSYNC;
	ip->ip_src.s_addr = INADDR_ANY;
	ip->ip_dst.s_addr = sc_sin->sin_addr.s_addr;

	/* Request a full state table update. */
	if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(V_pfsync_carp_adj,
		    "pfsync bulk start");
	sc->sc_flags &= ~PFSYNCF_OK;
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: requesting bulk update\n");
	PFSYNC_UNLOCK(sc);
	PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
	pfsync_request_update(0, 0);
	PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
	PFSYNC_BLOCK(sc);
	sc->sc_ureq_sent = time_uptime;
	callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc);
	PFSYNC_BUNLOCK(sc);
	return (0);
}

static void
pfsync_pointers_init(void)
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = pfsync_state_import;
	V_pfsync_insert_state_ptr = pfsync_insert_state;
	V_pfsync_update_state_ptr = pfsync_update_state;
	V_pfsync_delete_state_ptr = pfsync_delete_state;
	V_pfsync_clear_states_ptr = pfsync_clear_states;
	V_pfsync_defer_ptr = pfsync_defer;
	PF_RULES_WUNLOCK();
}

static void
pfsync_pointers_uninit(void)
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = NULL;
	V_pfsync_insert_state_ptr = NULL;
	V_pfsync_update_state_ptr = NULL;
	V_pfsync_delete_state_ptr = NULL;
	V_pfsync_clear_states_ptr = NULL;
	V_pfsync_defer_ptr = NULL;
	PF_RULES_WUNLOCK();
}

static void
vnet_pfsync_init(const void *unused __unused)
{
	int error;

	V_pfsync_cloner = if_clone_simple(pfsyncname,
	    pfsync_clone_create, pfsync_clone_destroy, 1);
	error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif,
	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
	if (error) {
		if_clone_detach(V_pfsync_cloner);
		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
	}

	pfsync_pointers_init();
}
VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pfsync_init, NULL);

static void
vnet_pfsync_uninit(const void *unused __unused)
{
	int ret __diagused;

	pfsync_pointers_uninit();

	if_clone_detach(V_pfsync_cloner);
	ret = swi_remove(V_pfsync_swi_cookie);
	MPASS(ret == 0);
	ret = intr_event_destroy(V_pfsync_swi_ie);
	MPASS(ret == 0);
}

VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
    vnet_pfsync_uninit, NULL);

static int
pfsync_init(void)
{
#ifdef INET
	int error;

	pfsync_detach_ifnet_ptr = pfsync_detach_ifnet;

	error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL);
	if (error)
		return (error);
#endif

	return (0);
}

static void
pfsync_uninit(void)
{
	pfsync_detach_ifnet_ptr = NULL;

#ifdef INET
	ipproto_unregister(IPPROTO_PFSYNC);
#endif
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static moduledata_t pfsync_mod = {
	pfsyncname,
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 1

/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);