1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_pf.h" 68 69 #include <sys/param.h> 70 #include <sys/bus.h> 71 #include <sys/endian.h> 72 #include <sys/interrupt.h> 73 #include <sys/kernel.h> 74 #include <sys/lock.h> 75 #include <sys/mbuf.h> 76 #include <sys/module.h> 77 #include <sys/mutex.h> 78 #include <sys/nv.h> 79 #include <sys/priv.h> 80 #include <sys/smp.h> 81 #include <sys/socket.h> 82 #include <sys/sockio.h> 83 #include <sys/sysctl.h> 84 #include <sys/syslog.h> 85 86 #include <net/bpf.h> 87 #include <net/if.h> 88 #include <net/if_var.h> 89 #include <net/if_clone.h> 90 #include <net/if_private.h> 91 #include <net/if_types.h> 92 #include <net/vnet.h> 93 #include <net/pfvar.h> 94 #include <net/if_pfsync.h> 95 96 #include <netinet/if_ether.h> 97 #include <netinet/in.h> 98 #include <netinet/in_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip_carp.h> 101 #include <netinet/ip_var.h> 102 #include <netinet/tcp.h> 103 #include <netinet/tcp_fsm.h> 104 #include <netinet/tcp_seq.h> 105 106 #include <netinet/ip6.h> 107 #include <netinet6/ip6_var.h> 108 109 #include <netpfil/pf/pfsync_nv.h> 110 111 struct pfsync_bucket; 112 struct pfsync_softc; 113 114 union inet_template { 115 struct ip ipv4; 116 }; 117 118 #define PFSYNC_MINPKT ( \ 119 sizeof(union inet_template) + \ 120 sizeof(struct pfsync_header) + \ 121 sizeof(struct pfsync_subheader) ) 122 123 static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, 124 struct pfsync_state_peer *); 125 static int pfsync_in_clr(struct mbuf *, int, int, int); 126 static int pfsync_in_ins(struct mbuf *, int, int, int); 127 static int pfsync_in_iack(struct mbuf *, int, int, int); 128 static int pfsync_in_upd(struct mbuf *, int, int, int); 129 static int pfsync_in_upd_c(struct mbuf *, int, int, int); 130 static int pfsync_in_ureq(struct mbuf *, int, int, int); 131 static int pfsync_in_del(struct mbuf *, int, int, int); 132 static int pfsync_in_del_c(struct mbuf *, int, int, int); 133 static int pfsync_in_bus(struct mbuf *, int, int, int); 134 static int pfsync_in_tdb(struct mbuf *, int, int, int); 135 static int pfsync_in_eof(struct mbuf *, int, int, int); 136 static int pfsync_in_error(struct mbuf *, int, int, int); 137 138 static int (*pfsync_acts[])(struct mbuf *, int, int, int) = { 139 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 140 pfsync_in_ins, /* PFSYNC_ACT_INS */ 141 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 142 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 143 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 144 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 145 pfsync_in_del, /* PFSYNC_ACT_DEL */ 146 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 147 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 148 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 149 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 150 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 151 pfsync_in_eof /* 
PFSYNC_ACT_EOF */ 152 }; 153 154 struct pfsync_q { 155 void (*write)(struct pf_kstate *, void *); 156 size_t len; 157 u_int8_t action; 158 }; 159 160 /* we have one of these for every PFSYNC_S_ */ 161 static void pfsync_out_state(struct pf_kstate *, void *); 162 static void pfsync_out_iack(struct pf_kstate *, void *); 163 static void pfsync_out_upd_c(struct pf_kstate *, void *); 164 static void pfsync_out_del(struct pf_kstate *, void *); 165 166 static struct pfsync_q pfsync_qs[] = { 167 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 168 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 169 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, 170 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 171 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 172 }; 173 174 static void pfsync_q_ins(struct pf_kstate *, int, bool); 175 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 176 177 static void pfsync_update_state(struct pf_kstate *); 178 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 179 180 struct pfsync_upd_req_item { 181 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 182 struct pfsync_upd_req ur_msg; 183 }; 184 185 struct pfsync_deferral { 186 struct pfsync_softc *pd_sc; 187 TAILQ_ENTRY(pfsync_deferral) pd_entry; 188 struct callout pd_tmo; 189 190 struct pf_kstate *pd_st; 191 struct mbuf *pd_m; 192 }; 193 194 struct pfsync_bucket 195 { 196 int b_id; 197 struct pfsync_softc *b_sc; 198 struct mtx b_mtx; 199 struct callout b_tmo; 200 int b_flags; 201 #define PFSYNCF_BUCKET_PUSH 0x00000001 202 203 size_t b_len; 204 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_S_COUNT]; 205 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 206 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 207 u_int b_deferred; 208 void *b_plus; 209 size_t b_pluslen; 210 211 struct ifaltq b_snd; 212 }; 213 214 struct pfsync_softc { 215 /* Configuration */ 216 struct ifnet *sc_ifp; 217 struct ifnet *sc_sync_if; 218 struct ip_moptions sc_imo; 219 struct sockaddr_storage sc_sync_peer; 220 uint32_t sc_flags; 221 uint8_t sc_maxupdates; 222 union inet_template sc_template; 223 struct mtx sc_mtx; 224 225 /* Queued data */ 226 struct pfsync_bucket *sc_buckets; 227 228 /* Bulk update info */ 229 struct mtx sc_bulk_mtx; 230 uint32_t sc_ureq_sent; 231 int sc_bulk_tries; 232 uint32_t sc_ureq_received; 233 int sc_bulk_hashid; 234 uint64_t sc_bulk_stateid; 235 uint32_t sc_bulk_creatorid; 236 struct callout sc_bulk_tmo; 237 struct callout sc_bulkfail_tmo; 238 }; 239 240 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 241 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 242 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 243 244 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 245 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 246 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 247 248 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 249 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 250 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 251 252 static const char pfsyncname[] = "pfsync"; 253 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 254 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 255 #define V_pfsyncif VNET(pfsyncif) 256 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 257 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 258 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 259 #define 
V_pfsync_swi_ie VNET(pfsync_swi_ie) 260 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 261 #define V_pfsyncstats VNET(pfsyncstats) 262 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 263 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 264 265 static void pfsync_timeout(void *); 266 static void pfsync_push(struct pfsync_bucket *); 267 static void pfsync_push_all(struct pfsync_softc *); 268 static void pfsyncintr(void *); 269 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 270 struct in_mfilter *imf); 271 static void pfsync_multicast_cleanup(struct pfsync_softc *); 272 static void pfsync_pointers_init(void); 273 static void pfsync_pointers_uninit(void); 274 static int pfsync_init(void); 275 static void pfsync_uninit(void); 276 277 static unsigned long pfsync_buckets; 278 279 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 280 "PFSYNC"); 281 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 282 &VNET_NAME(pfsyncstats), pfsyncstats, 283 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 284 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 285 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 286 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 287 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 288 289 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 290 static void pfsync_clone_destroy(struct ifnet *); 291 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 292 struct pf_state_peer *); 293 static int pfsyncoutput(struct ifnet *, struct mbuf *, 294 const struct sockaddr *, struct route *); 295 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 296 297 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 298 static void pfsync_undefer(struct pfsync_deferral *, int); 299 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 300 static void pfsync_undefer_state(struct pf_kstate *, int); 301 static void pfsync_defer_tmo(void *); 302 303 static void pfsync_request_update(u_int32_t, u_int64_t); 304 static bool pfsync_update_state_req(struct pf_kstate *); 305 306 static void pfsync_drop(struct pfsync_softc *); 307 static void pfsync_sendout(int, int); 308 static void pfsync_send_plus(void *, size_t); 309 310 static void pfsync_bulk_start(void); 311 static void pfsync_bulk_status(u_int8_t); 312 static void pfsync_bulk_update(void *); 313 static void pfsync_bulk_fail(void *); 314 315 static void pfsync_detach_ifnet(struct ifnet *); 316 317 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 318 struct pfsync_kstatus *); 319 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 320 struct pfsync_softc *); 321 322 #ifdef IPSEC 323 static void pfsync_update_net_tdb(struct pfsync_tdb *); 324 #endif 325 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 326 struct pf_kstate *); 327 328 #define PFSYNC_MAX_BULKTRIES 12 329 #define PFSYNC_DEFER_TIMEOUT ((20 * hz) / 1000) 330 331 VNET_DEFINE(struct if_clone *, pfsync_cloner); 332 #define V_pfsync_cloner VNET(pfsync_cloner) 333 334 static int 335 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 336 { 337 struct pfsync_softc *sc; 338 struct ifnet *ifp; 339 struct pfsync_bucket *b; 340 int c, q; 341 342 if (unit != 0) 343 return (EINVAL); 344 345 if (! 
pfsync_buckets) 346 pfsync_buckets = mp_ncpus * 2; 347 348 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 349 sc->sc_flags |= PFSYNCF_OK; 350 sc->sc_maxupdates = 128; 351 352 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 353 if (ifp == NULL) { 354 free(sc, M_PFSYNC); 355 return (ENOSPC); 356 } 357 if_initname(ifp, pfsyncname, unit); 358 ifp->if_softc = sc; 359 ifp->if_ioctl = pfsyncioctl; 360 ifp->if_output = pfsyncoutput; 361 ifp->if_type = IFT_PFSYNC; 362 ifp->if_hdrlen = sizeof(struct pfsync_header); 363 ifp->if_mtu = ETHERMTU; 364 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 365 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 366 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 367 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 368 369 if_attach(ifp); 370 371 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 372 373 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 374 M_PFSYNC, M_ZERO | M_WAITOK); 375 for (c = 0; c < pfsync_buckets; c++) { 376 b = &sc->sc_buckets[c]; 377 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 378 379 b->b_id = c; 380 b->b_sc = sc; 381 b->b_len = PFSYNC_MINPKT; 382 383 for (q = 0; q < PFSYNC_S_COUNT; q++) 384 TAILQ_INIT(&b->b_qs[q]); 385 386 TAILQ_INIT(&b->b_upd_req_list); 387 TAILQ_INIT(&b->b_deferrals); 388 389 callout_init(&b->b_tmo, 1); 390 391 b->b_snd.ifq_maxlen = ifqmaxlen; 392 } 393 394 V_pfsyncif = sc; 395 396 return (0); 397 } 398 399 static void 400 pfsync_clone_destroy(struct ifnet *ifp) 401 { 402 struct pfsync_softc *sc = ifp->if_softc; 403 struct pfsync_bucket *b; 404 int c, ret; 405 406 for (c = 0; c < pfsync_buckets; c++) { 407 b = &sc->sc_buckets[c]; 408 /* 409 * At this stage, everything should have already been 410 * cleared by pfsync_uninit(), and we have only to 411 * drain callouts. 
412 */ 413 PFSYNC_BUCKET_LOCK(b); 414 while (b->b_deferred > 0) { 415 struct pfsync_deferral *pd = 416 TAILQ_FIRST(&b->b_deferrals); 417 418 ret = callout_stop(&pd->pd_tmo); 419 PFSYNC_BUCKET_UNLOCK(b); 420 if (ret > 0) { 421 pfsync_undefer(pd, 1); 422 } else { 423 callout_drain(&pd->pd_tmo); 424 } 425 PFSYNC_BUCKET_LOCK(b); 426 } 427 MPASS(b->b_deferred == 0); 428 MPASS(TAILQ_EMPTY(&b->b_deferrals)); 429 PFSYNC_BUCKET_UNLOCK(b); 430 431 callout_drain(&b->b_tmo); 432 } 433 434 callout_drain(&sc->sc_bulkfail_tmo); 435 callout_drain(&sc->sc_bulk_tmo); 436 437 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 438 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 439 bpfdetach(ifp); 440 if_detach(ifp); 441 442 pfsync_drop(sc); 443 444 if_free(ifp); 445 pfsync_multicast_cleanup(sc); 446 mtx_destroy(&sc->sc_mtx); 447 mtx_destroy(&sc->sc_bulk_mtx); 448 449 free(sc->sc_buckets, M_PFSYNC); 450 free(sc, M_PFSYNC); 451 452 V_pfsyncif = NULL; 453 } 454 455 static int 456 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 457 struct pf_state_peer *d) 458 { 459 if (s->scrub.scrub_flag && d->scrub == NULL) { 460 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 461 if (d->scrub == NULL) 462 return (ENOMEM); 463 } 464 465 return (0); 466 } 467 468 static int 469 pfsync_state_import(struct pfsync_state *sp, int flags) 470 { 471 struct pfsync_softc *sc = V_pfsyncif; 472 #ifndef __NO_STRICT_ALIGNMENT 473 struct pfsync_state_key key[2]; 474 #endif 475 struct pfsync_state_key *kw, *ks; 476 struct pf_kstate *st = NULL; 477 struct pf_state_key *skw = NULL, *sks = NULL; 478 struct pf_krule *r = NULL; 479 struct pfi_kkif *kif; 480 int error; 481 482 PF_RULES_RASSERT(); 483 484 if (sp->creatorid == 0) { 485 if (V_pf_status.debug >= PF_DEBUG_MISC) 486 printf("%s: invalid creator id: %08x\n", __func__, 487 ntohl(sp->creatorid)); 488 return (EINVAL); 489 } 490 491 if ((kif = pfi_kkif_find(sp->ifname)) == NULL) { 492 if (V_pf_status.debug >= PF_DEBUG_MISC) 493 printf("%s: unknown interface: %s\n", __func__, 494 sp->ifname); 495 if (flags & PFSYNC_SI_IOCTL) 496 return (EINVAL); 497 return (0); /* skip this state */ 498 } 499 500 /* 501 * If the ruleset checksums match or the state is coming from the ioctl, 502 * it's safe to associate the state with the rule of that number. 503 */ 504 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 505 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 506 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 507 r = pf_main_ruleset.rules[ 508 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 509 else 510 r = &V_pf_default_rule; 511 512 if ((r->max_states && 513 counter_u64_fetch(r->states_cur) >= r->max_states)) 514 goto cleanup; 515 516 /* 517 * XXXGL: consider M_WAITOK in ioctl path after. 
518 */ 519 st = pf_alloc_state(M_NOWAIT); 520 if (__predict_false(st == NULL)) 521 goto cleanup; 522 523 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 524 goto cleanup; 525 526 #ifndef __NO_STRICT_ALIGNMENT 527 bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); 528 kw = &key[PF_SK_WIRE]; 529 ks = &key[PF_SK_STACK]; 530 #else 531 kw = &sp->key[PF_SK_WIRE]; 532 ks = &sp->key[PF_SK_STACK]; 533 #endif 534 535 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || 536 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || 537 kw->port[0] != ks->port[0] || 538 kw->port[1] != ks->port[1]) { 539 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 540 if (sks == NULL) 541 goto cleanup; 542 } else 543 sks = skw; 544 545 /* allocate memory for scrub info */ 546 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 547 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 548 goto cleanup; 549 550 /* Copy to state key(s). */ 551 skw->addr[0] = kw->addr[0]; 552 skw->addr[1] = kw->addr[1]; 553 skw->port[0] = kw->port[0]; 554 skw->port[1] = kw->port[1]; 555 skw->proto = sp->proto; 556 skw->af = sp->af; 557 if (sks != skw) { 558 sks->addr[0] = ks->addr[0]; 559 sks->addr[1] = ks->addr[1]; 560 sks->port[0] = ks->port[0]; 561 sks->port[1] = ks->port[1]; 562 sks->proto = sp->proto; 563 sks->af = sp->af; 564 } 565 566 /* copy to state */ 567 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 568 st->creation = time_uptime - ntohl(sp->creation); 569 st->expire = time_uptime; 570 if (sp->expire) { 571 uint32_t timeout; 572 573 timeout = r->timeout[sp->timeout]; 574 if (!timeout) 575 timeout = V_pf_default_rule.timeout[sp->timeout]; 576 577 /* sp->expire may have been adaptively scaled by export. */ 578 st->expire -= timeout - ntohl(sp->expire); 579 } 580 581 st->direction = sp->direction; 582 st->log = sp->log; 583 st->timeout = sp->timeout; 584 /* 8 from old peers, 16 bits from new peers */ 585 st->state_flags = sp->state_flags_compat | ntohs(sp->state_flags); 586 587 if (r == &V_pf_default_rule) { 588 /* ToS and Prio are not sent over struct pfsync_state */ 589 st->state_flags &= ~PFSTATE_SETMASK; 590 } else { 591 /* Most actions are applied form state, not from rule. Until 592 * pfsync can forward all those actions and their parameters we 593 * must relay on restoring them from the found rule. 
594 * It's a copy of pf_rule_to_actions() */ 595 st->qid = r->qid; 596 st->pqid = r->pqid; 597 st->rtableid = r->rtableid; 598 if (r->scrub_flags & PFSTATE_SETTOS) 599 st->set_tos = r->set_tos; 600 st->min_ttl = r->min_ttl; 601 st->max_mss = r->max_mss; 602 st->state_flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 603 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 604 st->dnpipe = r->dnpipe; 605 st->dnrpipe = r->dnrpipe; 606 /* FIXME: dnflags are not part of state, can't update them */ 607 } 608 609 st->id = sp->id; 610 st->creatorid = sp->creatorid; 611 pf_state_peer_ntoh(&sp->src, &st->src); 612 pf_state_peer_ntoh(&sp->dst, &st->dst); 613 614 st->rule.ptr = r; 615 st->nat_rule.ptr = NULL; 616 st->anchor.ptr = NULL; 617 st->rt_kif = NULL; 618 619 st->pfsync_time = time_uptime; 620 st->sync_state = PFSYNC_S_NONE; 621 622 if (!(flags & PFSYNC_SI_IOCTL)) 623 st->state_flags |= PFSTATE_NOSYNC; 624 625 if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) 626 goto cleanup_state; 627 628 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 629 counter_u64_add(r->states_cur, 1); 630 counter_u64_add(r->states_tot, 1); 631 632 if (!(flags & PFSYNC_SI_IOCTL)) { 633 st->state_flags &= ~PFSTATE_NOSYNC; 634 if (st->state_flags & PFSTATE_ACK) { 635 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 636 PFSYNC_BUCKET_LOCK(b); 637 pfsync_q_ins(st, PFSYNC_S_IACK, true); 638 PFSYNC_BUCKET_UNLOCK(b); 639 640 pfsync_push_all(sc); 641 } 642 } 643 st->state_flags &= ~PFSTATE_ACK; 644 PF_STATE_UNLOCK(st); 645 646 return (0); 647 648 cleanup: 649 error = ENOMEM; 650 if (skw == sks) 651 sks = NULL; 652 if (skw != NULL) 653 uma_zfree(V_pf_state_key_z, skw); 654 if (sks != NULL) 655 uma_zfree(V_pf_state_key_z, sks); 656 657 cleanup_state: /* pf_state_insert() frees the state keys. */ 658 if (st) { 659 st->timeout = PFTM_UNLINKED; /* appease an assert */ 660 pf_free_state(st); 661 } 662 return (error); 663 } 664 665 #ifdef INET 666 static int 667 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 668 { 669 struct pfsync_softc *sc = V_pfsyncif; 670 struct mbuf *m = *mp; 671 struct ip *ip = mtod(m, struct ip *); 672 struct pfsync_header *ph; 673 struct pfsync_subheader subh; 674 675 int offset, len, flags = 0; 676 int rv; 677 uint16_t count; 678 679 PF_RULES_RLOCK_TRACKER; 680 681 *mp = NULL; 682 V_pfsyncstats.pfsyncs_ipackets++; 683 684 /* Verify that we have a sync interface configured. */ 685 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 686 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 687 goto done; 688 689 /* verify that the packet came in on the right interface */ 690 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 691 V_pfsyncstats.pfsyncs_badif++; 692 goto done; 693 } 694 695 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 696 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 697 /* verify that the IP TTL is 255. 
*/ 698 if (ip->ip_ttl != PFSYNC_DFLTTL) { 699 V_pfsyncstats.pfsyncs_badttl++; 700 goto done; 701 } 702 703 offset = ip->ip_hl << 2; 704 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 705 V_pfsyncstats.pfsyncs_hdrops++; 706 goto done; 707 } 708 709 if (offset + sizeof(*ph) > m->m_len) { 710 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 711 V_pfsyncstats.pfsyncs_hdrops++; 712 return (IPPROTO_DONE); 713 } 714 ip = mtod(m, struct ip *); 715 } 716 ph = (struct pfsync_header *)((char *)ip + offset); 717 718 /* verify the version */ 719 if (ph->version != PFSYNC_VERSION) { 720 V_pfsyncstats.pfsyncs_badver++; 721 goto done; 722 } 723 724 len = ntohs(ph->len) + offset; 725 if (m->m_pkthdr.len < len) { 726 V_pfsyncstats.pfsyncs_badlen++; 727 goto done; 728 } 729 730 /* 731 * Trusting pf_chksum during packet processing, as well as seeking 732 * in interface name tree, require holding PF_RULES_RLOCK(). 733 */ 734 PF_RULES_RLOCK(); 735 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 736 flags = PFSYNC_SI_CKSUM; 737 738 offset += sizeof(*ph); 739 while (offset <= len - sizeof(subh)) { 740 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 741 offset += sizeof(subh); 742 743 if (subh.action >= PFSYNC_ACT_MAX) { 744 V_pfsyncstats.pfsyncs_badact++; 745 PF_RULES_RUNLOCK(); 746 goto done; 747 } 748 749 count = ntohs(subh.count); 750 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 751 rv = (*pfsync_acts[subh.action])(m, offset, count, flags); 752 if (rv == -1) { 753 PF_RULES_RUNLOCK(); 754 return (IPPROTO_DONE); 755 } 756 757 offset += rv; 758 } 759 PF_RULES_RUNLOCK(); 760 761 done: 762 m_freem(m); 763 return (IPPROTO_DONE); 764 } 765 #endif 766 767 static int 768 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags) 769 { 770 struct pfsync_clr *clr; 771 struct mbuf *mp; 772 int len = sizeof(*clr) * count; 773 int i, offp; 774 u_int32_t creatorid; 775 776 mp = m_pulldown(m, offset, len, &offp); 777 if (mp == NULL) { 778 V_pfsyncstats.pfsyncs_badlen++; 779 return (-1); 780 } 781 clr = (struct pfsync_clr *)(mp->m_data + offp); 782 783 for (i = 0; i < count; i++) { 784 creatorid = clr[i].creatorid; 785 786 if (clr[i].ifname[0] != '\0' && 787 pfi_kkif_find(clr[i].ifname) == NULL) 788 continue; 789 790 for (int i = 0; i <= pf_hashmask; i++) { 791 struct pf_idhash *ih = &V_pf_idhash[i]; 792 struct pf_kstate *s; 793 relock: 794 PF_HASHROW_LOCK(ih); 795 LIST_FOREACH(s, &ih->states, entry) { 796 if (s->creatorid == creatorid) { 797 s->state_flags |= PFSTATE_NOSYNC; 798 pf_unlink_state(s); 799 goto relock; 800 } 801 } 802 PF_HASHROW_UNLOCK(ih); 803 } 804 } 805 806 return (len); 807 } 808 809 static int 810 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags) 811 { 812 struct mbuf *mp; 813 struct pfsync_state *sa, *sp; 814 int len = sizeof(*sp) * count; 815 int i, offp; 816 817 mp = m_pulldown(m, offset, len, &offp); 818 if (mp == NULL) { 819 V_pfsyncstats.pfsyncs_badlen++; 820 return (-1); 821 } 822 sa = (struct pfsync_state *)(mp->m_data + offp); 823 824 for (i = 0; i < count; i++) { 825 sp = &sa[i]; 826 827 /* Check for invalid values. 
*/ 828 if (sp->timeout >= PFTM_MAX || 829 sp->src.state > PF_TCPS_PROXY_DST || 830 sp->dst.state > PF_TCPS_PROXY_DST || 831 sp->direction > PF_OUT || 832 (sp->af != AF_INET && sp->af != AF_INET6)) { 833 if (V_pf_status.debug >= PF_DEBUG_MISC) 834 printf("%s: invalid value\n", __func__); 835 V_pfsyncstats.pfsyncs_badval++; 836 continue; 837 } 838 839 if (pfsync_state_import(sp, flags) == ENOMEM) 840 /* Drop out, but process the rest of the actions. */ 841 break; 842 } 843 844 return (len); 845 } 846 847 static int 848 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags) 849 { 850 struct pfsync_ins_ack *ia, *iaa; 851 struct pf_kstate *st; 852 853 struct mbuf *mp; 854 int len = count * sizeof(*ia); 855 int offp, i; 856 857 mp = m_pulldown(m, offset, len, &offp); 858 if (mp == NULL) { 859 V_pfsyncstats.pfsyncs_badlen++; 860 return (-1); 861 } 862 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 863 864 for (i = 0; i < count; i++) { 865 ia = &iaa[i]; 866 867 st = pf_find_state_byid(ia->id, ia->creatorid); 868 if (st == NULL) 869 continue; 870 871 if (st->state_flags & PFSTATE_ACK) { 872 pfsync_undefer_state(st, 0); 873 } 874 PF_STATE_UNLOCK(st); 875 } 876 /* 877 * XXX this is not yet implemented, but we know the size of the 878 * message so we can skip it. 879 */ 880 881 return (count * sizeof(struct pfsync_ins_ack)); 882 } 883 884 static int 885 pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, 886 struct pfsync_state_peer *dst) 887 { 888 int sync = 0; 889 890 PF_STATE_LOCK_ASSERT(st); 891 892 /* 893 * The state should never go backwards except 894 * for syn-proxy states. Neither should the 895 * sequence window slide backwards. 896 */ 897 if ((st->src.state > src->state && 898 (st->src.state < PF_TCPS_PROXY_SRC || 899 src->state >= PF_TCPS_PROXY_SRC)) || 900 901 (st->src.state == src->state && 902 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 903 sync++; 904 else 905 pf_state_peer_ntoh(src, &st->src); 906 907 if ((st->dst.state > dst->state) || 908 909 (st->dst.state >= TCPS_SYN_SENT && 910 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 911 sync++; 912 else 913 pf_state_peer_ntoh(dst, &st->dst); 914 915 return (sync); 916 } 917 918 static int 919 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags) 920 { 921 struct pfsync_softc *sc = V_pfsyncif; 922 struct pfsync_state *sa, *sp; 923 struct pf_kstate *st; 924 int sync; 925 926 struct mbuf *mp; 927 int len = count * sizeof(*sp); 928 int offp, i; 929 930 mp = m_pulldown(m, offset, len, &offp); 931 if (mp == NULL) { 932 V_pfsyncstats.pfsyncs_badlen++; 933 return (-1); 934 } 935 sa = (struct pfsync_state *)(mp->m_data + offp); 936 937 for (i = 0; i < count; i++) { 938 sp = &sa[i]; 939 940 /* check for invalid values */ 941 if (sp->timeout >= PFTM_MAX || 942 sp->src.state > PF_TCPS_PROXY_DST || 943 sp->dst.state > PF_TCPS_PROXY_DST) { 944 if (V_pf_status.debug >= PF_DEBUG_MISC) { 945 printf("pfsync_input: PFSYNC_ACT_UPD: " 946 "invalid value\n"); 947 } 948 V_pfsyncstats.pfsyncs_badval++; 949 continue; 950 } 951 952 st = pf_find_state_byid(sp->id, sp->creatorid); 953 if (st == NULL) { 954 /* insert the update */ 955 if (pfsync_state_import(sp, flags)) 956 V_pfsyncstats.pfsyncs_badstate++; 957 continue; 958 } 959 960 if (st->state_flags & PFSTATE_ACK) { 961 pfsync_undefer_state(st, 1); 962 } 963 964 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 965 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 966 else { 967 sync = 0; 968 969 /* 970 * Non-TCP protocol state machine always go 971 * forwards 972 */ 973 if 
(st->src.state > sp->src.state) 974 sync++; 975 else 976 pf_state_peer_ntoh(&sp->src, &st->src); 977 if (st->dst.state > sp->dst.state) 978 sync++; 979 else 980 pf_state_peer_ntoh(&sp->dst, &st->dst); 981 } 982 if (sync < 2) { 983 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 984 pf_state_peer_ntoh(&sp->dst, &st->dst); 985 st->expire = time_uptime; 986 st->timeout = sp->timeout; 987 } 988 st->pfsync_time = time_uptime; 989 990 if (sync) { 991 V_pfsyncstats.pfsyncs_stale++; 992 993 pfsync_update_state(st); 994 PF_STATE_UNLOCK(st); 995 pfsync_push_all(sc); 996 continue; 997 } 998 PF_STATE_UNLOCK(st); 999 } 1000 1001 return (len); 1002 } 1003 1004 static int 1005 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags) 1006 { 1007 struct pfsync_softc *sc = V_pfsyncif; 1008 struct pfsync_upd_c *ua, *up; 1009 struct pf_kstate *st; 1010 int len = count * sizeof(*up); 1011 int sync; 1012 struct mbuf *mp; 1013 int offp, i; 1014 1015 mp = m_pulldown(m, offset, len, &offp); 1016 if (mp == NULL) { 1017 V_pfsyncstats.pfsyncs_badlen++; 1018 return (-1); 1019 } 1020 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 1021 1022 for (i = 0; i < count; i++) { 1023 up = &ua[i]; 1024 1025 /* check for invalid values */ 1026 if (up->timeout >= PFTM_MAX || 1027 up->src.state > PF_TCPS_PROXY_DST || 1028 up->dst.state > PF_TCPS_PROXY_DST) { 1029 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1030 printf("pfsync_input: " 1031 "PFSYNC_ACT_UPD_C: " 1032 "invalid value\n"); 1033 } 1034 V_pfsyncstats.pfsyncs_badval++; 1035 continue; 1036 } 1037 1038 st = pf_find_state_byid(up->id, up->creatorid); 1039 if (st == NULL) { 1040 /* We don't have this state. Ask for it. */ 1041 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1042 pfsync_request_update(up->creatorid, up->id); 1043 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1044 continue; 1045 } 1046 1047 if (st->state_flags & PFSTATE_ACK) { 1048 pfsync_undefer_state(st, 1); 1049 } 1050 1051 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1052 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1053 else { 1054 sync = 0; 1055 1056 /* 1057 * Non-TCP protocol state machine always go 1058 * forwards 1059 */ 1060 if (st->src.state > up->src.state) 1061 sync++; 1062 else 1063 pf_state_peer_ntoh(&up->src, &st->src); 1064 if (st->dst.state > up->dst.state) 1065 sync++; 1066 else 1067 pf_state_peer_ntoh(&up->dst, &st->dst); 1068 } 1069 if (sync < 2) { 1070 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1071 pf_state_peer_ntoh(&up->dst, &st->dst); 1072 st->expire = time_uptime; 1073 st->timeout = up->timeout; 1074 } 1075 st->pfsync_time = time_uptime; 1076 1077 if (sync) { 1078 V_pfsyncstats.pfsyncs_stale++; 1079 1080 pfsync_update_state(st); 1081 PF_STATE_UNLOCK(st); 1082 pfsync_push_all(sc); 1083 continue; 1084 } 1085 PF_STATE_UNLOCK(st); 1086 } 1087 1088 return (len); 1089 } 1090 1091 static int 1092 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags) 1093 { 1094 struct pfsync_upd_req *ur, *ura; 1095 struct mbuf *mp; 1096 int len = count * sizeof(*ur); 1097 int i, offp; 1098 1099 struct pf_kstate *st; 1100 1101 mp = m_pulldown(m, offset, len, &offp); 1102 if (mp == NULL) { 1103 V_pfsyncstats.pfsyncs_badlen++; 1104 return (-1); 1105 } 1106 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1107 1108 for (i = 0; i < count; i++) { 1109 ur = &ura[i]; 1110 1111 if (ur->id == 0 && ur->creatorid == 0) 1112 pfsync_bulk_start(); 1113 else { 1114 st = pf_find_state_byid(ur->id, ur->creatorid); 1115 if (st == NULL) { 1116 V_pfsyncstats.pfsyncs_badstate++; 1117 continue; 1118 } 1119 if 
(st->state_flags & PFSTATE_NOSYNC) { 1120 PF_STATE_UNLOCK(st); 1121 continue; 1122 } 1123 1124 pfsync_update_state_req(st); 1125 PF_STATE_UNLOCK(st); 1126 } 1127 } 1128 1129 return (len); 1130 } 1131 1132 static int 1133 pfsync_in_del(struct mbuf *m, int offset, int count, int flags) 1134 { 1135 struct mbuf *mp; 1136 struct pfsync_state *sa, *sp; 1137 struct pf_kstate *st; 1138 int len = count * sizeof(*sp); 1139 int offp, i; 1140 1141 mp = m_pulldown(m, offset, len, &offp); 1142 if (mp == NULL) { 1143 V_pfsyncstats.pfsyncs_badlen++; 1144 return (-1); 1145 } 1146 sa = (struct pfsync_state *)(mp->m_data + offp); 1147 1148 for (i = 0; i < count; i++) { 1149 sp = &sa[i]; 1150 1151 st = pf_find_state_byid(sp->id, sp->creatorid); 1152 if (st == NULL) { 1153 V_pfsyncstats.pfsyncs_badstate++; 1154 continue; 1155 } 1156 st->state_flags |= PFSTATE_NOSYNC; 1157 pf_unlink_state(st); 1158 } 1159 1160 return (len); 1161 } 1162 1163 static int 1164 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags) 1165 { 1166 struct mbuf *mp; 1167 struct pfsync_del_c *sa, *sp; 1168 struct pf_kstate *st; 1169 int len = count * sizeof(*sp); 1170 int offp, i; 1171 1172 mp = m_pulldown(m, offset, len, &offp); 1173 if (mp == NULL) { 1174 V_pfsyncstats.pfsyncs_badlen++; 1175 return (-1); 1176 } 1177 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1178 1179 for (i = 0; i < count; i++) { 1180 sp = &sa[i]; 1181 1182 st = pf_find_state_byid(sp->id, sp->creatorid); 1183 if (st == NULL) { 1184 V_pfsyncstats.pfsyncs_badstate++; 1185 continue; 1186 } 1187 1188 st->state_flags |= PFSTATE_NOSYNC; 1189 pf_unlink_state(st); 1190 } 1191 1192 return (len); 1193 } 1194 1195 static int 1196 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags) 1197 { 1198 struct pfsync_softc *sc = V_pfsyncif; 1199 struct pfsync_bus *bus; 1200 struct mbuf *mp; 1201 int len = count * sizeof(*bus); 1202 int offp; 1203 1204 PFSYNC_BLOCK(sc); 1205 1206 /* If we're not waiting for a bulk update, who cares. 
*/ 1207 if (sc->sc_ureq_sent == 0) { 1208 PFSYNC_BUNLOCK(sc); 1209 return (len); 1210 } 1211 1212 mp = m_pulldown(m, offset, len, &offp); 1213 if (mp == NULL) { 1214 PFSYNC_BUNLOCK(sc); 1215 V_pfsyncstats.pfsyncs_badlen++; 1216 return (-1); 1217 } 1218 bus = (struct pfsync_bus *)(mp->m_data + offp); 1219 1220 switch (bus->status) { 1221 case PFSYNC_BUS_START: 1222 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1223 V_pf_limits[PF_LIMIT_STATES].limit / 1224 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1225 sizeof(struct pfsync_state)), 1226 pfsync_bulk_fail, sc); 1227 if (V_pf_status.debug >= PF_DEBUG_MISC) 1228 printf("pfsync: received bulk update start\n"); 1229 break; 1230 1231 case PFSYNC_BUS_END: 1232 if (time_uptime - ntohl(bus->endtime) >= 1233 sc->sc_ureq_sent) { 1234 /* that's it, we're happy */ 1235 sc->sc_ureq_sent = 0; 1236 sc->sc_bulk_tries = 0; 1237 callout_stop(&sc->sc_bulkfail_tmo); 1238 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1239 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1240 "pfsync bulk done"); 1241 sc->sc_flags |= PFSYNCF_OK; 1242 if (V_pf_status.debug >= PF_DEBUG_MISC) 1243 printf("pfsync: received valid " 1244 "bulk update end\n"); 1245 } else { 1246 if (V_pf_status.debug >= PF_DEBUG_MISC) 1247 printf("pfsync: received invalid " 1248 "bulk update end: bad timestamp\n"); 1249 } 1250 break; 1251 } 1252 PFSYNC_BUNLOCK(sc); 1253 1254 return (len); 1255 } 1256 1257 static int 1258 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags) 1259 { 1260 int len = count * sizeof(struct pfsync_tdb); 1261 1262 #if defined(IPSEC) 1263 struct pfsync_tdb *tp; 1264 struct mbuf *mp; 1265 int offp; 1266 int i; 1267 int s; 1268 1269 mp = m_pulldown(m, offset, len, &offp); 1270 if (mp == NULL) { 1271 V_pfsyncstats.pfsyncs_badlen++; 1272 return (-1); 1273 } 1274 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1275 1276 for (i = 0; i < count; i++) 1277 pfsync_update_net_tdb(&tp[i]); 1278 #endif 1279 1280 return (len); 1281 } 1282 1283 #if defined(IPSEC) 1284 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1285 static void 1286 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1287 { 1288 struct tdb *tdb; 1289 int s; 1290 1291 /* check for invalid values */ 1292 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1293 (pt->dst.sa.sa_family != AF_INET && 1294 pt->dst.sa.sa_family != AF_INET6)) 1295 goto bad; 1296 1297 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1298 if (tdb) { 1299 pt->rpl = ntohl(pt->rpl); 1300 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1301 1302 /* Neither replay nor byte counter should ever decrease. */ 1303 if (pt->rpl < tdb->tdb_rpl || 1304 pt->cur_bytes < tdb->tdb_cur_bytes) { 1305 goto bad; 1306 } 1307 1308 tdb->tdb_rpl = pt->rpl; 1309 tdb->tdb_cur_bytes = pt->cur_bytes; 1310 } 1311 return; 1312 1313 bad: 1314 if (V_pf_status.debug >= PF_DEBUG_MISC) 1315 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1316 "invalid value\n"); 1317 V_pfsyncstats.pfsyncs_badstate++; 1318 return; 1319 } 1320 #endif 1321 1322 static int 1323 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags) 1324 { 1325 /* check if we are at the right place in the packet */ 1326 if (offset != m->m_pkthdr.len) 1327 V_pfsyncstats.pfsyncs_badlen++; 1328 1329 /* we're done. 
free and let the caller return */ 1330 m_freem(m); 1331 return (-1); 1332 } 1333 1334 static int 1335 pfsync_in_error(struct mbuf *m, int offset, int count, int flags) 1336 { 1337 V_pfsyncstats.pfsyncs_badact++; 1338 1339 m_freem(m); 1340 return (-1); 1341 } 1342 1343 static int 1344 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1345 struct route *rt) 1346 { 1347 m_freem(m); 1348 return (0); 1349 } 1350 1351 /* ARGSUSED */ 1352 static int 1353 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1354 { 1355 struct pfsync_softc *sc = ifp->if_softc; 1356 struct ifreq *ifr = (struct ifreq *)data; 1357 struct pfsyncreq pfsyncr; 1358 size_t nvbuflen; 1359 int error; 1360 int c; 1361 1362 switch (cmd) { 1363 case SIOCSIFFLAGS: 1364 PFSYNC_LOCK(sc); 1365 if (ifp->if_flags & IFF_UP) { 1366 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1367 PFSYNC_UNLOCK(sc); 1368 pfsync_pointers_init(); 1369 } else { 1370 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1371 PFSYNC_UNLOCK(sc); 1372 pfsync_pointers_uninit(); 1373 } 1374 break; 1375 case SIOCSIFMTU: 1376 if (!sc->sc_sync_if || 1377 ifr->ifr_mtu <= PFSYNC_MINPKT || 1378 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1379 return (EINVAL); 1380 if (ifr->ifr_mtu < ifp->if_mtu) { 1381 for (c = 0; c < pfsync_buckets; c++) { 1382 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1383 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1384 pfsync_sendout(1, c); 1385 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1386 } 1387 } 1388 ifp->if_mtu = ifr->ifr_mtu; 1389 break; 1390 case SIOCGETPFSYNC: 1391 bzero(&pfsyncr, sizeof(pfsyncr)); 1392 PFSYNC_LOCK(sc); 1393 if (sc->sc_sync_if) { 1394 strlcpy(pfsyncr.pfsyncr_syncdev, 1395 sc->sc_sync_if->if_xname, IFNAMSIZ); 1396 } 1397 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1398 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1399 pfsyncr.pfsyncr_defer = sc->sc_flags; 1400 PFSYNC_UNLOCK(sc); 1401 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1402 sizeof(pfsyncr))); 1403 1404 case SIOCGETPFSYNCNV: 1405 { 1406 nvlist_t *nvl_syncpeer; 1407 nvlist_t *nvl = nvlist_create(0); 1408 1409 if (nvl == NULL) 1410 return (ENOMEM); 1411 1412 if (sc->sc_sync_if) 1413 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1414 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1415 nvlist_add_number(nvl, "flags", sc->sc_flags); 1416 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1417 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1418 1419 void *packed = NULL; 1420 packed = nvlist_pack(nvl, &nvbuflen); 1421 if (packed == NULL) { 1422 free(packed, M_NVLIST); 1423 nvlist_destroy(nvl); 1424 return (ENOMEM); 1425 } 1426 1427 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1428 ifr->ifr_cap_nv.length = nvbuflen; 1429 ifr->ifr_cap_nv.buffer = NULL; 1430 free(packed, M_NVLIST); 1431 nvlist_destroy(nvl); 1432 return (EFBIG); 1433 } 1434 1435 ifr->ifr_cap_nv.length = nvbuflen; 1436 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1437 1438 nvlist_destroy(nvl); 1439 nvlist_destroy(nvl_syncpeer); 1440 free(packed, M_NVLIST); 1441 break; 1442 } 1443 1444 case SIOCSETPFSYNC: 1445 { 1446 struct pfsync_kstatus status; 1447 1448 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1449 return (error); 1450 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1451 sizeof(pfsyncr)))) 1452 return (error); 1453 1454 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1455 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1456 1457 error = 
pfsync_kstatus_to_softc(&status, sc); 1458 return (error); 1459 } 1460 case SIOCSETPFSYNCNV: 1461 { 1462 struct pfsync_kstatus status; 1463 void *data; 1464 nvlist_t *nvl; 1465 1466 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1467 return (error); 1468 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1469 return (EINVAL); 1470 1471 data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); 1472 1473 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1474 ifr->ifr_cap_nv.length)) != 0) { 1475 free(data, M_TEMP); 1476 return (error); 1477 } 1478 1479 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1480 free(data, M_TEMP); 1481 return (EINVAL); 1482 } 1483 1484 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1485 pfsync_nvstatus_to_kstatus(nvl, &status); 1486 1487 nvlist_destroy(nvl); 1488 free(data, M_TEMP); 1489 1490 error = pfsync_kstatus_to_softc(&status, sc); 1491 return (error); 1492 } 1493 default: 1494 return (ENOTTY); 1495 } 1496 1497 return (0); 1498 } 1499 1500 static void 1501 pfsync_out_state(struct pf_kstate *st, void *buf) 1502 { 1503 struct pfsync_state *sp = buf; 1504 1505 pfsync_state_export(sp, st); 1506 } 1507 1508 static void 1509 pfsync_out_iack(struct pf_kstate *st, void *buf) 1510 { 1511 struct pfsync_ins_ack *iack = buf; 1512 1513 iack->id = st->id; 1514 iack->creatorid = st->creatorid; 1515 } 1516 1517 static void 1518 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1519 { 1520 struct pfsync_upd_c *up = buf; 1521 1522 bzero(up, sizeof(*up)); 1523 up->id = st->id; 1524 pf_state_peer_hton(&st->src, &up->src); 1525 pf_state_peer_hton(&st->dst, &up->dst); 1526 up->creatorid = st->creatorid; 1527 up->timeout = st->timeout; 1528 } 1529 1530 static void 1531 pfsync_out_del(struct pf_kstate *st, void *buf) 1532 { 1533 struct pfsync_del_c *dp = buf; 1534 1535 dp->id = st->id; 1536 dp->creatorid = st->creatorid; 1537 st->state_flags |= PFSTATE_NOSYNC; 1538 } 1539 1540 static void 1541 pfsync_drop(struct pfsync_softc *sc) 1542 { 1543 struct pf_kstate *st, *next; 1544 struct pfsync_upd_req_item *ur; 1545 struct pfsync_bucket *b; 1546 int c, q; 1547 1548 for (c = 0; c < pfsync_buckets; c++) { 1549 b = &sc->sc_buckets[c]; 1550 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1551 if (TAILQ_EMPTY(&b->b_qs[q])) 1552 continue; 1553 1554 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1555 KASSERT(st->sync_state == q, 1556 ("%s: st->sync_state == q", 1557 __func__)); 1558 st->sync_state = PFSYNC_S_NONE; 1559 pf_release_state(st); 1560 } 1561 TAILQ_INIT(&b->b_qs[q]); 1562 } 1563 1564 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1565 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1566 free(ur, M_PFSYNC); 1567 } 1568 1569 b->b_len = PFSYNC_MINPKT; 1570 b->b_plus = NULL; 1571 } 1572 } 1573 1574 static void 1575 pfsync_sendout(int schedswi, int c) 1576 { 1577 struct pfsync_softc *sc = V_pfsyncif; 1578 struct ifnet *ifp = sc->sc_ifp; 1579 struct mbuf *m; 1580 struct pfsync_header *ph; 1581 struct pfsync_subheader *subh; 1582 struct pf_kstate *st, *st_next; 1583 struct pfsync_upd_req_item *ur; 1584 struct pfsync_bucket *b = &sc->sc_buckets[c]; 1585 int aflen, offset; 1586 int q, count = 0; 1587 1588 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1589 KASSERT(b->b_len > PFSYNC_MINPKT, 1590 ("%s: sc_len %zu", __func__, b->b_len)); 1591 PFSYNC_BUCKET_LOCK_ASSERT(b); 1592 1593 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1594 pfsync_drop(sc); 1595 return; 1596 } 1597 1598 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, 
M_PKTHDR); 1599 if (m == NULL) { 1600 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1601 V_pfsyncstats.pfsyncs_onomem++; 1602 return; 1603 } 1604 m->m_data += max_linkhdr; 1605 m->m_len = m->m_pkthdr.len = b->b_len; 1606 1607 /* build the ip header */ 1608 switch (sc->sc_sync_peer.ss_family) { 1609 #ifdef INET 1610 case AF_INET: 1611 { 1612 struct ip *ip; 1613 1614 ip = mtod(m, struct ip *); 1615 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 1616 aflen = offset = sizeof(*ip); 1617 1618 ip->ip_len = htons(m->m_pkthdr.len); 1619 ip_fillid(ip); 1620 break; 1621 } 1622 #endif 1623 default: 1624 m_freem(m); 1625 return; 1626 } 1627 1628 1629 /* build the pfsync header */ 1630 ph = (struct pfsync_header *)(m->m_data + offset); 1631 bzero(ph, sizeof(*ph)); 1632 offset += sizeof(*ph); 1633 1634 ph->version = PFSYNC_VERSION; 1635 ph->len = htons(b->b_len - aflen); 1636 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1637 1638 /* walk the queues */ 1639 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1640 if (TAILQ_EMPTY(&b->b_qs[q])) 1641 continue; 1642 1643 subh = (struct pfsync_subheader *)(m->m_data + offset); 1644 offset += sizeof(*subh); 1645 1646 count = 0; 1647 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 1648 KASSERT(st->sync_state == q, 1649 ("%s: st->sync_state == q", 1650 __func__)); 1651 /* 1652 * XXXGL: some of write methods do unlocked reads 1653 * of state data :( 1654 */ 1655 pfsync_qs[q].write(st, m->m_data + offset); 1656 offset += pfsync_qs[q].len; 1657 st->sync_state = PFSYNC_S_NONE; 1658 pf_release_state(st); 1659 count++; 1660 } 1661 TAILQ_INIT(&b->b_qs[q]); 1662 1663 bzero(subh, sizeof(*subh)); 1664 subh->action = pfsync_qs[q].action; 1665 subh->count = htons(count); 1666 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1667 } 1668 1669 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 1670 subh = (struct pfsync_subheader *)(m->m_data + offset); 1671 offset += sizeof(*subh); 1672 1673 count = 0; 1674 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1675 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1676 1677 bcopy(&ur->ur_msg, m->m_data + offset, 1678 sizeof(ur->ur_msg)); 1679 offset += sizeof(ur->ur_msg); 1680 free(ur, M_PFSYNC); 1681 count++; 1682 } 1683 1684 bzero(subh, sizeof(*subh)); 1685 subh->action = PFSYNC_ACT_UPD_REQ; 1686 subh->count = htons(count); 1687 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1688 } 1689 1690 /* has someone built a custom region for us to add? 
*/ 1691 if (b->b_plus != NULL) { 1692 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 1693 offset += b->b_pluslen; 1694 1695 b->b_plus = NULL; 1696 } 1697 1698 subh = (struct pfsync_subheader *)(m->m_data + offset); 1699 offset += sizeof(*subh); 1700 1701 bzero(subh, sizeof(*subh)); 1702 subh->action = PFSYNC_ACT_EOF; 1703 subh->count = htons(1); 1704 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1705 1706 /* we're done, let's put it on the wire */ 1707 if (ifp->if_bpf) { 1708 m->m_data += aflen; 1709 m->m_len = m->m_pkthdr.len = b->b_len - aflen; 1710 BPF_MTAP(ifp, m); 1711 m->m_data -= aflen; 1712 m->m_len = m->m_pkthdr.len = b->b_len; 1713 } 1714 1715 if (sc->sc_sync_if == NULL) { 1716 b->b_len = PFSYNC_MINPKT; 1717 m_freem(m); 1718 return; 1719 } 1720 1721 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 1722 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 1723 b->b_len = PFSYNC_MINPKT; 1724 1725 if (!_IF_QFULL(&b->b_snd)) 1726 _IF_ENQUEUE(&b->b_snd, m); 1727 else { 1728 m_freem(m); 1729 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 1730 } 1731 if (schedswi) 1732 swi_sched(V_pfsync_swi_cookie, 0); 1733 } 1734 1735 static void 1736 pfsync_insert_state(struct pf_kstate *st) 1737 { 1738 struct pfsync_softc *sc = V_pfsyncif; 1739 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1740 1741 if (st->state_flags & PFSTATE_NOSYNC) 1742 return; 1743 1744 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1745 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1746 st->state_flags |= PFSTATE_NOSYNC; 1747 return; 1748 } 1749 1750 KASSERT(st->sync_state == PFSYNC_S_NONE, 1751 ("%s: st->sync_state %u", __func__, st->sync_state)); 1752 1753 PFSYNC_BUCKET_LOCK(b); 1754 if (b->b_len == PFSYNC_MINPKT) 1755 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1756 1757 pfsync_q_ins(st, PFSYNC_S_INS, true); 1758 PFSYNC_BUCKET_UNLOCK(b); 1759 1760 st->sync_updates = 0; 1761 } 1762 1763 static int 1764 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 1765 { 1766 struct pfsync_softc *sc = V_pfsyncif; 1767 struct pfsync_deferral *pd; 1768 struct pfsync_bucket *b; 1769 1770 if (m->m_flags & (M_BCAST|M_MCAST)) 1771 return (0); 1772 1773 if (sc == NULL) 1774 return (0); 1775 1776 b = pfsync_get_bucket(sc, st); 1777 1778 PFSYNC_LOCK(sc); 1779 1780 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 1781 !(sc->sc_flags & PFSYNCF_DEFER)) { 1782 PFSYNC_UNLOCK(sc); 1783 return (0); 1784 } 1785 1786 PFSYNC_BUCKET_LOCK(b); 1787 PFSYNC_UNLOCK(sc); 1788 1789 if (b->b_deferred >= 128) 1790 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 1791 1792 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1793 if (pd == NULL) { 1794 PFSYNC_BUCKET_UNLOCK(b); 1795 return (0); 1796 } 1797 b->b_deferred++; 1798 1799 m->m_flags |= M_SKIP_FIREWALL; 1800 st->state_flags |= PFSTATE_ACK; 1801 1802 pd->pd_sc = sc; 1803 pd->pd_st = st; 1804 pf_ref_state(st); 1805 pd->pd_m = m; 1806 1807 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 1808 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 1809 callout_reset(&pd->pd_tmo, PFSYNC_DEFER_TIMEOUT, pfsync_defer_tmo, pd); 1810 1811 pfsync_push(b); 1812 PFSYNC_BUCKET_UNLOCK(b); 1813 1814 return (1); 1815 } 1816 1817 static void 1818 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1819 { 1820 struct pfsync_softc *sc = pd->pd_sc; 1821 struct mbuf *m = pd->pd_m; 1822 struct pf_kstate *st = pd->pd_st; 1823 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1824 1825 PFSYNC_BUCKET_LOCK_ASSERT(b); 1826 1827 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1828 
b->b_deferred--; 1829 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1830 free(pd, M_PFSYNC); 1831 pf_release_state(st); 1832 1833 if (drop) 1834 m_freem(m); 1835 else { 1836 _IF_ENQUEUE(&b->b_snd, m); 1837 pfsync_push(b); 1838 } 1839 } 1840 1841 static void 1842 pfsync_defer_tmo(void *arg) 1843 { 1844 struct epoch_tracker et; 1845 struct pfsync_deferral *pd = arg; 1846 struct pfsync_softc *sc = pd->pd_sc; 1847 struct mbuf *m = pd->pd_m; 1848 struct pf_kstate *st = pd->pd_st; 1849 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1850 1851 PFSYNC_BUCKET_LOCK_ASSERT(b); 1852 1853 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1854 b->b_deferred--; 1855 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1856 PFSYNC_BUCKET_UNLOCK(b); 1857 free(pd, M_PFSYNC); 1858 1859 if (sc->sc_sync_if == NULL) { 1860 pf_release_state(st); 1861 m_freem(m); 1862 return; 1863 } 1864 1865 NET_EPOCH_ENTER(et); 1866 CURVNET_SET(sc->sc_sync_if->if_vnet); 1867 1868 pfsync_tx(sc, m); 1869 1870 pf_release_state(st); 1871 1872 CURVNET_RESTORE(); 1873 NET_EPOCH_EXIT(et); 1874 } 1875 1876 static void 1877 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 1878 { 1879 struct pfsync_softc *sc = V_pfsyncif; 1880 struct pfsync_deferral *pd; 1881 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1882 1883 PFSYNC_BUCKET_LOCK_ASSERT(b); 1884 1885 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 1886 if (pd->pd_st == st) { 1887 if (callout_stop(&pd->pd_tmo) > 0) 1888 pfsync_undefer(pd, drop); 1889 1890 return; 1891 } 1892 } 1893 1894 panic("%s: unable to find deferred state", __func__); 1895 } 1896 1897 static void 1898 pfsync_undefer_state(struct pf_kstate *st, int drop) 1899 { 1900 struct pfsync_softc *sc = V_pfsyncif; 1901 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1902 1903 PFSYNC_BUCKET_LOCK(b); 1904 pfsync_undefer_state_locked(st, drop); 1905 PFSYNC_BUCKET_UNLOCK(b); 1906 } 1907 1908 static struct pfsync_bucket* 1909 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 1910 { 1911 int c = PF_IDHASH(st) % pfsync_buckets; 1912 return &sc->sc_buckets[c]; 1913 } 1914 1915 static void 1916 pfsync_update_state(struct pf_kstate *st) 1917 { 1918 struct pfsync_softc *sc = V_pfsyncif; 1919 bool sync = false, ref = true; 1920 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1921 1922 PF_STATE_LOCK_ASSERT(st); 1923 PFSYNC_BUCKET_LOCK(b); 1924 1925 if (st->state_flags & PFSTATE_ACK) 1926 pfsync_undefer_state_locked(st, 0); 1927 if (st->state_flags & PFSTATE_NOSYNC) { 1928 if (st->sync_state != PFSYNC_S_NONE) 1929 pfsync_q_del(st, true, b); 1930 PFSYNC_BUCKET_UNLOCK(b); 1931 return; 1932 } 1933 1934 if (b->b_len == PFSYNC_MINPKT) 1935 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1936 1937 switch (st->sync_state) { 1938 case PFSYNC_S_UPD_C: 1939 case PFSYNC_S_UPD: 1940 case PFSYNC_S_INS: 1941 /* we're already handling it */ 1942 1943 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1944 st->sync_updates++; 1945 if (st->sync_updates >= sc->sc_maxupdates) 1946 sync = true; 1947 } 1948 break; 1949 1950 case PFSYNC_S_IACK: 1951 pfsync_q_del(st, false, b); 1952 ref = false; 1953 /* FALLTHROUGH */ 1954 1955 case PFSYNC_S_NONE: 1956 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 1957 st->sync_updates = 0; 1958 break; 1959 1960 default: 1961 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1962 } 1963 1964 if (sync || (time_uptime - st->pfsync_time) < 2) 1965 pfsync_push(b); 1966 1967 PFSYNC_BUCKET_UNLOCK(b); 1968 } 1969 1970 static void 1971 
pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1972 { 1973 struct pfsync_softc *sc = V_pfsyncif; 1974 struct pfsync_bucket *b = &sc->sc_buckets[0]; 1975 struct pfsync_upd_req_item *item; 1976 size_t nlen = sizeof(struct pfsync_upd_req); 1977 1978 PFSYNC_BUCKET_LOCK_ASSERT(b); 1979 1980 /* 1981 * This code does a bit to prevent multiple update requests for the 1982 * same state being generated. It searches current subheader queue, 1983 * but it doesn't lookup into queue of already packed datagrams. 1984 */ 1985 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 1986 if (item->ur_msg.id == id && 1987 item->ur_msg.creatorid == creatorid) 1988 return; 1989 1990 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1991 if (item == NULL) 1992 return; /* XXX stats */ 1993 1994 item->ur_msg.id = id; 1995 item->ur_msg.creatorid = creatorid; 1996 1997 if (TAILQ_EMPTY(&b->b_upd_req_list)) 1998 nlen += sizeof(struct pfsync_subheader); 1999 2000 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2001 pfsync_sendout(0, 0); 2002 2003 nlen = sizeof(struct pfsync_subheader) + 2004 sizeof(struct pfsync_upd_req); 2005 } 2006 2007 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 2008 b->b_len += nlen; 2009 2010 pfsync_push(b); 2011 } 2012 2013 static bool 2014 pfsync_update_state_req(struct pf_kstate *st) 2015 { 2016 struct pfsync_softc *sc = V_pfsyncif; 2017 bool ref = true, full = false; 2018 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2019 2020 PF_STATE_LOCK_ASSERT(st); 2021 PFSYNC_BUCKET_LOCK(b); 2022 2023 if (st->state_flags & PFSTATE_NOSYNC) { 2024 if (st->sync_state != PFSYNC_S_NONE) 2025 pfsync_q_del(st, true, b); 2026 PFSYNC_BUCKET_UNLOCK(b); 2027 return (full); 2028 } 2029 2030 switch (st->sync_state) { 2031 case PFSYNC_S_UPD_C: 2032 case PFSYNC_S_IACK: 2033 pfsync_q_del(st, false, b); 2034 ref = false; 2035 /* FALLTHROUGH */ 2036 2037 case PFSYNC_S_NONE: 2038 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2039 pfsync_push(b); 2040 break; 2041 2042 case PFSYNC_S_INS: 2043 case PFSYNC_S_UPD: 2044 case PFSYNC_S_DEL: 2045 /* we're already handling it */ 2046 break; 2047 2048 default: 2049 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2050 } 2051 2052 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state)) 2053 full = true; 2054 2055 PFSYNC_BUCKET_UNLOCK(b); 2056 2057 return (full); 2058 } 2059 2060 static void 2061 pfsync_delete_state(struct pf_kstate *st) 2062 { 2063 struct pfsync_softc *sc = V_pfsyncif; 2064 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2065 bool ref = true; 2066 2067 PFSYNC_BUCKET_LOCK(b); 2068 if (st->state_flags & PFSTATE_ACK) 2069 pfsync_undefer_state_locked(st, 1); 2070 if (st->state_flags & PFSTATE_NOSYNC) { 2071 if (st->sync_state != PFSYNC_S_NONE) 2072 pfsync_q_del(st, true, b); 2073 PFSYNC_BUCKET_UNLOCK(b); 2074 return; 2075 } 2076 2077 if (b->b_len == PFSYNC_MINPKT) 2078 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2079 2080 switch (st->sync_state) { 2081 case PFSYNC_S_INS: 2082 /* We never got to tell the world so just forget about it. 
static void
pfsync_delete_state(struct pf_kstate *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
	bool ref = true;

	PFSYNC_BUCKET_LOCK(b);
	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state_locked(st, 1);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return;
	}

	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* We never got to tell the world so just forget about it. */
		pfsync_q_del(st, true, b);
		break;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL, ref);
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	PFSYNC_BUCKET_UNLOCK(b);
}

static void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

static void
pfsync_q_ins(struct pf_kstate *st, int q, bool ref)
{
	struct pfsync_softc *sc = V_pfsyncif;
	size_t nlen = pfsync_qs[q].len;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	KASSERT(st->sync_state == PFSYNC_S_NONE,
	    ("%s: st->sync_state %u", __func__, st->sync_state));
	KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
	    b->b_len));

	if (TAILQ_EMPTY(&b->b_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
		pfsync_sendout(1, b->b_id);

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	b->b_len += nlen;
	TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
	st->sync_state = q;
	if (ref)
		pf_ref_state(st);
}

static void
pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b)
{
	int q = st->sync_state;

	PFSYNC_BUCKET_LOCK_ASSERT(b);
	KASSERT(st->sync_state != PFSYNC_S_NONE,
	    ("%s: st->sync_state != PFSYNC_S_NONE", __func__));

	b->b_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	if (unref)
		pf_release_state(st);

	if (TAILQ_EMPTY(&b->b_qs[q]))
		b->b_len -= sizeof(struct pfsync_subheader);
}

static void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	PFSYNC_BLOCK(sc);

	sc->sc_ureq_received = time_uptime;
	sc->sc_bulk_hashid = 0;
	sc->sc_bulk_stateid = 0;
	pfsync_bulk_status(PFSYNC_BUS_START);
	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
	PFSYNC_BUNLOCK(sc);
}

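/*
 * Bulk update callout: walk the state table and queue every eligible
 * state for a full update.  When a packet fills up, remember the current
 * position and reschedule the callout; once the whole table has been
 * walked, signal the end of the transfer with a PFSYNC_BUS_END message.
 */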
static void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_kstate *s;
	int i;

	PFSYNC_BLOCK_ASSERT(sc);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	/*
	 * Start with the last state from the previous invocation.
	 * It may have gone away; in that case start from the saved
	 * hash slot.
	 */
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		for (; s; s = LIST_NEXT(s, entry)) {
			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				if (pfsync_update_state_req(s)) {
					/* We've filled a packet. */
					sc->sc_bulk_hashid = i;
					sc->sc_bulk_stateid = s->id;
					sc->sc_bulk_creatorid = s->creatorid;
					PF_HASHROW_UNLOCK(ih);
					callout_reset(&sc->sc_bulk_tmo, 1,
					    pfsync_bulk_update, sc);
					goto full;
				}
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);
full:
	CURVNET_RESTORE();
}

static void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = V_pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;

	r.bus.creatorid = V_pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

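/*
 * Callout fired when a bulk update we requested did not complete in
 * time.  The request is retried up to PFSYNC_MAX_BULKTRIES times; after
 * that the transfer is treated as successful so that the carp demotion
 * applied at bulk start is lifted.
 */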
static void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again. */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_BUCKET_LOCK(b);
		pfsync_request_update(0, 0);
		PFSYNC_BUCKET_UNLOCK(b);
	} else {
		/* Pretend the transfer was ok. */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}

static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	PFSYNC_BUCKET_LOCK(b);

	if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1, b->b_id);

	b->b_plus = plus;
	b->b_len += (b->b_pluslen = pluslen);

	pfsync_sendout(1, b->b_id);
	PFSYNC_BUCKET_UNLOCK(b);
}

static void
pfsync_timeout(void *arg)
{
	struct pfsync_bucket *b = arg;

	CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
	PFSYNC_BUCKET_LOCK(b);
	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);
	CURVNET_RESTORE();
}

static void
pfsync_push(struct pfsync_bucket *b)
{

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	b->b_flags |= PFSYNCF_BUCKET_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}

static void
pfsync_push_all(struct pfsync_softc *sc)
{
	int c;
	struct pfsync_bucket *b;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		pfsync_push(b);
		PFSYNC_BUCKET_UNLOCK(b);
	}
}

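/*
 * Hand a pfsync packet to the IP output path.  Deferred packets (marked
 * with M_SKIP_FIREWALL) are transmitted unchanged; packets built by
 * pfsync itself go out with the multicast options of the sync interface.
 * Only IPv4 transport is implemented.
 */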
static void
pfsync_tx(struct pfsync_softc *sc, struct mbuf *m)
{
	struct ip *ip;
	int af, error = 0;

	ip = mtod(m, struct ip *);
	MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4));

	af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6;

	/*
	 * We distinguish between a deferral packet and our
	 * own pfsync packet based on M_SKIP_FIREWALL
	 * flag. This is XXX.
	 */
	switch (af) {
#ifdef INET
	case AF_INET:
		if (m->m_flags & M_SKIP_FIREWALL) {
			error = ip_output(m, NULL, NULL, 0,
			    NULL, NULL);
		} else {
			error = ip_output(m, NULL, NULL,
			    IP_RAWOUTPUT, &sc->sc_imo, NULL);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		if (m->m_flags & M_SKIP_FIREWALL) {
			error = ip6_output(m, NULL, NULL, 0,
			    NULL, NULL, NULL);
		} else {
			MPASS(false);
			/* We don't support pfsync over IPv6. */
			/*error = ip6_output(m, NULL, NULL,
			    IP_RAWOUTPUT, &sc->sc_imo6, NULL);*/
		}
		break;
#endif
	}

	if (error == 0)
		V_pfsyncstats.pfsyncs_opackets++;
	else
		V_pfsyncstats.pfsyncs_oerrors++;
}

static void
pfsyncintr(void *arg)
{
	struct epoch_tracker et;
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b;
	struct mbuf *m, *n;
	int c;

	NET_EPOCH_ENTER(et);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
			pfsync_sendout(0, b->b_id);
			b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
		}
		_IF_DEQUEUE_ALL(&b->b_snd, m);
		PFSYNC_BUCKET_UNLOCK(b);

		for (; m != NULL; m = n) {
			n = m->m_nextpkt;
			m->m_nextpkt = NULL;

			pfsync_tx(sc, m);
		}
	}
	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}

static int
pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp,
    struct in_mfilter *imf)
{
	struct ip_moptions *imo = &sc->sc_imo;
	int error;

	if (!(ifp->if_flags & IFF_MULTICAST))
		return (EADDRNOTAVAIL);

	switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
	case AF_INET:
	{
		ip_mfilter_init(&imo->imo_head);
		imo->imo_multicast_vif = -1;
		if ((error = in_joingroup(ifp,
		    &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL,
		    &imf->imf_inm)) != 0)
			return (error);

		ip_mfilter_insert(&imo->imo_head, imf);
		imo->imo_multicast_ifp = ifp;
		imo->imo_multicast_ttl = PFSYNC_DFLTTL;
		imo->imo_multicast_loop = 0;
		break;
	}
#endif
	}

	return (0);
}

static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;
	struct in_mfilter *imf;

	while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
		ip_mfilter_remove(&imo->imo_head, imf);
		in_leavegroup(imf->imf_inm, NULL);
		ip_mfilter_free(imf);
	}
	imo->imo_multicast_ifp = NULL;
}

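/*
 * Interface departure hook: if the departing interface is our sync
 * interface, forget about it and reset the multicast bookkeeping so no
 * cleanup is attempted on it later.
 */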
2504 */ 2505 ip_mfilter_init(&sc->sc_imo.imo_head); 2506 sc->sc_imo.imo_multicast_ifp = NULL; 2507 sc->sc_sync_if = NULL; 2508 } 2509 2510 PFSYNC_UNLOCK(sc); 2511 } 2512 2513 static int 2514 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 2515 { 2516 struct sockaddr_storage sa; 2517 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 2518 status->flags = pfsyncr->pfsyncr_defer; 2519 2520 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 2521 2522 memset(&sa, 0, sizeof(sa)); 2523 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 2524 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 2525 in->sin_family = AF_INET; 2526 in->sin_len = sizeof(*in); 2527 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 2528 } 2529 status->syncpeer = sa; 2530 2531 return 0; 2532 } 2533 2534 static int 2535 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 2536 { 2537 struct in_mfilter *imf = NULL; 2538 struct ifnet *sifp; 2539 struct ip *ip; 2540 int error; 2541 int c; 2542 2543 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 2544 return (EINVAL); 2545 2546 if (status->syncdev[0] == '\0') 2547 sifp = NULL; 2548 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 2549 return (EINVAL); 2550 2551 struct sockaddr_in *status_sin = 2552 (struct sockaddr_in *)&(status->syncpeer); 2553 if (sifp != NULL && (status_sin->sin_addr.s_addr == 0 || 2554 status_sin->sin_addr.s_addr == 2555 htonl(INADDR_PFSYNC_GROUP))) 2556 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 2557 2558 PFSYNC_LOCK(sc); 2559 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 2560 sc_sin->sin_family = AF_INET; 2561 sc_sin->sin_len = sizeof(*sc_sin); 2562 if (status_sin->sin_addr.s_addr == 0) { 2563 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 2564 } else { 2565 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 2566 } 2567 2568 sc->sc_maxupdates = status->maxupdates; 2569 if (status->flags & PFSYNCF_DEFER) { 2570 sc->sc_flags |= PFSYNCF_DEFER; 2571 V_pfsync_defer_ptr = pfsync_defer; 2572 } else { 2573 sc->sc_flags &= ~PFSYNCF_DEFER; 2574 V_pfsync_defer_ptr = NULL; 2575 } 2576 2577 if (sifp == NULL) { 2578 if (sc->sc_sync_if) 2579 if_rele(sc->sc_sync_if); 2580 sc->sc_sync_if = NULL; 2581 pfsync_multicast_cleanup(sc); 2582 PFSYNC_UNLOCK(sc); 2583 return (0); 2584 } 2585 2586 for (c = 0; c < pfsync_buckets; c++) { 2587 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 2588 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 2589 (sifp->if_mtu < sc->sc_ifp->if_mtu || 2590 (sc->sc_sync_if != NULL && 2591 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 2592 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 2593 pfsync_sendout(1, c); 2594 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 2595 } 2596 2597 pfsync_multicast_cleanup(sc); 2598 2599 if (sc_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 2600 error = pfsync_multicast_setup(sc, sifp, imf); 2601 if (error) { 2602 if_rele(sifp); 2603 ip_mfilter_free(imf); 2604 PFSYNC_UNLOCK(sc); 2605 return (error); 2606 } 2607 } 2608 if (sc->sc_sync_if) 2609 if_rele(sc->sc_sync_if); 2610 sc->sc_sync_if = sifp; 2611 2612 ip = &sc->sc_template.ipv4; 2613 bzero(ip, sizeof(*ip)); 2614 ip->ip_v = IPVERSION; 2615 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 2616 ip->ip_tos = IPTOS_LOWDELAY; 2617 /* len and id are set later. 
static int
pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc)
{
	struct in_mfilter *imf = NULL;
	struct ifnet *sifp;
	struct ip *ip;
	int error;
	int c;

	if ((status->maxupdates < 0) || (status->maxupdates > 255))
		return (EINVAL);

	if (status->syncdev[0] == '\0')
		sifp = NULL;
	else if ((sifp = ifunit_ref(status->syncdev)) == NULL)
		return (EINVAL);

	struct sockaddr_in *status_sin =
	    (struct sockaddr_in *)&(status->syncpeer);
	if (sifp != NULL && (status_sin->sin_addr.s_addr == 0 ||
	    status_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)))
		imf = ip_mfilter_alloc(M_WAITOK, 0, 0);

	PFSYNC_LOCK(sc);
	struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer;
	sc_sin->sin_family = AF_INET;
	sc_sin->sin_len = sizeof(*sc_sin);
	if (status_sin->sin_addr.s_addr == 0) {
		sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
	} else {
		sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr;
	}

	sc->sc_maxupdates = status->maxupdates;
	if (status->flags & PFSYNCF_DEFER) {
		sc->sc_flags |= PFSYNCF_DEFER;
		V_pfsync_defer_ptr = pfsync_defer;
	} else {
		sc->sc_flags &= ~PFSYNCF_DEFER;
		V_pfsync_defer_ptr = NULL;
	}

	if (sifp == NULL) {
		if (sc->sc_sync_if)
			if_rele(sc->sc_sync_if);
		sc->sc_sync_if = NULL;
		pfsync_multicast_cleanup(sc);
		PFSYNC_UNLOCK(sc);
		return (0);
	}

	for (c = 0; c < pfsync_buckets; c++) {
		PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
		if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
			pfsync_sendout(1, c);
		PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
	}

	pfsync_multicast_cleanup(sc);

	if (sc_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
		error = pfsync_multicast_setup(sc, sifp, imf);
		if (error) {
			if_rele(sifp);
			ip_mfilter_free(imf);
			PFSYNC_UNLOCK(sc);
			return (error);
		}
	}
	if (sc->sc_sync_if)
		if_rele(sc->sc_sync_if);
	sc->sc_sync_if = sifp;

	ip = &sc->sc_template.ipv4;
	bzero(ip, sizeof(*ip));
	ip->ip_v = IPVERSION;
	ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2;
	ip->ip_tos = IPTOS_LOWDELAY;
	/* len and id are set later. */
	ip->ip_off = htons(IP_DF);
	ip->ip_ttl = PFSYNC_DFLTTL;
	ip->ip_p = IPPROTO_PFSYNC;
	ip->ip_src.s_addr = INADDR_ANY;
	ip->ip_dst.s_addr = sc_sin->sin_addr.s_addr;

	/* Request a full state table update. */
	if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(V_pfsync_carp_adj,
		    "pfsync bulk start");
	sc->sc_flags &= ~PFSYNCF_OK;
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: requesting bulk update\n");
	PFSYNC_UNLOCK(sc);
	PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
	pfsync_request_update(0, 0);
	PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
	PFSYNC_BLOCK(sc);
	sc->sc_ureq_sent = time_uptime;
	callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc);
	PFSYNC_BUNLOCK(sc);
	return (0);
}

static void
pfsync_pointers_init(void)
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = pfsync_state_import;
	V_pfsync_insert_state_ptr = pfsync_insert_state;
	V_pfsync_update_state_ptr = pfsync_update_state;
	V_pfsync_delete_state_ptr = pfsync_delete_state;
	V_pfsync_clear_states_ptr = pfsync_clear_states;
	V_pfsync_defer_ptr = pfsync_defer;
	PF_RULES_WUNLOCK();
}

static void
pfsync_pointers_uninit(void)
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = NULL;
	V_pfsync_insert_state_ptr = NULL;
	V_pfsync_update_state_ptr = NULL;
	V_pfsync_delete_state_ptr = NULL;
	V_pfsync_clear_states_ptr = NULL;
	V_pfsync_defer_ptr = NULL;
	PF_RULES_WUNLOCK();
}

static void
vnet_pfsync_init(const void *unused __unused)
{
	int error;

	V_pfsync_cloner = if_clone_simple(pfsyncname,
	    pfsync_clone_create, pfsync_clone_destroy, 1);
	error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif,
	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
	if (error) {
		if_clone_detach(V_pfsync_cloner);
		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
	}

	pfsync_pointers_init();
}
VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pfsync_init, NULL);

static void
vnet_pfsync_uninit(const void *unused __unused)
{
	int ret __diagused;

	pfsync_pointers_uninit();

	if_clone_detach(V_pfsync_cloner);
	ret = swi_remove(V_pfsync_swi_cookie);
	MPASS(ret == 0);
	ret = intr_event_destroy(V_pfsync_swi_ie);
	MPASS(ret == 0);
}

VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
    vnet_pfsync_uninit, NULL);

static int
pfsync_init(void)
{
#ifdef INET
	int error;

	pfsync_detach_ifnet_ptr = pfsync_detach_ifnet;

	error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL);
	if (error)
		return (error);
#endif

	return (0);
}

static void
pfsync_uninit(void)
{
	pfsync_detach_ifnet_ptr = NULL;

#ifdef INET
	ipproto_unregister(IPPROTO_PFSYNC);
#endif
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static moduledata_t pfsync_mod = {
	pfsyncname,
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 1

/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);