1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 #include "opt_inet.h" 64 #include "opt_inet6.h" 65 #include "opt_pf.h" 66 67 #include <sys/param.h> 68 #include <sys/bus.h> 69 #include <sys/endian.h> 70 #include <sys/interrupt.h> 71 #include <sys/kernel.h> 72 #include <sys/lock.h> 73 #include <sys/mbuf.h> 74 #include <sys/module.h> 75 #include <sys/mutex.h> 76 #include <sys/nv.h> 77 #include <sys/priv.h> 78 #include <sys/smp.h> 79 #include <sys/socket.h> 80 #include <sys/sockio.h> 81 #include <sys/sysctl.h> 82 #include <sys/syslog.h> 83 84 #include <net/bpf.h> 85 #include <net/if.h> 86 #include <net/if_var.h> 87 #include <net/if_clone.h> 88 #include <net/if_private.h> 89 #include <net/if_types.h> 90 #include <net/vnet.h> 91 #include <net/pfvar.h> 92 #include <net/route.h> 93 #include <net/if_pfsync.h> 94 95 #include <netinet/if_ether.h> 96 #include <netinet/in.h> 97 #include <netinet/in_var.h> 98 #include <netinet6/in6_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip6.h> 101 #include <netinet/ip_carp.h> 102 #include <netinet/ip_var.h> 103 #include <netinet/tcp.h> 104 #include <netinet/tcp_fsm.h> 105 #include <netinet/tcp_seq.h> 106 107 #include <netinet/ip6.h> 108 #include <netinet6/ip6_var.h> 109 #include <netinet6/scope6_var.h> 110 111 #include <netpfil/pf/pfsync_nv.h> 112 113 struct pfsync_bucket; 114 struct pfsync_softc; 115 116 union inet_template { 117 struct ip ipv4; 118 struct ip6_hdr ipv6; 119 }; 120 121 #define PFSYNC_MINPKT ( \ 122 sizeof(union inet_template) + \ 123 sizeof(struct pfsync_header) + \ 124 sizeof(struct pfsync_subheader) ) 125 126 static int pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *, 127 struct pf_state_peer_export *); 128 static int pfsync_in_clr(struct mbuf *, int, int, int, int); 129 static int pfsync_in_ins(struct mbuf *, int, int, int, int); 130 static int pfsync_in_iack(struct mbuf *, int, int, int, int); 131 static int pfsync_in_upd(struct mbuf *, int, int, int, int); 132 static int pfsync_in_upd_c(struct mbuf *, int, int, int, int); 133 static int pfsync_in_ureq(struct mbuf *, int, int, int, int); 134 static int pfsync_in_del_c(struct mbuf *, int, int, int, int); 135 static int pfsync_in_bus(struct mbuf *, int, int, int, int); 136 static int pfsync_in_tdb(struct mbuf *, int, int, int, int); 137 static int pfsync_in_eof(struct mbuf *, int, int, int, int); 138 static int pfsync_in_error(struct mbuf *, int, int, int, int); 139 140 static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { 141 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 142 pfsync_in_ins, /* PFSYNC_ACT_INS_1301 */ 143 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 144 pfsync_in_upd, /* PFSYNC_ACT_UPD_1301 */ 145 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 146 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 147 pfsync_in_error, /* PFSYNC_ACT_DEL */ 148 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 149 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 150 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 151 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 152 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 153 pfsync_in_eof, /* PFSYNC_ACT_EOF */ 154 pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ 155 pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ 156 }; 157 158 struct pfsync_q { 159 void (*write)(struct pf_kstate *, void *); 160 size_t len; 161 u_int8_t action; 162 }; 163 164 /* We have the following sync queues */ 165 enum pfsync_q_id { 166 PFSYNC_Q_INS_1301, 167 PFSYNC_Q_INS_1400, 168 PFSYNC_Q_IACK, 169 PFSYNC_Q_UPD_1301, 170 PFSYNC_Q_UPD_1400, 171 PFSYNC_Q_UPD_C, 172 PFSYNC_Q_DEL_C, 173 PFSYNC_Q_COUNT, 174 }; 175 176 /* Functions for building messages for given queue */ 177 static void pfsync_out_state_1301(struct pf_kstate *, void *); 178 static void pfsync_out_state_1400(struct pf_kstate *, void *); 179 static void pfsync_out_iack(struct pf_kstate *, void *); 180 static void pfsync_out_upd_c(struct pf_kstate *, void *); 181 static void pfsync_out_del_c(struct pf_kstate *, void *); 182 183 /* Attach those functions to queue */ 184 static struct pfsync_q pfsync_qs[] = { 185 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, 186 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, 187 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 188 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, 189 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, 190 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 191 { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 192 }; 193 194 /* Map queue to pf_kstate->sync_state */ 195 static u_int8_t pfsync_qid_sstate[] = { 196 PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ 197 PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ 198 PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ 199 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ 200 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ 201 PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ 202 PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ 203 }; 204 205 /* Map pf_kstate->sync_state to queue */ 206 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t); 207 208 static void pfsync_q_ins(struct pf_kstate *, int sync_state, bool); 209 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 210 211 static void pfsync_update_state(struct pf_kstate *); 212 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 213 214 struct pfsync_upd_req_item { 215 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 216 struct pfsync_upd_req ur_msg; 217 }; 218 219 struct pfsync_deferral { 220 struct pfsync_softc *pd_sc; 221 TAILQ_ENTRY(pfsync_deferral) pd_entry; 222 struct callout pd_tmo; 223 224 struct pf_kstate *pd_st; 225 struct mbuf *pd_m; 226 }; 227 228 struct pfsync_bucket 229 { 230 int b_id; 231 struct pfsync_softc *b_sc; 232 struct mtx b_mtx; 233 struct callout b_tmo; 234 int b_flags; 235 #define PFSYNCF_BUCKET_PUSH 0x00000001 236 237 size_t b_len; 238 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_Q_COUNT]; 239 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 240 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 241 u_int b_deferred; 242 uint8_t *b_plus; 243 size_t b_pluslen; 244 245 struct ifaltq b_snd; 246 }; 247 248 struct pfsync_softc { 249 /* Configuration */ 250 struct ifnet *sc_ifp; 251 struct ifnet *sc_sync_if; 252 struct ip_moptions sc_imo; 253 struct ip6_moptions sc_im6o; 254 struct sockaddr_storage sc_sync_peer; 255 uint32_t sc_flags; 256 uint8_t sc_maxupdates; 257 union inet_template sc_template; 258 struct mtx sc_mtx; 259 uint32_t sc_version; 260 261 /* Queued data */ 262 struct pfsync_bucket *sc_buckets; 263 264 /* Bulk update info */ 265 struct mtx sc_bulk_mtx; 266 uint32_t sc_ureq_sent; 267 int sc_bulk_tries; 268 uint32_t sc_ureq_received; 269 int sc_bulk_hashid; 270 uint64_t sc_bulk_stateid; 271 uint32_t sc_bulk_creatorid; 272 struct callout sc_bulk_tmo; 273 struct callout sc_bulkfail_tmo; 274 }; 275 276 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 277 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 278 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 279 280 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 281 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 282 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 283 284 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 285 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 286 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 287 288 #define PFSYNC_DEFER_TIMEOUT 20 289 290 static const char pfsyncname[] = "pfsync"; 291 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 292 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 293 #define V_pfsyncif VNET(pfsyncif) 294 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 295 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 296 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 297 #define V_pfsync_swi_ie VNET(pfsync_swi_ie) 298 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 299 #define V_pfsyncstats VNET(pfsyncstats) 300 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 301 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 302 VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; 303 #define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) 304 305 static void pfsync_timeout(void *); 306 static void pfsync_push(struct pfsync_bucket *); 307 static void pfsync_push_all(struct pfsync_softc *); 308 static void pfsyncintr(void *); 309 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 310 struct in_mfilter *, struct in6_mfilter *); 311 static void pfsync_multicast_cleanup(struct pfsync_softc *); 312 static void pfsync_pointers_init(void); 313 static void pfsync_pointers_uninit(void); 314 static int pfsync_init(void); 315 static void pfsync_uninit(void); 316 317 static unsigned long pfsync_buckets; 318 319 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 320 "PFSYNC"); 321 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 322 &VNET_NAME(pfsyncstats), pfsyncstats, 323 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 324 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 325 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 326 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 327 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 328 SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, 329 &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); 330 331 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 332 static void pfsync_clone_destroy(struct ifnet *); 333 static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *, 334 struct pf_state_peer *); 335 static int pfsyncoutput(struct ifnet *, struct mbuf *, 336 const struct sockaddr *, struct route *); 337 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 338 339 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 340 static void pfsync_undefer(struct pfsync_deferral *, int); 341 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 342 static void pfsync_undefer_state(struct pf_kstate *, int); 343 static void pfsync_defer_tmo(void *); 344 345 static void pfsync_request_update(u_int32_t, u_int64_t); 346 static bool pfsync_update_state_req(struct pf_kstate *); 347 348 static void pfsync_drop_all(struct pfsync_softc *); 349 static void pfsync_drop(struct pfsync_softc *, int); 350 static void pfsync_sendout(int, int); 351 static void pfsync_send_plus(void *, size_t); 352 353 static void pfsync_bulk_start(void); 354 static void pfsync_bulk_status(u_int8_t); 355 static void pfsync_bulk_update(void *); 356 static void pfsync_bulk_fail(void *); 357 358 static void pfsync_detach_ifnet(struct ifnet *); 359 360 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 361 struct pfsync_kstatus *); 362 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 363 struct pfsync_softc *); 364 365 #ifdef IPSEC 366 static void pfsync_update_net_tdb(struct pfsync_tdb *); 367 #endif 368 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 369 struct pf_kstate *); 370 371 #define PFSYNC_MAX_BULKTRIES 12 372 373 VNET_DEFINE(struct if_clone *, pfsync_cloner); 374 #define V_pfsync_cloner VNET(pfsync_cloner) 375 376 const struct in6_addr in6addr_linklocal_pfsync_group = 377 {{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 378 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}}; 379 static int 380 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 381 { 382 struct pfsync_softc *sc; 383 struct ifnet *ifp; 384 struct pfsync_bucket *b; 385 int c; 386 enum pfsync_q_id q; 387 388 if (unit != 0) 389 return (EINVAL); 390 391 if (! pfsync_buckets) 392 pfsync_buckets = mp_ncpus * 2; 393 394 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 395 sc->sc_flags |= PFSYNCF_OK; 396 sc->sc_maxupdates = 128; 397 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 398 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 399 M_PFSYNC, M_ZERO | M_WAITOK); 400 for (c = 0; c < pfsync_buckets; c++) { 401 b = &sc->sc_buckets[c]; 402 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 403 404 b->b_id = c; 405 b->b_sc = sc; 406 b->b_len = PFSYNC_MINPKT; 407 408 for (q = 0; q < PFSYNC_Q_COUNT; q++) 409 TAILQ_INIT(&b->b_qs[q]); 410 411 TAILQ_INIT(&b->b_upd_req_list); 412 TAILQ_INIT(&b->b_deferrals); 413 414 callout_init(&b->b_tmo, 1); 415 416 b->b_snd.ifq_maxlen = ifqmaxlen; 417 } 418 419 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 420 if_initname(ifp, pfsyncname, unit); 421 ifp->if_softc = sc; 422 ifp->if_ioctl = pfsyncioctl; 423 ifp->if_output = pfsyncoutput; 424 ifp->if_hdrlen = sizeof(struct pfsync_header); 425 ifp->if_mtu = ETHERMTU; 426 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 427 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 428 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 429 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 430 431 if_attach(ifp); 432 433 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 434 435 V_pfsyncif = sc; 436 437 return (0); 438 } 439 440 static void 441 pfsync_clone_destroy(struct ifnet *ifp) 442 { 443 struct pfsync_softc *sc = ifp->if_softc; 444 struct pfsync_bucket *b; 445 int c, ret; 446 447 for (c = 0; c < pfsync_buckets; c++) { 448 b = &sc->sc_buckets[c]; 449 /* 450 * At this stage, everything should have already been 451 * cleared by pfsync_uninit(), and we have only to 452 * drain callouts. 453 */ 454 PFSYNC_BUCKET_LOCK(b); 455 while (b->b_deferred > 0) { 456 struct pfsync_deferral *pd = 457 TAILQ_FIRST(&b->b_deferrals); 458 459 ret = callout_stop(&pd->pd_tmo); 460 PFSYNC_BUCKET_UNLOCK(b); 461 if (ret > 0) { 462 pfsync_undefer(pd, 1); 463 } else { 464 callout_drain(&pd->pd_tmo); 465 } 466 PFSYNC_BUCKET_LOCK(b); 467 } 468 MPASS(b->b_deferred == 0); 469 MPASS(TAILQ_EMPTY(&b->b_deferrals)); 470 PFSYNC_BUCKET_UNLOCK(b); 471 472 free(b->b_plus, M_PFSYNC); 473 b->b_plus = NULL; 474 b->b_pluslen = 0; 475 476 callout_drain(&b->b_tmo); 477 } 478 479 callout_drain(&sc->sc_bulkfail_tmo); 480 callout_drain(&sc->sc_bulk_tmo); 481 482 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 483 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 484 bpfdetach(ifp); 485 if_detach(ifp); 486 487 pfsync_drop_all(sc); 488 489 if_free(ifp); 490 pfsync_multicast_cleanup(sc); 491 mtx_destroy(&sc->sc_mtx); 492 mtx_destroy(&sc->sc_bulk_mtx); 493 494 for (c = 0; c < pfsync_buckets; c++) { 495 b = &sc->sc_buckets[c]; 496 mtx_destroy(&b->b_mtx); 497 } 498 free(sc->sc_buckets, M_PFSYNC); 499 free(sc, M_PFSYNC); 500 501 V_pfsyncif = NULL; 502 } 503 504 static int 505 pfsync_alloc_scrub_memory(struct pf_state_peer_export *s, 506 struct pf_state_peer *d) 507 { 508 if (s->scrub.scrub_flag && d->scrub == NULL) { 509 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 510 if (d->scrub == NULL) 511 return (ENOMEM); 512 } 513 514 return (0); 515 } 516 517 static int 518 pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) 519 { 520 struct pfsync_softc *sc = V_pfsyncif; 521 #ifndef __NO_STRICT_ALIGNMENT 522 struct pfsync_state_key key[2]; 523 #endif 524 struct pfsync_state_key *kw, *ks; 525 struct pf_kstate *st = NULL; 526 struct pf_state_key *skw = NULL, *sks = NULL; 527 struct pf_krule *r = NULL; 528 struct pfi_kkif *kif; 529 struct pfi_kkif *rt_kif = NULL; 530 struct pf_kpooladdr *rpool_first; 531 int error; 532 sa_family_t rt_af = 0; 533 uint8_t rt = 0; 534 int n = 0; 535 536 PF_RULES_RASSERT(); 537 538 if (sp->pfs_1301.creatorid == 0) { 539 if (V_pf_status.debug >= PF_DEBUG_MISC) 540 printf("%s: invalid creator id: %08x\n", __func__, 541 ntohl(sp->pfs_1301.creatorid)); 542 return (EINVAL); 543 } 544 545 if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { 546 if (V_pf_status.debug >= PF_DEBUG_MISC) 547 printf("%s: unknown interface: %s\n", __func__, 548 sp->pfs_1301.ifname); 549 if (flags & PFSYNC_SI_IOCTL) 550 return (EINVAL); 551 return (0); /* skip this state */ 552 } 553 554 /* 555 * If the ruleset checksums match or the state is coming from the ioctl, 556 * it's safe to associate the state with the rule of that number. 557 */ 558 if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && 559 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < 560 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { 561 TAILQ_FOREACH(r, pf_main_ruleset.rules[ 562 PF_RULESET_FILTER].active.ptr, entries) 563 if (ntohl(sp->pfs_1301.rule) == n++) 564 break; 565 } else 566 r = &V_pf_default_rule; 567 568 /* 569 * Check routing interface early on. Do it before allocating memory etc. 570 * because there is a high chance there will be a lot more such states. 571 */ 572 switch (msg_version) { 573 case PFSYNC_MSG_VERSION_1301: 574 /* 575 * On FreeBSD <= 13 the routing interface and routing operation 576 * are not sent over pfsync. If the ruleset is identical, 577 * though, we might be able to recover the routing information 578 * from the local ruleset. 579 */ 580 if (r != &V_pf_default_rule) { 581 struct pf_kpool *pool = &r->route; 582 583 /* Backwards compatibility. */ 584 if (TAILQ_EMPTY(&pool->list)) 585 pool = &r->rdr; 586 587 /* 588 * The ruleset is identical, try to recover. If the rule 589 * has a redirection pool with a single interface, there 590 * is a chance that this interface is identical as on 591 * the pfsync peer. If there's more than one interface, 592 * give up, as we can't be sure that we will pick the 593 * same one as the pfsync peer did. 594 */ 595 rpool_first = TAILQ_FIRST(&(pool->list)); 596 if ((rpool_first == NULL) || 597 (TAILQ_NEXT(rpool_first, entries) != NULL)) { 598 DPFPRINTF(PF_DEBUG_MISC, 599 "%s: can't recover routing information " 600 "because of empty or bad redirection pool", 601 __func__); 602 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 603 } 604 rt = r->rt; 605 rt_kif = rpool_first->kif; 606 /* 607 * Guess the AF of the route address, FreeBSD 13 does 608 * not support af-to nor prefer-ipv6-nexthop 609 * so it should be safe. 610 */ 611 rt_af = r->af; 612 } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { 613 /* 614 * Ruleset different, routing *supposedly* requested, 615 * give up on recovering. 616 */ 617 DPFPRINTF(PF_DEBUG_MISC, 618 "%s: can't recover routing information " 619 "because of different ruleset", __func__); 620 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 621 } 622 break; 623 case PFSYNC_MSG_VERSION_1400: 624 /* 625 * On FreeBSD 14 and above we're not taking any chances. 626 * We use the information synced to us. 627 */ 628 if (sp->pfs_1400.rt) { 629 rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname); 630 if (rt_kif == NULL) { 631 DPFPRINTF(PF_DEBUG_MISC, 632 "%s: unknown route interface: %s", 633 __func__, sp->pfs_1400.rt_ifname); 634 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 635 } 636 rt = sp->pfs_1400.rt; 637 /* 638 * Guess the AF of the route address, FreeBSD 14 does 639 * not support af-to nor prefer-ipv6-nexthop 640 * so it should be safe. 641 */ 642 rt_af = sp->pfs_1400.af; 643 } 644 break; 645 } 646 647 if ((r->max_states && 648 counter_u64_fetch(r->states_cur) >= r->max_states)) 649 goto cleanup; 650 651 /* 652 * XXXGL: consider M_WAITOK in ioctl path after. 653 */ 654 st = pf_alloc_state(M_NOWAIT); 655 if (__predict_false(st == NULL)) 656 goto cleanup; 657 658 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 659 goto cleanup; 660 661 #ifndef __NO_STRICT_ALIGNMENT 662 bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2); 663 kw = &key[PF_SK_WIRE]; 664 ks = &key[PF_SK_STACK]; 665 #else 666 kw = &sp->pfs_1301.key[PF_SK_WIRE]; 667 ks = &sp->pfs_1301.key[PF_SK_STACK]; 668 #endif 669 670 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) || 671 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) || 672 kw->port[0] != ks->port[0] || 673 kw->port[1] != ks->port[1]) { 674 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 675 if (sks == NULL) 676 goto cleanup; 677 } else 678 sks = skw; 679 680 /* allocate memory for scrub info */ 681 if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) || 682 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst)) 683 goto cleanup; 684 685 /* Copy to state key(s). */ 686 skw->addr[0] = kw->addr[0]; 687 skw->addr[1] = kw->addr[1]; 688 skw->port[0] = kw->port[0]; 689 skw->port[1] = kw->port[1]; 690 skw->proto = sp->pfs_1301.proto; 691 skw->af = sp->pfs_1301.af; 692 if (sks != skw) { 693 sks->addr[0] = ks->addr[0]; 694 sks->addr[1] = ks->addr[1]; 695 sks->port[0] = ks->port[0]; 696 sks->port[1] = ks->port[1]; 697 sks->proto = sp->pfs_1301.proto; 698 sks->af = sp->pfs_1301.af; 699 } 700 701 /* copy to state */ 702 bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, sizeof(st->act.rt_addr)); 703 st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000; 704 st->expire = pf_get_uptime(); 705 if (sp->pfs_1301.expire) { 706 uint32_t timeout; 707 708 timeout = r->timeout[sp->pfs_1301.timeout]; 709 if (!timeout) 710 timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout]; 711 712 /* sp->expire may have been adaptively scaled by export. */ 713 st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; 714 } 715 716 st->direction = sp->pfs_1301.direction; 717 st->act.log = sp->pfs_1301.log; 718 st->timeout = sp->pfs_1301.timeout; 719 720 st->act.rt = rt; 721 st->act.rt_kif = rt_kif; 722 st->act.rt_af = rt_af; 723 724 switch (msg_version) { 725 case PFSYNC_MSG_VERSION_1301: 726 st->state_flags = sp->pfs_1301.state_flags; 727 /* 728 * In FreeBSD 13 pfsync lacks many attributes. Copy them 729 * from the rule if possible. If rule can't be matched 730 * clear any set options as we can't recover their 731 * parameters. 732 */ 733 if (r == &V_pf_default_rule) { 734 st->state_flags &= ~PFSTATE_SETMASK; 735 } else { 736 /* 737 * Similar to pf_rule_to_actions(). This code 738 * won't set the actions properly if they come 739 * from multiple "match" rules as only rule 740 * creating the state is send over pfsync. 741 */ 742 st->act.qid = r->qid; 743 st->act.pqid = r->pqid; 744 st->act.rtableid = r->rtableid; 745 if (r->scrub_flags & PFSTATE_SETTOS) 746 st->act.set_tos = r->set_tos; 747 st->act.min_ttl = r->min_ttl; 748 st->act.max_mss = r->max_mss; 749 st->state_flags |= (r->scrub_flags & 750 (PFSTATE_NODF|PFSTATE_RANDOMID| 751 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP| 752 PFSTATE_SETPRIO)); 753 if (r->dnpipe || r->dnrpipe) { 754 if (r->free_flags & PFRULE_DN_IS_PIPE) 755 st->state_flags |= PFSTATE_DN_IS_PIPE; 756 else 757 st->state_flags &= ~PFSTATE_DN_IS_PIPE; 758 } 759 st->act.dnpipe = r->dnpipe; 760 st->act.dnrpipe = r->dnrpipe; 761 } 762 break; 763 case PFSYNC_MSG_VERSION_1400: 764 st->state_flags = ntohs(sp->pfs_1400.state_flags); 765 st->act.qid = ntohs(sp->pfs_1400.qid); 766 st->act.pqid = ntohs(sp->pfs_1400.pqid); 767 st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); 768 st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe); 769 st->act.rtableid = ntohl(sp->pfs_1400.rtableid); 770 st->act.min_ttl = sp->pfs_1400.min_ttl; 771 st->act.set_tos = sp->pfs_1400.set_tos; 772 st->act.max_mss = ntohs(sp->pfs_1400.max_mss); 773 st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; 774 st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; 775 break; 776 default: 777 panic("%s: Unsupported pfsync_msg_version %d", 778 __func__, msg_version); 779 } 780 781 if (! (st->act.rtableid == -1 || 782 (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) 783 goto cleanup; 784 785 st->id = sp->pfs_1301.id; 786 st->creatorid = sp->pfs_1301.creatorid; 787 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 788 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 789 790 st->rule = r; 791 st->nat_rule = NULL; 792 st->anchor = NULL; 793 794 st->pfsync_time = time_uptime; 795 st->sync_state = PFSYNC_S_NONE; 796 797 if (!(flags & PFSYNC_SI_IOCTL)) 798 st->state_flags |= PFSTATE_NOSYNC; 799 800 if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) 801 goto cleanup_state; 802 803 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 804 counter_u64_add(r->states_cur, 1); 805 counter_u64_add(r->states_tot, 1); 806 807 if (!(flags & PFSYNC_SI_IOCTL)) { 808 st->state_flags &= ~PFSTATE_NOSYNC; 809 if (st->state_flags & PFSTATE_ACK) { 810 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 811 PFSYNC_BUCKET_LOCK(b); 812 pfsync_q_ins(st, PFSYNC_S_IACK, true); 813 PFSYNC_BUCKET_UNLOCK(b); 814 815 pfsync_push_all(sc); 816 } 817 } 818 st->state_flags &= ~PFSTATE_ACK; 819 PF_STATE_UNLOCK(st); 820 821 return (0); 822 823 cleanup: 824 error = ENOMEM; 825 826 if (skw == sks) 827 sks = NULL; 828 uma_zfree(V_pf_state_key_z, skw); 829 uma_zfree(V_pf_state_key_z, sks); 830 831 cleanup_state: /* pf_state_insert() frees the state keys. */ 832 if (st) { 833 st->timeout = PFTM_UNLINKED; /* appease an assert */ 834 pf_free_state(st); 835 } 836 return (error); 837 } 838 839 #ifdef INET 840 static int 841 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 842 { 843 struct pfsync_softc *sc = V_pfsyncif; 844 struct mbuf *m = *mp; 845 struct ip *ip = mtod(m, struct ip *); 846 struct pfsync_header *ph; 847 struct pfsync_subheader subh; 848 849 int offset, len, flags = 0; 850 int rv; 851 uint16_t count; 852 853 PF_RULES_RLOCK_TRACKER; 854 855 *mp = NULL; 856 V_pfsyncstats.pfsyncs_ipackets++; 857 858 /* Verify that we have a sync interface configured. */ 859 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 860 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 861 goto done; 862 863 /* verify that the packet came in on the right interface */ 864 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 865 V_pfsyncstats.pfsyncs_badif++; 866 goto done; 867 } 868 869 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 870 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 871 /* verify that the IP TTL is 255. */ 872 if (ip->ip_ttl != PFSYNC_DFLTTL) { 873 V_pfsyncstats.pfsyncs_badttl++; 874 goto done; 875 } 876 877 offset = ip->ip_hl << 2; 878 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 879 V_pfsyncstats.pfsyncs_hdrops++; 880 goto done; 881 } 882 883 if (offset + sizeof(*ph) > m->m_len) { 884 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 885 V_pfsyncstats.pfsyncs_hdrops++; 886 return (IPPROTO_DONE); 887 } 888 ip = mtod(m, struct ip *); 889 } 890 ph = (struct pfsync_header *)((char *)ip + offset); 891 892 /* verify the version */ 893 if (ph->version != PFSYNC_VERSION) { 894 V_pfsyncstats.pfsyncs_badver++; 895 goto done; 896 } 897 898 len = ntohs(ph->len) + offset; 899 if (m->m_pkthdr.len < len) { 900 V_pfsyncstats.pfsyncs_badlen++; 901 goto done; 902 } 903 904 /* 905 * Trusting pf_chksum during packet processing, as well as seeking 906 * in interface name tree, require holding PF_RULES_RLOCK(). 907 */ 908 PF_RULES_RLOCK(); 909 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 910 flags = PFSYNC_SI_CKSUM; 911 912 offset += sizeof(*ph); 913 while (offset <= len - sizeof(subh)) { 914 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 915 offset += sizeof(subh); 916 917 if (subh.action >= PFSYNC_ACT_MAX) { 918 V_pfsyncstats.pfsyncs_badact++; 919 PF_RULES_RUNLOCK(); 920 goto done; 921 } 922 923 count = ntohs(subh.count); 924 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 925 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 926 if (rv == -1) { 927 PF_RULES_RUNLOCK(); 928 return (IPPROTO_DONE); 929 } 930 931 offset += rv; 932 } 933 PF_RULES_RUNLOCK(); 934 935 done: 936 m_freem(m); 937 return (IPPROTO_DONE); 938 } 939 #endif 940 941 #ifdef INET6 942 static int 943 pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused) 944 { 945 struct pfsync_softc *sc = V_pfsyncif; 946 struct mbuf *m = *mp; 947 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 948 struct pfsync_header *ph; 949 struct pfsync_subheader subh; 950 951 int offset, len, flags = 0; 952 int rv; 953 uint16_t count; 954 955 PF_RULES_RLOCK_TRACKER; 956 957 *mp = NULL; 958 V_pfsyncstats.pfsyncs_ipackets++; 959 960 /* Verify that we have a sync interface configured. */ 961 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 962 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 963 goto done; 964 965 /* verify that the packet came in on the right interface */ 966 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 967 V_pfsyncstats.pfsyncs_badif++; 968 goto done; 969 } 970 971 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 972 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 973 /* verify that the IP TTL is 255. */ 974 if (ip6->ip6_hlim != PFSYNC_DFLTTL) { 975 V_pfsyncstats.pfsyncs_badttl++; 976 goto done; 977 } 978 979 980 offset = sizeof(*ip6); 981 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 982 V_pfsyncstats.pfsyncs_hdrops++; 983 goto done; 984 } 985 986 if (offset + sizeof(*ph) > m->m_len) { 987 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 988 V_pfsyncstats.pfsyncs_hdrops++; 989 return (IPPROTO_DONE); 990 } 991 ip6 = mtod(m, struct ip6_hdr *); 992 } 993 ph = (struct pfsync_header *)((char *)ip6 + offset); 994 995 /* verify the version */ 996 if (ph->version != PFSYNC_VERSION) { 997 V_pfsyncstats.pfsyncs_badver++; 998 goto done; 999 } 1000 1001 len = ntohs(ph->len) + offset; 1002 if (m->m_pkthdr.len < len) { 1003 V_pfsyncstats.pfsyncs_badlen++; 1004 goto done; 1005 } 1006 1007 /* 1008 * Trusting pf_chksum during packet processing, as well as seeking 1009 * in interface name tree, require holding PF_RULES_RLOCK(). 1010 */ 1011 PF_RULES_RLOCK(); 1012 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 1013 flags = PFSYNC_SI_CKSUM; 1014 1015 offset += sizeof(*ph); 1016 while (offset <= len - sizeof(subh)) { 1017 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 1018 offset += sizeof(subh); 1019 1020 if (subh.action >= PFSYNC_ACT_MAX) { 1021 V_pfsyncstats.pfsyncs_badact++; 1022 PF_RULES_RUNLOCK(); 1023 goto done; 1024 } 1025 1026 count = ntohs(subh.count); 1027 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 1028 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 1029 if (rv == -1) { 1030 PF_RULES_RUNLOCK(); 1031 return (IPPROTO_DONE); 1032 } 1033 1034 offset += rv; 1035 } 1036 PF_RULES_RUNLOCK(); 1037 1038 done: 1039 m_freem(m); 1040 return (IPPROTO_DONE); 1041 } 1042 #endif 1043 1044 static int 1045 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) 1046 { 1047 struct pfsync_clr *clr; 1048 struct mbuf *mp; 1049 int len = sizeof(*clr) * count; 1050 int i, offp; 1051 u_int32_t creatorid; 1052 1053 mp = m_pulldown(m, offset, len, &offp); 1054 if (mp == NULL) { 1055 V_pfsyncstats.pfsyncs_badlen++; 1056 return (-1); 1057 } 1058 clr = (struct pfsync_clr *)(mp->m_data + offp); 1059 1060 for (i = 0; i < count; i++) { 1061 creatorid = clr[i].creatorid; 1062 1063 if (clr[i].ifname[0] != '\0' && 1064 pfi_kkif_find(clr[i].ifname) == NULL) 1065 continue; 1066 1067 for (int i = 0; i <= V_pf_hashmask; i++) { 1068 struct pf_idhash *ih = &V_pf_idhash[i]; 1069 struct pf_kstate *s; 1070 relock: 1071 PF_HASHROW_LOCK(ih); 1072 LIST_FOREACH(s, &ih->states, entry) { 1073 if (s->creatorid == creatorid) { 1074 s->state_flags |= PFSTATE_NOSYNC; 1075 pf_remove_state(s); 1076 goto relock; 1077 } 1078 } 1079 PF_HASHROW_UNLOCK(ih); 1080 } 1081 } 1082 1083 return (len); 1084 } 1085 1086 static int 1087 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) 1088 { 1089 struct mbuf *mp; 1090 union pfsync_state_union *sa, *sp; 1091 int i, offp, total_len, msg_version, msg_len; 1092 1093 switch (action) { 1094 case PFSYNC_ACT_INS_1301: 1095 msg_len = sizeof(struct pfsync_state_1301); 1096 total_len = msg_len * count; 1097 msg_version = PFSYNC_MSG_VERSION_1301; 1098 break; 1099 case PFSYNC_ACT_INS_1400: 1100 msg_len = sizeof(struct pfsync_state_1400); 1101 total_len = msg_len * count; 1102 msg_version = PFSYNC_MSG_VERSION_1400; 1103 break; 1104 default: 1105 V_pfsyncstats.pfsyncs_badver++; 1106 return (-1); 1107 } 1108 1109 mp = m_pulldown(m, offset, total_len, &offp); 1110 if (mp == NULL) { 1111 V_pfsyncstats.pfsyncs_badlen++; 1112 return (-1); 1113 } 1114 sa = (union pfsync_state_union *)(mp->m_data + offp); 1115 1116 for (i = 0; i < count; i++) { 1117 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1118 1119 /* Check for invalid values. */ 1120 if (sp->pfs_1301.timeout >= PFTM_MAX || 1121 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1122 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || 1123 sp->pfs_1301.direction > PF_OUT || 1124 (sp->pfs_1301.af != AF_INET && 1125 sp->pfs_1301.af != AF_INET6)) { 1126 if (V_pf_status.debug >= PF_DEBUG_MISC) 1127 printf("%s: invalid value\n", __func__); 1128 V_pfsyncstats.pfsyncs_badval++; 1129 continue; 1130 } 1131 1132 if (pfsync_state_import(sp, flags, msg_version) != 0) 1133 V_pfsyncstats.pfsyncs_badact++; 1134 } 1135 1136 return (total_len); 1137 } 1138 1139 static int 1140 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) 1141 { 1142 struct pfsync_ins_ack *ia, *iaa; 1143 struct pf_kstate *st; 1144 1145 struct mbuf *mp; 1146 int len = count * sizeof(*ia); 1147 int offp, i; 1148 1149 mp = m_pulldown(m, offset, len, &offp); 1150 if (mp == NULL) { 1151 V_pfsyncstats.pfsyncs_badlen++; 1152 return (-1); 1153 } 1154 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 1155 1156 for (i = 0; i < count; i++) { 1157 ia = &iaa[i]; 1158 1159 st = pf_find_state_byid(ia->id, ia->creatorid); 1160 if (st == NULL) 1161 continue; 1162 1163 if (st->state_flags & PFSTATE_ACK) { 1164 pfsync_undefer_state(st, 0); 1165 } 1166 PF_STATE_UNLOCK(st); 1167 } 1168 /* 1169 * XXX this is not yet implemented, but we know the size of the 1170 * message so we can skip it. 1171 */ 1172 1173 return (count * sizeof(struct pfsync_ins_ack)); 1174 } 1175 1176 static int 1177 pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src, 1178 struct pf_state_peer_export *dst) 1179 { 1180 int sync = 0; 1181 1182 PF_STATE_LOCK_ASSERT(st); 1183 1184 /* 1185 * The state should never go backwards except 1186 * for syn-proxy states. Neither should the 1187 * sequence window slide backwards. 1188 */ 1189 if ((st->src.state > src->state && 1190 (st->src.state < PF_TCPS_PROXY_SRC || 1191 src->state >= PF_TCPS_PROXY_SRC)) || 1192 1193 (st->src.state == src->state && 1194 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 1195 sync++; 1196 else 1197 pf_state_peer_ntoh(src, &st->src); 1198 1199 if ((st->dst.state > dst->state) || 1200 1201 (st->dst.state >= TCPS_SYN_SENT && 1202 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 1203 sync++; 1204 else 1205 pf_state_peer_ntoh(dst, &st->dst); 1206 1207 return (sync); 1208 } 1209 1210 static int 1211 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) 1212 { 1213 struct pfsync_softc *sc = V_pfsyncif; 1214 union pfsync_state_union *sa, *sp; 1215 struct pf_kstate *st; 1216 struct mbuf *mp; 1217 int sync, offp, i, total_len, msg_len, msg_version; 1218 1219 switch (action) { 1220 case PFSYNC_ACT_UPD_1301: 1221 msg_len = sizeof(struct pfsync_state_1301); 1222 total_len = msg_len * count; 1223 msg_version = PFSYNC_MSG_VERSION_1301; 1224 break; 1225 case PFSYNC_ACT_UPD_1400: 1226 msg_len = sizeof(struct pfsync_state_1400); 1227 total_len = msg_len * count; 1228 msg_version = PFSYNC_MSG_VERSION_1400; 1229 break; 1230 default: 1231 V_pfsyncstats.pfsyncs_badact++; 1232 return (-1); 1233 } 1234 1235 mp = m_pulldown(m, offset, total_len, &offp); 1236 if (mp == NULL) { 1237 V_pfsyncstats.pfsyncs_badlen++; 1238 return (-1); 1239 } 1240 sa = (union pfsync_state_union *)(mp->m_data + offp); 1241 1242 for (i = 0; i < count; i++) { 1243 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1244 1245 /* check for invalid values */ 1246 if (sp->pfs_1301.timeout >= PFTM_MAX || 1247 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1248 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { 1249 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1250 printf("pfsync_input: PFSYNC_ACT_UPD: " 1251 "invalid value\n"); 1252 } 1253 V_pfsyncstats.pfsyncs_badval++; 1254 continue; 1255 } 1256 1257 st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid); 1258 if (st == NULL) { 1259 /* insert the update */ 1260 if (pfsync_state_import(sp, flags, msg_version)) 1261 V_pfsyncstats.pfsyncs_badstate++; 1262 continue; 1263 } 1264 1265 if (st->state_flags & PFSTATE_ACK) { 1266 pfsync_undefer_state(st, 1); 1267 } 1268 1269 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1270 sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst); 1271 else { 1272 sync = 0; 1273 1274 /* 1275 * Non-TCP protocol state machine always go 1276 * forwards 1277 */ 1278 if (st->src.state > sp->pfs_1301.src.state) 1279 sync++; 1280 else 1281 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 1282 if (st->dst.state > sp->pfs_1301.dst.state) 1283 sync++; 1284 else 1285 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1286 } 1287 if (sync < 2) { 1288 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); 1289 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1290 st->expire = pf_get_uptime(); 1291 st->timeout = sp->pfs_1301.timeout; 1292 } 1293 st->pfsync_time = time_uptime; 1294 1295 if (sync) { 1296 V_pfsyncstats.pfsyncs_stale++; 1297 1298 pfsync_update_state(st); 1299 PF_STATE_UNLOCK(st); 1300 pfsync_push_all(sc); 1301 continue; 1302 } 1303 PF_STATE_UNLOCK(st); 1304 } 1305 1306 return (total_len); 1307 } 1308 1309 static int 1310 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action) 1311 { 1312 struct pfsync_softc *sc = V_pfsyncif; 1313 struct pfsync_upd_c *ua, *up; 1314 struct pf_kstate *st; 1315 int len = count * sizeof(*up); 1316 int sync; 1317 struct mbuf *mp; 1318 int offp, i; 1319 1320 mp = m_pulldown(m, offset, len, &offp); 1321 if (mp == NULL) { 1322 V_pfsyncstats.pfsyncs_badlen++; 1323 return (-1); 1324 } 1325 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 1326 1327 for (i = 0; i < count; i++) { 1328 up = &ua[i]; 1329 1330 /* check for invalid values */ 1331 if (up->timeout >= PFTM_MAX || 1332 up->src.state > PF_TCPS_PROXY_DST || 1333 up->dst.state > PF_TCPS_PROXY_DST) { 1334 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1335 printf("pfsync_input: " 1336 "PFSYNC_ACT_UPD_C: " 1337 "invalid value\n"); 1338 } 1339 V_pfsyncstats.pfsyncs_badval++; 1340 continue; 1341 } 1342 1343 st = pf_find_state_byid(up->id, up->creatorid); 1344 if (st == NULL) { 1345 /* We don't have this state. Ask for it. */ 1346 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1347 pfsync_request_update(up->creatorid, up->id); 1348 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1349 continue; 1350 } 1351 1352 if (st->state_flags & PFSTATE_ACK) { 1353 pfsync_undefer_state(st, 1); 1354 } 1355 1356 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1357 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1358 else { 1359 sync = 0; 1360 1361 /* 1362 * Non-TCP protocol state machine always go 1363 * forwards 1364 */ 1365 if (st->src.state > up->src.state) 1366 sync++; 1367 else 1368 pf_state_peer_ntoh(&up->src, &st->src); 1369 if (st->dst.state > up->dst.state) 1370 sync++; 1371 else 1372 pf_state_peer_ntoh(&up->dst, &st->dst); 1373 } 1374 if (sync < 2) { 1375 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1376 pf_state_peer_ntoh(&up->dst, &st->dst); 1377 st->expire = pf_get_uptime(); 1378 st->timeout = up->timeout; 1379 } 1380 st->pfsync_time = time_uptime; 1381 1382 if (sync) { 1383 V_pfsyncstats.pfsyncs_stale++; 1384 1385 pfsync_update_state(st); 1386 PF_STATE_UNLOCK(st); 1387 pfsync_push_all(sc); 1388 continue; 1389 } 1390 PF_STATE_UNLOCK(st); 1391 } 1392 1393 return (len); 1394 } 1395 1396 static int 1397 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action) 1398 { 1399 struct pfsync_upd_req *ur, *ura; 1400 struct mbuf *mp; 1401 int len = count * sizeof(*ur); 1402 int i, offp; 1403 1404 struct pf_kstate *st; 1405 1406 mp = m_pulldown(m, offset, len, &offp); 1407 if (mp == NULL) { 1408 V_pfsyncstats.pfsyncs_badlen++; 1409 return (-1); 1410 } 1411 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1412 1413 for (i = 0; i < count; i++) { 1414 ur = &ura[i]; 1415 1416 if (ur->id == 0 && ur->creatorid == 0) 1417 pfsync_bulk_start(); 1418 else { 1419 st = pf_find_state_byid(ur->id, ur->creatorid); 1420 if (st == NULL) { 1421 V_pfsyncstats.pfsyncs_badstate++; 1422 continue; 1423 } 1424 if (st->state_flags & PFSTATE_NOSYNC) { 1425 PF_STATE_UNLOCK(st); 1426 continue; 1427 } 1428 1429 pfsync_update_state_req(st); 1430 PF_STATE_UNLOCK(st); 1431 } 1432 } 1433 1434 return (len); 1435 } 1436 1437 static int 1438 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action) 1439 { 1440 struct mbuf *mp; 1441 struct pfsync_del_c *sa, *sp; 1442 struct pf_kstate *st; 1443 int len = count * sizeof(*sp); 1444 int offp, i; 1445 1446 mp = m_pulldown(m, offset, len, &offp); 1447 if (mp == NULL) { 1448 V_pfsyncstats.pfsyncs_badlen++; 1449 return (-1); 1450 } 1451 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1452 1453 for (i = 0; i < count; i++) { 1454 sp = &sa[i]; 1455 1456 st = pf_find_state_byid(sp->id, sp->creatorid); 1457 if (st == NULL) { 1458 V_pfsyncstats.pfsyncs_badstate++; 1459 continue; 1460 } 1461 1462 st->state_flags |= PFSTATE_NOSYNC; 1463 pf_remove_state(st); 1464 } 1465 1466 return (len); 1467 } 1468 1469 static int 1470 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action) 1471 { 1472 struct pfsync_softc *sc = V_pfsyncif; 1473 struct pfsync_bus *bus; 1474 struct mbuf *mp; 1475 int len = count * sizeof(*bus); 1476 int offp; 1477 1478 PFSYNC_BLOCK(sc); 1479 1480 /* If we're not waiting for a bulk update, who cares. */ 1481 if (sc->sc_ureq_sent == 0) { 1482 PFSYNC_BUNLOCK(sc); 1483 return (len); 1484 } 1485 1486 mp = m_pulldown(m, offset, len, &offp); 1487 if (mp == NULL) { 1488 PFSYNC_BUNLOCK(sc); 1489 V_pfsyncstats.pfsyncs_badlen++; 1490 return (-1); 1491 } 1492 bus = (struct pfsync_bus *)(mp->m_data + offp); 1493 1494 switch (bus->status) { 1495 case PFSYNC_BUS_START: 1496 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1497 V_pf_limits[PF_LIMIT_STATES].limit / 1498 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1499 sizeof(union pfsync_state_union)), 1500 pfsync_bulk_fail, sc); 1501 if (V_pf_status.debug >= PF_DEBUG_MISC) 1502 printf("pfsync: received bulk update start\n"); 1503 break; 1504 1505 case PFSYNC_BUS_END: 1506 if (time_uptime - ntohl(bus->endtime) >= 1507 sc->sc_ureq_sent) { 1508 /* that's it, we're happy */ 1509 sc->sc_ureq_sent = 0; 1510 sc->sc_bulk_tries = 0; 1511 callout_stop(&sc->sc_bulkfail_tmo); 1512 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1513 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1514 "pfsync bulk done"); 1515 sc->sc_flags |= PFSYNCF_OK; 1516 if (V_pf_status.debug >= PF_DEBUG_MISC) 1517 printf("pfsync: received valid " 1518 "bulk update end\n"); 1519 } else { 1520 if (V_pf_status.debug >= PF_DEBUG_MISC) 1521 printf("pfsync: received invalid " 1522 "bulk update end: bad timestamp\n"); 1523 } 1524 break; 1525 } 1526 PFSYNC_BUNLOCK(sc); 1527 1528 return (len); 1529 } 1530 1531 static int 1532 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action) 1533 { 1534 int len = count * sizeof(struct pfsync_tdb); 1535 1536 #if defined(IPSEC) 1537 struct pfsync_tdb *tp; 1538 struct mbuf *mp; 1539 int offp; 1540 int i; 1541 int s; 1542 1543 mp = m_pulldown(m, offset, len, &offp); 1544 if (mp == NULL) { 1545 V_pfsyncstats.pfsyncs_badlen++; 1546 return (-1); 1547 } 1548 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1549 1550 for (i = 0; i < count; i++) 1551 pfsync_update_net_tdb(&tp[i]); 1552 #endif 1553 1554 return (len); 1555 } 1556 1557 #if defined(IPSEC) 1558 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1559 static void 1560 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1561 { 1562 struct tdb *tdb; 1563 int s; 1564 1565 /* check for invalid values */ 1566 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1567 (pt->dst.sa.sa_family != AF_INET && 1568 pt->dst.sa.sa_family != AF_INET6)) 1569 goto bad; 1570 1571 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1572 if (tdb) { 1573 pt->rpl = ntohl(pt->rpl); 1574 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1575 1576 /* Neither replay nor byte counter should ever decrease. */ 1577 if (pt->rpl < tdb->tdb_rpl || 1578 pt->cur_bytes < tdb->tdb_cur_bytes) { 1579 goto bad; 1580 } 1581 1582 tdb->tdb_rpl = pt->rpl; 1583 tdb->tdb_cur_bytes = pt->cur_bytes; 1584 } 1585 return; 1586 1587 bad: 1588 if (V_pf_status.debug >= PF_DEBUG_MISC) 1589 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1590 "invalid value\n"); 1591 V_pfsyncstats.pfsyncs_badstate++; 1592 return; 1593 } 1594 #endif 1595 1596 static int 1597 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action) 1598 { 1599 /* check if we are at the right place in the packet */ 1600 if (offset != m->m_pkthdr.len) 1601 V_pfsyncstats.pfsyncs_badlen++; 1602 1603 /* we're done. free and let the caller return */ 1604 m_freem(m); 1605 return (-1); 1606 } 1607 1608 static int 1609 pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action) 1610 { 1611 V_pfsyncstats.pfsyncs_badact++; 1612 1613 m_freem(m); 1614 return (-1); 1615 } 1616 1617 static int 1618 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1619 struct route *rt) 1620 { 1621 m_freem(m); 1622 return (0); 1623 } 1624 1625 /* ARGSUSED */ 1626 static int 1627 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1628 { 1629 struct pfsync_softc *sc = ifp->if_softc; 1630 struct ifreq *ifr = (struct ifreq *)data; 1631 struct pfsyncreq pfsyncr; 1632 size_t nvbuflen; 1633 int error; 1634 int c; 1635 1636 switch (cmd) { 1637 case SIOCSIFFLAGS: 1638 PFSYNC_LOCK(sc); 1639 if (ifp->if_flags & IFF_UP) { 1640 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1641 PFSYNC_UNLOCK(sc); 1642 pfsync_pointers_init(); 1643 } else { 1644 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1645 PFSYNC_UNLOCK(sc); 1646 pfsync_pointers_uninit(); 1647 } 1648 break; 1649 case SIOCSIFMTU: 1650 if (!sc->sc_sync_if || 1651 ifr->ifr_mtu <= PFSYNC_MINPKT || 1652 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1653 return (EINVAL); 1654 if (ifr->ifr_mtu < ifp->if_mtu) { 1655 for (c = 0; c < pfsync_buckets; c++) { 1656 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1657 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1658 pfsync_sendout(1, c); 1659 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1660 } 1661 } 1662 ifp->if_mtu = ifr->ifr_mtu; 1663 break; 1664 case SIOCGETPFSYNC: 1665 bzero(&pfsyncr, sizeof(pfsyncr)); 1666 PFSYNC_LOCK(sc); 1667 if (sc->sc_sync_if) { 1668 strlcpy(pfsyncr.pfsyncr_syncdev, 1669 sc->sc_sync_if->if_xname, IFNAMSIZ); 1670 } 1671 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1672 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1673 pfsyncr.pfsyncr_defer = sc->sc_flags; 1674 PFSYNC_UNLOCK(sc); 1675 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1676 sizeof(pfsyncr))); 1677 1678 case SIOCGETPFSYNCNV: 1679 { 1680 nvlist_t *nvl_syncpeer; 1681 nvlist_t *nvl = nvlist_create(0); 1682 1683 if (nvl == NULL) 1684 return (ENOMEM); 1685 1686 if (sc->sc_sync_if) 1687 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1688 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1689 nvlist_add_number(nvl, "flags", sc->sc_flags); 1690 nvlist_add_number(nvl, "version", sc->sc_version); 1691 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1692 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1693 1694 void *packed = NULL; 1695 packed = nvlist_pack(nvl, &nvbuflen); 1696 if (packed == NULL) { 1697 free(packed, M_NVLIST); 1698 nvlist_destroy(nvl); 1699 return (ENOMEM); 1700 } 1701 1702 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1703 ifr->ifr_cap_nv.length = nvbuflen; 1704 ifr->ifr_cap_nv.buffer = NULL; 1705 free(packed, M_NVLIST); 1706 nvlist_destroy(nvl); 1707 return (EFBIG); 1708 } 1709 1710 ifr->ifr_cap_nv.length = nvbuflen; 1711 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1712 1713 nvlist_destroy(nvl); 1714 nvlist_destroy(nvl_syncpeer); 1715 free(packed, M_NVLIST); 1716 break; 1717 } 1718 1719 case SIOCSETPFSYNC: 1720 { 1721 struct pfsync_kstatus status; 1722 1723 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1724 return (error); 1725 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1726 sizeof(pfsyncr)))) 1727 return (error); 1728 1729 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1730 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1731 1732 error = pfsync_kstatus_to_softc(&status, sc); 1733 return (error); 1734 } 1735 case SIOCSETPFSYNCNV: 1736 { 1737 struct pfsync_kstatus status; 1738 void *data; 1739 nvlist_t *nvl; 1740 1741 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1742 return (error); 1743 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1744 return (EINVAL); 1745 1746 data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); 1747 1748 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1749 ifr->ifr_cap_nv.length)) != 0) { 1750 free(data, M_TEMP); 1751 return (error); 1752 } 1753 1754 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1755 free(data, M_TEMP); 1756 return (EINVAL); 1757 } 1758 1759 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1760 pfsync_nvstatus_to_kstatus(nvl, &status); 1761 1762 nvlist_destroy(nvl); 1763 free(data, M_TEMP); 1764 1765 error = pfsync_kstatus_to_softc(&status, sc); 1766 return (error); 1767 } 1768 default: 1769 return (ENOTTY); 1770 } 1771 1772 return (0); 1773 } 1774 1775 static void 1776 pfsync_out_state_1301(struct pf_kstate *st, void *buf) 1777 { 1778 union pfsync_state_union *sp = buf; 1779 1780 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301); 1781 } 1782 1783 static void 1784 pfsync_out_state_1400(struct pf_kstate *st, void *buf) 1785 { 1786 union pfsync_state_union *sp = buf; 1787 1788 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400); 1789 } 1790 1791 static void 1792 pfsync_out_iack(struct pf_kstate *st, void *buf) 1793 { 1794 struct pfsync_ins_ack *iack = buf; 1795 1796 iack->id = st->id; 1797 iack->creatorid = st->creatorid; 1798 } 1799 1800 static void 1801 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1802 { 1803 struct pfsync_upd_c *up = buf; 1804 1805 bzero(up, sizeof(*up)); 1806 up->id = st->id; 1807 pf_state_peer_hton(&st->src, &up->src); 1808 pf_state_peer_hton(&st->dst, &up->dst); 1809 up->creatorid = st->creatorid; 1810 up->timeout = st->timeout; 1811 } 1812 1813 static void 1814 pfsync_out_del_c(struct pf_kstate *st, void *buf) 1815 { 1816 struct pfsync_del_c *dp = buf; 1817 1818 dp->id = st->id; 1819 dp->creatorid = st->creatorid; 1820 st->state_flags |= PFSTATE_NOSYNC; 1821 } 1822 1823 static void 1824 pfsync_drop_all(struct pfsync_softc *sc) 1825 { 1826 struct pfsync_bucket *b; 1827 int c; 1828 1829 for (c = 0; c < pfsync_buckets; c++) { 1830 b = &sc->sc_buckets[c]; 1831 1832 PFSYNC_BUCKET_LOCK(b); 1833 pfsync_drop(sc, c); 1834 PFSYNC_BUCKET_UNLOCK(b); 1835 } 1836 } 1837 1838 static void 1839 pfsync_drop(struct pfsync_softc *sc, int c) 1840 { 1841 struct pf_kstate *st, *next; 1842 struct pfsync_upd_req_item *ur; 1843 struct pfsync_bucket *b; 1844 enum pfsync_q_id q; 1845 1846 b = &sc->sc_buckets[c]; 1847 PFSYNC_BUCKET_LOCK_ASSERT(b); 1848 1849 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1850 if (TAILQ_EMPTY(&b->b_qs[q])) 1851 continue; 1852 1853 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1854 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1855 ("%s: st->sync_state %d == q %d", 1856 __func__, st->sync_state, q)); 1857 st->sync_state = PFSYNC_S_NONE; 1858 pf_release_state(st); 1859 } 1860 TAILQ_INIT(&b->b_qs[q]); 1861 } 1862 1863 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1864 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1865 free(ur, M_PFSYNC); 1866 } 1867 1868 b->b_len = PFSYNC_MINPKT; 1869 free(b->b_plus, M_PFSYNC); 1870 b->b_plus = NULL; 1871 b->b_pluslen = 0; 1872 } 1873 1874 static void 1875 pfsync_sendout(int schedswi, int c) 1876 { 1877 struct pfsync_softc *sc = V_pfsyncif; 1878 struct ifnet *ifp = sc->sc_ifp; 1879 struct mbuf *m; 1880 struct pfsync_header *ph; 1881 struct pfsync_subheader *subh; 1882 struct pf_kstate *st, *st_next; 1883 struct pfsync_upd_req_item *ur; 1884 struct pfsync_bucket *b = &sc->sc_buckets[c]; 1885 size_t len; 1886 int aflen, offset, count = 0; 1887 enum pfsync_q_id q; 1888 1889 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1890 KASSERT(b->b_len > PFSYNC_MINPKT, 1891 ("%s: sc_len %zu", __func__, b->b_len)); 1892 PFSYNC_BUCKET_LOCK_ASSERT(b); 1893 1894 if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) { 1895 pfsync_drop(sc, c); 1896 return; 1897 } 1898 1899 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1900 if (m == NULL) { 1901 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1902 V_pfsyncstats.pfsyncs_onomem++; 1903 return; 1904 } 1905 m->m_data += max_linkhdr; 1906 bzero(m->m_data, b->b_len); 1907 1908 len = b->b_len; 1909 1910 /* build the ip header */ 1911 switch (sc->sc_sync_peer.ss_family) { 1912 #ifdef INET 1913 case AF_INET: 1914 { 1915 struct ip *ip; 1916 1917 ip = mtod(m, struct ip *); 1918 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 1919 aflen = offset = sizeof(*ip); 1920 1921 len -= sizeof(union inet_template) - sizeof(struct ip); 1922 ip->ip_len = htons(len); 1923 ip_fillid(ip, V_ip_random_id); 1924 break; 1925 } 1926 #endif 1927 #ifdef INET6 1928 case AF_INET6: 1929 { 1930 struct ip6_hdr *ip6; 1931 1932 ip6 = mtod(m, struct ip6_hdr *); 1933 bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6)); 1934 aflen = offset = sizeof(*ip6); 1935 1936 len -= sizeof(union inet_template) - sizeof(struct ip6_hdr); 1937 ip6->ip6_plen = htons(len); 1938 break; 1939 } 1940 #endif 1941 default: 1942 m_freem(m); 1943 pfsync_drop(sc, c); 1944 return; 1945 } 1946 m->m_len = m->m_pkthdr.len = len; 1947 1948 /* build the pfsync header */ 1949 ph = (struct pfsync_header *)(m->m_data + offset); 1950 offset += sizeof(*ph); 1951 1952 ph->version = PFSYNC_VERSION; 1953 ph->len = htons(len - aflen); 1954 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1955 1956 /* walk the queues */ 1957 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1958 if (TAILQ_EMPTY(&b->b_qs[q])) 1959 continue; 1960 1961 subh = (struct pfsync_subheader *)(m->m_data + offset); 1962 offset += sizeof(*subh); 1963 1964 count = 0; 1965 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 1966 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1967 ("%s: st->sync_state == q", 1968 __func__)); 1969 /* 1970 * XXXGL: some of write methods do unlocked reads 1971 * of state data :( 1972 */ 1973 pfsync_qs[q].write(st, m->m_data + offset); 1974 offset += pfsync_qs[q].len; 1975 st->sync_state = PFSYNC_S_NONE; 1976 pf_release_state(st); 1977 count++; 1978 } 1979 TAILQ_INIT(&b->b_qs[q]); 1980 1981 subh->action = pfsync_qs[q].action; 1982 subh->count = htons(count); 1983 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1984 } 1985 1986 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 1987 subh = (struct pfsync_subheader *)(m->m_data + offset); 1988 offset += sizeof(*subh); 1989 1990 count = 0; 1991 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1992 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1993 1994 bcopy(&ur->ur_msg, m->m_data + offset, 1995 sizeof(ur->ur_msg)); 1996 offset += sizeof(ur->ur_msg); 1997 free(ur, M_PFSYNC); 1998 count++; 1999 } 2000 2001 subh->action = PFSYNC_ACT_UPD_REQ; 2002 subh->count = htons(count); 2003 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 2004 } 2005 2006 /* has someone built a custom region for us to add? */ 2007 if (b->b_plus != NULL) { 2008 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 2009 offset += b->b_pluslen; 2010 2011 free(b->b_plus, M_PFSYNC); 2012 b->b_plus = NULL; 2013 b->b_pluslen = 0; 2014 } 2015 2016 subh = (struct pfsync_subheader *)(m->m_data + offset); 2017 offset += sizeof(*subh); 2018 2019 subh->action = PFSYNC_ACT_EOF; 2020 subh->count = htons(1); 2021 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 2022 2023 /* we're done, let's put it on the wire */ 2024 if (bpf_peers_present(ifp->if_bpf)) { 2025 m->m_data += aflen; 2026 m->m_len = m->m_pkthdr.len = len - aflen; 2027 bpf_mtap(ifp->if_bpf, m); 2028 m->m_data -= aflen; 2029 m->m_len = m->m_pkthdr.len = len; 2030 } 2031 2032 if (sc->sc_sync_if == NULL) { 2033 b->b_len = PFSYNC_MINPKT; 2034 m_freem(m); 2035 return; 2036 } 2037 2038 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 2039 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 2040 b->b_len = PFSYNC_MINPKT; 2041 2042 if (!_IF_QFULL(&b->b_snd)) 2043 _IF_ENQUEUE(&b->b_snd, m); 2044 else { 2045 m_freem(m); 2046 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 2047 } 2048 if (schedswi) 2049 swi_sched(V_pfsync_swi_cookie, 0); 2050 } 2051 2052 static void 2053 pfsync_insert_state(struct pf_kstate *st) 2054 { 2055 struct pfsync_softc *sc = V_pfsyncif; 2056 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2057 2058 if (st->state_flags & PFSTATE_NOSYNC) 2059 return; 2060 2061 if ((st->rule->rule_flag & PFRULE_NOSYNC) || 2062 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 2063 st->state_flags |= PFSTATE_NOSYNC; 2064 return; 2065 } 2066 2067 KASSERT(st->sync_state == PFSYNC_S_NONE, 2068 ("%s: st->sync_state %u", __func__, st->sync_state)); 2069 2070 PFSYNC_BUCKET_LOCK(b); 2071 if (b->b_len == PFSYNC_MINPKT) 2072 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2073 2074 pfsync_q_ins(st, PFSYNC_S_INS, true); 2075 PFSYNC_BUCKET_UNLOCK(b); 2076 2077 st->sync_updates = 0; 2078 } 2079 2080 static int 2081 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 2082 { 2083 struct pfsync_softc *sc = V_pfsyncif; 2084 struct pfsync_deferral *pd; 2085 struct pfsync_bucket *b; 2086 2087 if (m->m_flags & (M_BCAST|M_MCAST)) 2088 return (0); 2089 2090 if (sc == NULL) 2091 return (0); 2092 2093 b = pfsync_get_bucket(sc, st); 2094 2095 PFSYNC_LOCK(sc); 2096 2097 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 2098 !(sc->sc_flags & PFSYNCF_DEFER)) { 2099 PFSYNC_UNLOCK(sc); 2100 return (0); 2101 } 2102 2103 PFSYNC_BUCKET_LOCK(b); 2104 PFSYNC_UNLOCK(sc); 2105 2106 if (b->b_deferred >= 128) 2107 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 2108 2109 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 2110 if (pd == NULL) { 2111 PFSYNC_BUCKET_UNLOCK(b); 2112 return (0); 2113 } 2114 b->b_deferred++; 2115 2116 m->m_flags |= M_SKIP_FIREWALL; 2117 st->state_flags |= PFSTATE_ACK; 2118 2119 pd->pd_sc = sc; 2120 pd->pd_st = st; 2121 pf_ref_state(st); 2122 pd->pd_m = m; 2123 2124 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 2125 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 2126 callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, 2127 pfsync_defer_tmo, pd); 2128 2129 pfsync_push(b); 2130 PFSYNC_BUCKET_UNLOCK(b); 2131 2132 return (1); 2133 } 2134 2135 static void 2136 pfsync_undefer(struct pfsync_deferral *pd, int drop) 2137 { 2138 struct pfsync_softc *sc = pd->pd_sc; 2139 struct mbuf *m = pd->pd_m; 2140 struct pf_kstate *st = pd->pd_st; 2141 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2142 2143 PFSYNC_BUCKET_LOCK_ASSERT(b); 2144 2145 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2146 b->b_deferred--; 2147 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2148 free(pd, M_PFSYNC); 2149 pf_release_state(st); 2150 2151 if (drop) 2152 m_freem(m); 2153 else { 2154 _IF_ENQUEUE(&b->b_snd, m); 2155 pfsync_push(b); 2156 } 2157 } 2158 2159 static void 2160 pfsync_defer_tmo(void *arg) 2161 { 2162 struct epoch_tracker et; 2163 struct pfsync_deferral *pd = arg; 2164 struct pfsync_softc *sc = pd->pd_sc; 2165 struct mbuf *m = pd->pd_m; 2166 struct pf_kstate *st = pd->pd_st; 2167 struct pfsync_bucket *b; 2168 2169 CURVNET_SET(sc->sc_ifp->if_vnet); 2170 2171 b = pfsync_get_bucket(sc, st); 2172 2173 PFSYNC_BUCKET_LOCK_ASSERT(b); 2174 2175 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2176 b->b_deferred--; 2177 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2178 PFSYNC_BUCKET_UNLOCK(b); 2179 free(pd, M_PFSYNC); 2180 2181 if (sc->sc_sync_if == NULL) { 2182 pf_release_state(st); 2183 m_freem(m); 2184 CURVNET_RESTORE(); 2185 return; 2186 } 2187 2188 NET_EPOCH_ENTER(et); 2189 2190 pfsync_tx(sc, m); 2191 2192 pf_release_state(st); 2193 2194 CURVNET_RESTORE(); 2195 NET_EPOCH_EXIT(et); 2196 } 2197 2198 static void 2199 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 2200 { 2201 struct pfsync_softc *sc = V_pfsyncif; 2202 struct pfsync_deferral *pd; 2203 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2204 2205 PFSYNC_BUCKET_LOCK_ASSERT(b); 2206 2207 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 2208 if (pd->pd_st == st) { 2209 if (callout_stop(&pd->pd_tmo) > 0) 2210 pfsync_undefer(pd, drop); 2211 2212 return; 2213 } 2214 } 2215 2216 panic("%s: unable to find deferred state", __func__); 2217 } 2218 2219 static void 2220 pfsync_undefer_state(struct pf_kstate *st, int drop) 2221 { 2222 struct pfsync_softc *sc = V_pfsyncif; 2223 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2224 2225 PFSYNC_BUCKET_LOCK(b); 2226 pfsync_undefer_state_locked(st, drop); 2227 PFSYNC_BUCKET_UNLOCK(b); 2228 } 2229 2230 static struct pfsync_bucket* 2231 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 2232 { 2233 int c = PF_IDHASH(st) % pfsync_buckets; 2234 return &sc->sc_buckets[c]; 2235 } 2236 2237 static void 2238 pfsync_update_state(struct pf_kstate *st) 2239 { 2240 struct pfsync_softc *sc = V_pfsyncif; 2241 bool sync = false, ref = true; 2242 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2243 2244 PF_STATE_LOCK_ASSERT(st); 2245 PFSYNC_BUCKET_LOCK(b); 2246 2247 if (st->state_flags & PFSTATE_ACK) 2248 pfsync_undefer_state_locked(st, 0); 2249 if (st->state_flags & PFSTATE_NOSYNC) { 2250 if (st->sync_state != PFSYNC_S_NONE) 2251 pfsync_q_del(st, true, b); 2252 PFSYNC_BUCKET_UNLOCK(b); 2253 return; 2254 } 2255 2256 if (b->b_len == PFSYNC_MINPKT) 2257 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2258 2259 switch (st->sync_state) { 2260 case PFSYNC_S_UPD_C: 2261 case PFSYNC_S_UPD: 2262 case PFSYNC_S_INS: 2263 /* we're already handling it */ 2264 2265 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2266 st->sync_updates++; 2267 if (st->sync_updates >= sc->sc_maxupdates) 2268 sync = true; 2269 } 2270 break; 2271 2272 case PFSYNC_S_IACK: 2273 pfsync_q_del(st, false, b); 2274 ref = false; 2275 /* FALLTHROUGH */ 2276 2277 case PFSYNC_S_NONE: 2278 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 2279 st->sync_updates = 0; 2280 break; 2281 2282 default: 2283 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2284 } 2285 2286 if (sync || (time_uptime - st->pfsync_time) < 2) 2287 pfsync_push(b); 2288 2289 PFSYNC_BUCKET_UNLOCK(b); 2290 } 2291 2292 static void 2293 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 2294 { 2295 struct pfsync_softc *sc = V_pfsyncif; 2296 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2297 struct pfsync_upd_req_item *item; 2298 size_t nlen = sizeof(struct pfsync_upd_req); 2299 2300 PFSYNC_BUCKET_LOCK_ASSERT(b); 2301 2302 /* 2303 * This code does a bit to prevent multiple update requests for the 2304 * same state being generated. It searches current subheader queue, 2305 * but it doesn't lookup into queue of already packed datagrams. 2306 */ 2307 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 2308 if (item->ur_msg.id == id && 2309 item->ur_msg.creatorid == creatorid) 2310 return; 2311 2312 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 2313 if (item == NULL) 2314 return; /* XXX stats */ 2315 2316 item->ur_msg.id = id; 2317 item->ur_msg.creatorid = creatorid; 2318 2319 if (TAILQ_EMPTY(&b->b_upd_req_list)) 2320 nlen += sizeof(struct pfsync_subheader); 2321 2322 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2323 pfsync_sendout(0, 0); 2324 2325 nlen = sizeof(struct pfsync_subheader) + 2326 sizeof(struct pfsync_upd_req); 2327 } 2328 2329 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 2330 b->b_len += nlen; 2331 2332 pfsync_push(b); 2333 } 2334 2335 static bool 2336 pfsync_update_state_req(struct pf_kstate *st) 2337 { 2338 struct pfsync_softc *sc = V_pfsyncif; 2339 bool ref = true, full = false; 2340 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2341 2342 PF_STATE_LOCK_ASSERT(st); 2343 PFSYNC_BUCKET_LOCK(b); 2344 2345 if (st->state_flags & PFSTATE_NOSYNC) { 2346 if (st->sync_state != PFSYNC_S_NONE) 2347 pfsync_q_del(st, true, b); 2348 PFSYNC_BUCKET_UNLOCK(b); 2349 return (full); 2350 } 2351 2352 switch (st->sync_state) { 2353 case PFSYNC_S_UPD_C: 2354 case PFSYNC_S_IACK: 2355 pfsync_q_del(st, false, b); 2356 ref = false; 2357 /* FALLTHROUGH */ 2358 2359 case PFSYNC_S_NONE: 2360 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2361 pfsync_push(b); 2362 break; 2363 2364 case PFSYNC_S_INS: 2365 case PFSYNC_S_UPD: 2366 case PFSYNC_S_DEL_C: 2367 /* we're already handling it */ 2368 break; 2369 2370 default: 2371 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2372 } 2373 2374 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union)) 2375 full = true; 2376 2377 PFSYNC_BUCKET_UNLOCK(b); 2378 2379 return (full); 2380 } 2381 2382 static void 2383 pfsync_delete_state(struct pf_kstate *st) 2384 { 2385 struct pfsync_softc *sc = V_pfsyncif; 2386 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2387 bool ref = true; 2388 2389 PFSYNC_BUCKET_LOCK(b); 2390 if (st->state_flags & PFSTATE_ACK) 2391 pfsync_undefer_state_locked(st, 1); 2392 if (st->state_flags & PFSTATE_NOSYNC) { 2393 if (st->sync_state != PFSYNC_S_NONE) 2394 pfsync_q_del(st, true, b); 2395 PFSYNC_BUCKET_UNLOCK(b); 2396 return; 2397 } 2398 2399 if (b->b_len == PFSYNC_MINPKT) 2400 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2401 2402 switch (st->sync_state) { 2403 case PFSYNC_S_INS: 2404 /* We never got to tell the world so just forget about it. */ 2405 pfsync_q_del(st, true, b); 2406 break; 2407 2408 case PFSYNC_S_UPD_C: 2409 case PFSYNC_S_UPD: 2410 case PFSYNC_S_IACK: 2411 pfsync_q_del(st, false, b); 2412 ref = false; 2413 /* FALLTHROUGH */ 2414 2415 case PFSYNC_S_NONE: 2416 pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); 2417 break; 2418 2419 default: 2420 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2421 } 2422 2423 PFSYNC_BUCKET_UNLOCK(b); 2424 } 2425 2426 static void 2427 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2428 { 2429 struct { 2430 struct pfsync_subheader subh; 2431 struct pfsync_clr clr; 2432 } __packed r; 2433 2434 bzero(&r, sizeof(r)); 2435 2436 r.subh.action = PFSYNC_ACT_CLR; 2437 r.subh.count = htons(1); 2438 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2439 2440 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2441 r.clr.creatorid = creatorid; 2442 2443 pfsync_send_plus(&r, sizeof(r)); 2444 } 2445 2446 static enum pfsync_q_id 2447 pfsync_sstate_to_qid(u_int8_t sync_state) 2448 { 2449 struct pfsync_softc *sc = V_pfsyncif; 2450 2451 switch (sync_state) { 2452 case PFSYNC_S_INS: 2453 switch (sc->sc_version) { 2454 case PFSYNC_MSG_VERSION_1301: 2455 return PFSYNC_Q_INS_1301; 2456 case PFSYNC_MSG_VERSION_1400: 2457 return PFSYNC_Q_INS_1400; 2458 } 2459 break; 2460 case PFSYNC_S_IACK: 2461 return PFSYNC_Q_IACK; 2462 case PFSYNC_S_UPD: 2463 switch (sc->sc_version) { 2464 case PFSYNC_MSG_VERSION_1301: 2465 return PFSYNC_Q_UPD_1301; 2466 case PFSYNC_MSG_VERSION_1400: 2467 return PFSYNC_Q_UPD_1400; 2468 } 2469 break; 2470 case PFSYNC_S_UPD_C: 2471 return PFSYNC_Q_UPD_C; 2472 case PFSYNC_S_DEL_C: 2473 return PFSYNC_Q_DEL_C; 2474 default: 2475 panic("%s: Unsupported st->sync_state 0x%02x", 2476 __func__, sync_state); 2477 } 2478 2479 panic("%s: Unsupported pfsync_msg_version %d", 2480 __func__, sc->sc_version); 2481 } 2482 2483 static void 2484 pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref) 2485 { 2486 enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state); 2487 struct pfsync_softc *sc = V_pfsyncif; 2488 size_t nlen = pfsync_qs[q].len; 2489 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2490 2491 PFSYNC_BUCKET_LOCK_ASSERT(b); 2492 2493 KASSERT(st->sync_state == PFSYNC_S_NONE, 2494 ("%s: st->sync_state %u", __func__, st->sync_state)); 2495 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2496 b->b_len)); 2497 2498 if (TAILQ_EMPTY(&b->b_qs[q])) 2499 nlen += sizeof(struct pfsync_subheader); 2500 2501 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2502 pfsync_sendout(1, b->b_id); 2503 2504 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2505 } 2506 2507 b->b_len += nlen; 2508 st->sync_state = pfsync_qid_sstate[q]; 2509 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2510 if (ref) 2511 pf_ref_state(st); 2512 } 2513 2514 static void 2515 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2516 { 2517 enum pfsync_q_id q; 2518 2519 PFSYNC_BUCKET_LOCK_ASSERT(b); 2520 KASSERT(st->sync_state != PFSYNC_S_NONE, 2521 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2522 2523 q = pfsync_sstate_to_qid(st->sync_state); 2524 b->b_len -= pfsync_qs[q].len; 2525 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2526 st->sync_state = PFSYNC_S_NONE; 2527 if (unref) 2528 pf_release_state(st); 2529 2530 if (TAILQ_EMPTY(&b->b_qs[q])) 2531 b->b_len -= sizeof(struct pfsync_subheader); 2532 } 2533 2534 static void 2535 pfsync_bulk_start(void) 2536 { 2537 struct pfsync_softc *sc = V_pfsyncif; 2538 2539 if (V_pf_status.debug >= PF_DEBUG_MISC) 2540 printf("pfsync: received bulk update request\n"); 2541 2542 PFSYNC_BLOCK(sc); 2543 2544 sc->sc_ureq_received = time_uptime; 2545 sc->sc_bulk_hashid = 0; 2546 sc->sc_bulk_stateid = 0; 2547 pfsync_bulk_status(PFSYNC_BUS_START); 2548 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2549 PFSYNC_BUNLOCK(sc); 2550 } 2551 2552 static void 2553 pfsync_bulk_update(void *arg) 2554 { 2555 struct pfsync_softc *sc = arg; 2556 struct pf_kstate *s; 2557 int i; 2558 2559 PFSYNC_BLOCK_ASSERT(sc); 2560 CURVNET_SET(sc->sc_ifp->if_vnet); 2561 2562 /* 2563 * Start with last state from previous invocation. 2564 * It may had gone, in this case start from the 2565 * hash slot. 2566 */ 2567 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2568 2569 if (s != NULL) 2570 i = PF_IDHASH(s); 2571 else 2572 i = sc->sc_bulk_hashid; 2573 2574 for (; i <= V_pf_hashmask; i++) { 2575 struct pf_idhash *ih = &V_pf_idhash[i]; 2576 2577 if (s != NULL) 2578 PF_HASHROW_ASSERT(ih); 2579 else { 2580 PF_HASHROW_LOCK(ih); 2581 s = LIST_FIRST(&ih->states); 2582 } 2583 2584 for (; s; s = LIST_NEXT(s, entry)) { 2585 if (s->sync_state == PFSYNC_S_NONE && 2586 s->timeout < PFTM_MAX && 2587 s->pfsync_time <= sc->sc_ureq_received) { 2588 if (pfsync_update_state_req(s)) { 2589 /* We've filled a packet. */ 2590 sc->sc_bulk_hashid = i; 2591 sc->sc_bulk_stateid = s->id; 2592 sc->sc_bulk_creatorid = s->creatorid; 2593 PF_HASHROW_UNLOCK(ih); 2594 callout_reset(&sc->sc_bulk_tmo, 1, 2595 pfsync_bulk_update, sc); 2596 goto full; 2597 } 2598 } 2599 } 2600 PF_HASHROW_UNLOCK(ih); 2601 } 2602 2603 /* We're done. */ 2604 pfsync_bulk_status(PFSYNC_BUS_END); 2605 full: 2606 CURVNET_RESTORE(); 2607 } 2608 2609 static void 2610 pfsync_bulk_status(u_int8_t status) 2611 { 2612 struct { 2613 struct pfsync_subheader subh; 2614 struct pfsync_bus bus; 2615 } __packed r; 2616 2617 struct pfsync_softc *sc = V_pfsyncif; 2618 2619 bzero(&r, sizeof(r)); 2620 2621 r.subh.action = PFSYNC_ACT_BUS; 2622 r.subh.count = htons(1); 2623 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2624 2625 r.bus.creatorid = V_pf_status.hostid; 2626 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2627 r.bus.status = status; 2628 2629 pfsync_send_plus(&r, sizeof(r)); 2630 } 2631 2632 static void 2633 pfsync_bulk_fail(void *arg) 2634 { 2635 struct pfsync_softc *sc = arg; 2636 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2637 2638 CURVNET_SET(sc->sc_ifp->if_vnet); 2639 2640 PFSYNC_BLOCK_ASSERT(sc); 2641 2642 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2643 /* Try again */ 2644 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2645 pfsync_bulk_fail, V_pfsyncif); 2646 PFSYNC_BUCKET_LOCK(b); 2647 pfsync_request_update(0, 0); 2648 PFSYNC_BUCKET_UNLOCK(b); 2649 } else { 2650 /* Pretend like the transfer was ok. */ 2651 sc->sc_ureq_sent = 0; 2652 sc->sc_bulk_tries = 0; 2653 PFSYNC_LOCK(sc); 2654 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2655 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2656 "pfsync bulk fail"); 2657 sc->sc_flags |= PFSYNCF_OK; 2658 PFSYNC_UNLOCK(sc); 2659 if (V_pf_status.debug >= PF_DEBUG_MISC) 2660 printf("pfsync: failed to receive bulk update\n"); 2661 } 2662 2663 CURVNET_RESTORE(); 2664 } 2665 2666 static void 2667 pfsync_send_plus(void *plus, size_t pluslen) 2668 { 2669 struct pfsync_softc *sc = V_pfsyncif; 2670 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2671 uint8_t *newplus; 2672 2673 PFSYNC_BUCKET_LOCK(b); 2674 2675 if (b->b_len + pluslen > sc->sc_ifp->if_mtu) 2676 pfsync_sendout(1, b->b_id); 2677 2678 newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT); 2679 if (newplus == NULL) 2680 goto out; 2681 2682 if (b->b_plus != NULL) { 2683 memcpy(newplus, b->b_plus, b->b_pluslen); 2684 free(b->b_plus, M_PFSYNC); 2685 } else { 2686 MPASS(b->b_pluslen == 0); 2687 } 2688 memcpy(newplus + b->b_pluslen, plus, pluslen); 2689 2690 b->b_plus = newplus; 2691 b->b_pluslen += pluslen; 2692 b->b_len += pluslen; 2693 2694 pfsync_sendout(1, b->b_id); 2695 2696 out: 2697 PFSYNC_BUCKET_UNLOCK(b); 2698 } 2699 2700 static void 2701 pfsync_timeout(void *arg) 2702 { 2703 struct pfsync_bucket *b = arg; 2704 2705 CURVNET_SET(b->b_sc->sc_ifp->if_vnet); 2706 PFSYNC_BUCKET_LOCK(b); 2707 pfsync_push(b); 2708 PFSYNC_BUCKET_UNLOCK(b); 2709 CURVNET_RESTORE(); 2710 } 2711 2712 static void 2713 pfsync_push(struct pfsync_bucket *b) 2714 { 2715 2716 PFSYNC_BUCKET_LOCK_ASSERT(b); 2717 2718 b->b_flags |= PFSYNCF_BUCKET_PUSH; 2719 swi_sched(V_pfsync_swi_cookie, 0); 2720 } 2721 2722 static void 2723 pfsync_push_all(struct pfsync_softc *sc) 2724 { 2725 int c; 2726 struct pfsync_bucket *b; 2727 2728 for (c = 0; c < pfsync_buckets; c++) { 2729 b = &sc->sc_buckets[c]; 2730 2731 PFSYNC_BUCKET_LOCK(b); 2732 pfsync_push(b); 2733 PFSYNC_BUCKET_UNLOCK(b); 2734 } 2735 } 2736 2737 static void 2738 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) 2739 { 2740 struct ip *ip; 2741 int af, error = 0; 2742 2743 ip = mtod(m, struct ip *); 2744 MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); 2745 2746 af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; 2747 2748 /* 2749 * We distinguish between a deferral packet and our 2750 * own pfsync packet based on M_SKIP_FIREWALL 2751 * flag. This is XXX. 2752 */ 2753 switch (af) { 2754 #ifdef INET 2755 case AF_INET: 2756 if (m->m_flags & M_SKIP_FIREWALL) { 2757 error = ip_output(m, NULL, NULL, 0, 2758 NULL, NULL); 2759 } else { 2760 error = ip_output(m, NULL, NULL, 2761 IP_RAWOUTPUT, &sc->sc_imo, NULL); 2762 } 2763 break; 2764 #endif 2765 #ifdef INET6 2766 case AF_INET6: 2767 if (m->m_flags & M_SKIP_FIREWALL) { 2768 error = ip6_output(m, NULL, NULL, 0, 2769 NULL, NULL, NULL); 2770 } else { 2771 error = ip6_output(m, NULL, NULL, 0, 2772 &sc->sc_im6o, NULL, NULL); 2773 } 2774 break; 2775 #endif 2776 } 2777 2778 if (error == 0) 2779 V_pfsyncstats.pfsyncs_opackets++; 2780 else 2781 V_pfsyncstats.pfsyncs_oerrors++; 2782 2783 } 2784 2785 static void 2786 pfsyncintr(void *arg) 2787 { 2788 struct epoch_tracker et; 2789 struct pfsync_softc *sc = arg; 2790 struct pfsync_bucket *b; 2791 struct mbuf *m, *n; 2792 int c; 2793 2794 NET_EPOCH_ENTER(et); 2795 CURVNET_SET(sc->sc_ifp->if_vnet); 2796 2797 for (c = 0; c < pfsync_buckets; c++) { 2798 b = &sc->sc_buckets[c]; 2799 2800 PFSYNC_BUCKET_LOCK(b); 2801 if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { 2802 pfsync_sendout(0, b->b_id); 2803 b->b_flags &= ~PFSYNCF_BUCKET_PUSH; 2804 } 2805 _IF_DEQUEUE_ALL(&b->b_snd, m); 2806 PFSYNC_BUCKET_UNLOCK(b); 2807 2808 for (; m != NULL; m = n) { 2809 n = m->m_nextpkt; 2810 m->m_nextpkt = NULL; 2811 2812 pfsync_tx(sc, m); 2813 } 2814 } 2815 CURVNET_RESTORE(); 2816 NET_EPOCH_EXIT(et); 2817 } 2818 2819 static int 2820 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, 2821 struct in_mfilter* imf, struct in6_mfilter* im6f) 2822 { 2823 #ifdef INET 2824 struct ip_moptions *imo = &sc->sc_imo; 2825 #endif 2826 #ifdef INET6 2827 struct ip6_moptions *im6o = &sc->sc_im6o; 2828 struct sockaddr_in6 *syncpeer_sa6 = NULL; 2829 #endif 2830 2831 if (!(ifp->if_flags & IFF_MULTICAST)) 2832 return (EADDRNOTAVAIL); 2833 2834 switch (sc->sc_sync_peer.ss_family) { 2835 #ifdef INET 2836 case AF_INET: 2837 { 2838 int error; 2839 2840 ip_mfilter_init(&imo->imo_head); 2841 imo->imo_multicast_vif = -1; 2842 if ((error = in_joingroup(ifp, 2843 &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, 2844 &imf->imf_inm)) != 0) 2845 return (error); 2846 2847 ip_mfilter_insert(&imo->imo_head, imf); 2848 imo->imo_multicast_ifp = ifp; 2849 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2850 imo->imo_multicast_loop = 0; 2851 break; 2852 } 2853 #endif 2854 #ifdef INET6 2855 case AF_INET6: 2856 { 2857 int error; 2858 2859 syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer; 2860 if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL))) 2861 return (error); 2862 2863 ip6_mfilter_init(&im6o->im6o_head); 2864 if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL, 2865 &(im6f->im6f_in6m), 0)) != 0) 2866 return (error); 2867 2868 ip6_mfilter_insert(&im6o->im6o_head, im6f); 2869 im6o->im6o_multicast_ifp = ifp; 2870 im6o->im6o_multicast_hlim = PFSYNC_DFLTTL; 2871 im6o->im6o_multicast_loop = 0; 2872 break; 2873 } 2874 #endif 2875 } 2876 2877 return (0); 2878 } 2879 2880 static void 2881 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2882 { 2883 #ifdef INET 2884 struct ip_moptions *imo = &sc->sc_imo; 2885 struct in_mfilter *imf; 2886 2887 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2888 ip_mfilter_remove(&imo->imo_head, imf); 2889 in_leavegroup(imf->imf_inm, NULL); 2890 ip_mfilter_free(imf); 2891 } 2892 imo->imo_multicast_ifp = NULL; 2893 #endif 2894 2895 #ifdef INET6 2896 struct ip6_moptions *im6o = &sc->sc_im6o; 2897 struct in6_mfilter *im6f; 2898 2899 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 2900 ip6_mfilter_remove(&im6o->im6o_head, im6f); 2901 in6_leavegroup(im6f->im6f_in6m, NULL); 2902 ip6_mfilter_free(im6f); 2903 } 2904 im6o->im6o_multicast_ifp = NULL; 2905 #endif 2906 } 2907 2908 void 2909 pfsync_detach_ifnet(struct ifnet *ifp) 2910 { 2911 struct pfsync_softc *sc = V_pfsyncif; 2912 2913 if (sc == NULL) 2914 return; 2915 2916 PFSYNC_LOCK(sc); 2917 2918 if (sc->sc_sync_if == ifp) { 2919 /* We don't need mutlicast cleanup here, because the interface 2920 * is going away. We do need to ensure we don't try to do 2921 * cleanup later. 2922 */ 2923 ip_mfilter_init(&sc->sc_imo.imo_head); 2924 sc->sc_imo.imo_multicast_ifp = NULL; 2925 sc->sc_im6o.im6o_multicast_ifp = NULL; 2926 sc->sc_sync_if = NULL; 2927 } 2928 2929 PFSYNC_UNLOCK(sc); 2930 } 2931 2932 static int 2933 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 2934 { 2935 struct sockaddr_storage sa; 2936 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 2937 status->flags = pfsyncr->pfsyncr_defer; 2938 2939 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 2940 2941 memset(&sa, 0, sizeof(sa)); 2942 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 2943 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 2944 in->sin_family = AF_INET; 2945 in->sin_len = sizeof(*in); 2946 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 2947 } 2948 status->syncpeer = sa; 2949 2950 return 0; 2951 } 2952 2953 static int 2954 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 2955 { 2956 struct ifnet *sifp; 2957 struct in_mfilter *imf = NULL; 2958 struct in6_mfilter *im6f = NULL; 2959 int error; 2960 int c; 2961 2962 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 2963 return (EINVAL); 2964 2965 if (status->syncdev[0] == '\0') 2966 sifp = NULL; 2967 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 2968 return (EINVAL); 2969 2970 switch (status->syncpeer.ss_family) { 2971 #ifdef INET 2972 case AF_UNSPEC: 2973 case AF_INET: { 2974 struct sockaddr_in *status_sin; 2975 status_sin = (struct sockaddr_in *)&(status->syncpeer); 2976 if (sifp != NULL) { 2977 if (status_sin->sin_addr.s_addr == 0 || 2978 status_sin->sin_addr.s_addr == 2979 htonl(INADDR_PFSYNC_GROUP)) { 2980 status_sin->sin_family = AF_INET; 2981 status_sin->sin_len = sizeof(*status_sin); 2982 status_sin->sin_addr.s_addr = 2983 htonl(INADDR_PFSYNC_GROUP); 2984 } 2985 2986 if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) { 2987 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 2988 } 2989 } 2990 break; 2991 } 2992 #endif 2993 #ifdef INET6 2994 case AF_INET6: { 2995 struct sockaddr_in6 *status_sin6; 2996 status_sin6 = (struct sockaddr_in6*)&(status->syncpeer); 2997 if (sifp != NULL) { 2998 if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) || 2999 IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr, 3000 &in6addr_linklocal_pfsync_group)) { 3001 status_sin6->sin6_family = AF_INET6; 3002 status_sin6->sin6_len = sizeof(*status_sin6); 3003 status_sin6->sin6_addr = 3004 in6addr_linklocal_pfsync_group; 3005 } 3006 3007 if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) { 3008 im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0); 3009 } 3010 } 3011 break; 3012 } 3013 #endif 3014 } 3015 3016 PFSYNC_LOCK(sc); 3017 3018 switch (status->version) { 3019 case PFSYNC_MSG_VERSION_UNSPECIFIED: 3020 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 3021 break; 3022 case PFSYNC_MSG_VERSION_1301: 3023 case PFSYNC_MSG_VERSION_1400: 3024 sc->sc_version = status->version; 3025 break; 3026 default: 3027 PFSYNC_UNLOCK(sc); 3028 return (EINVAL); 3029 } 3030 3031 switch (status->syncpeer.ss_family) { 3032 case AF_INET: { 3033 struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer); 3034 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 3035 sc_sin->sin_family = AF_INET; 3036 sc_sin->sin_len = sizeof(*sc_sin); 3037 if (status_sin->sin_addr.s_addr == 0) { 3038 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 3039 } else { 3040 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 3041 } 3042 break; 3043 } 3044 case AF_INET6: { 3045 struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer); 3046 struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer; 3047 sc_sin->sin6_family = AF_INET6; 3048 sc_sin->sin6_len = sizeof(*sc_sin); 3049 if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) { 3050 sc_sin->sin6_addr = in6addr_linklocal_pfsync_group; 3051 } else { 3052 sc_sin->sin6_addr = status_sin->sin6_addr; 3053 } 3054 break; 3055 } 3056 } 3057 3058 sc->sc_maxupdates = status->maxupdates; 3059 if (status->flags & PFSYNCF_DEFER) { 3060 sc->sc_flags |= PFSYNCF_DEFER; 3061 V_pfsync_defer_ptr = pfsync_defer; 3062 } else { 3063 sc->sc_flags &= ~PFSYNCF_DEFER; 3064 V_pfsync_defer_ptr = NULL; 3065 } 3066 3067 if (sifp == NULL) { 3068 if (sc->sc_sync_if) 3069 if_rele(sc->sc_sync_if); 3070 sc->sc_sync_if = NULL; 3071 pfsync_multicast_cleanup(sc); 3072 PFSYNC_UNLOCK(sc); 3073 return (0); 3074 } 3075 3076 for (c = 0; c < pfsync_buckets; c++) { 3077 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 3078 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 3079 (sifp->if_mtu < sc->sc_ifp->if_mtu || 3080 (sc->sc_sync_if != NULL && 3081 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 3082 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 3083 pfsync_sendout(1, c); 3084 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 3085 } 3086 3087 pfsync_multicast_cleanup(sc); 3088 3089 if (((sc->sc_sync_peer.ss_family == AF_INET) && 3090 IN_MULTICAST(ntohl(((struct sockaddr_in *) 3091 &sc->sc_sync_peer)->sin_addr.s_addr))) || 3092 ((sc->sc_sync_peer.ss_family == AF_INET6) && 3093 IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*) 3094 &sc->sc_sync_peer)->sin6_addr))) { 3095 error = pfsync_multicast_setup(sc, sifp, imf, im6f); 3096 if (error) { 3097 if_rele(sifp); 3098 PFSYNC_UNLOCK(sc); 3099 #ifdef INET 3100 if (imf != NULL) 3101 ip_mfilter_free(imf); 3102 #endif 3103 #ifdef INET6 3104 if (im6f != NULL) 3105 ip6_mfilter_free(im6f); 3106 #endif 3107 return (error); 3108 } 3109 } 3110 if (sc->sc_sync_if) 3111 if_rele(sc->sc_sync_if); 3112 sc->sc_sync_if = sifp; 3113 3114 switch (sc->sc_sync_peer.ss_family) { 3115 #ifdef INET 3116 case AF_INET: { 3117 struct ip *ip; 3118 ip = &sc->sc_template.ipv4; 3119 bzero(ip, sizeof(*ip)); 3120 ip->ip_v = IPVERSION; 3121 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 3122 ip->ip_tos = IPTOS_LOWDELAY; 3123 /* len and id are set later. */ 3124 ip->ip_off = htons(IP_DF); 3125 ip->ip_ttl = PFSYNC_DFLTTL; 3126 ip->ip_p = IPPROTO_PFSYNC; 3127 ip->ip_src.s_addr = INADDR_ANY; 3128 ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 3129 break; 3130 } 3131 #endif 3132 #ifdef INET6 3133 case AF_INET6: { 3134 struct ip6_hdr *ip6; 3135 ip6 = &sc->sc_template.ipv6; 3136 bzero(ip6, sizeof(*ip6)); 3137 ip6->ip6_vfc = IPV6_VERSION; 3138 ip6->ip6_hlim = PFSYNC_DFLTTL; 3139 ip6->ip6_nxt = IPPROTO_PFSYNC; 3140 ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr; 3141 3142 struct epoch_tracker et; 3143 NET_EPOCH_ENTER(et); 3144 in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0, 3145 sc->sc_sync_if, &ip6->ip6_src, NULL); 3146 NET_EPOCH_EXIT(et); 3147 break; 3148 } 3149 #endif 3150 } 3151 3152 /* Request a full state table update. */ 3153 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 3154 (*carp_demote_adj_p)(V_pfsync_carp_adj, 3155 "pfsync bulk start"); 3156 sc->sc_flags &= ~PFSYNCF_OK; 3157 if (V_pf_status.debug >= PF_DEBUG_MISC) 3158 printf("pfsync: requesting bulk update\n"); 3159 PFSYNC_UNLOCK(sc); 3160 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 3161 pfsync_request_update(0, 0); 3162 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 3163 PFSYNC_BLOCK(sc); 3164 sc->sc_ureq_sent = time_uptime; 3165 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); 3166 PFSYNC_BUNLOCK(sc); 3167 return (0); 3168 } 3169 3170 static void 3171 pfsync_pointers_init(void) 3172 { 3173 3174 PF_RULES_WLOCK(); 3175 V_pfsync_state_import_ptr = pfsync_state_import; 3176 V_pfsync_insert_state_ptr = pfsync_insert_state; 3177 V_pfsync_update_state_ptr = pfsync_update_state; 3178 V_pfsync_delete_state_ptr = pfsync_delete_state; 3179 V_pfsync_clear_states_ptr = pfsync_clear_states; 3180 V_pfsync_defer_ptr = pfsync_defer; 3181 PF_RULES_WUNLOCK(); 3182 } 3183 3184 static void 3185 pfsync_pointers_uninit(void) 3186 { 3187 3188 PF_RULES_WLOCK(); 3189 V_pfsync_state_import_ptr = NULL; 3190 V_pfsync_insert_state_ptr = NULL; 3191 V_pfsync_update_state_ptr = NULL; 3192 V_pfsync_delete_state_ptr = NULL; 3193 V_pfsync_clear_states_ptr = NULL; 3194 V_pfsync_defer_ptr = NULL; 3195 PF_RULES_WUNLOCK(); 3196 } 3197 3198 static void 3199 vnet_pfsync_init(const void *unused __unused) 3200 { 3201 int error; 3202 3203 V_pfsync_cloner = if_clone_simple(pfsyncname, 3204 pfsync_clone_create, pfsync_clone_destroy, 1); 3205 error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, 3206 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 3207 if (error) { 3208 if_clone_detach(V_pfsync_cloner); 3209 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 3210 } 3211 3212 pfsync_pointers_init(); 3213 } 3214 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 3215 vnet_pfsync_init, NULL); 3216 3217 static void 3218 vnet_pfsync_uninit(const void *unused __unused) 3219 { 3220 int ret __diagused; 3221 3222 pfsync_pointers_uninit(); 3223 3224 if_clone_detach(V_pfsync_cloner); 3225 ret = swi_remove(V_pfsync_swi_cookie); 3226 MPASS(ret == 0); 3227 ret = intr_event_destroy(V_pfsync_swi_ie); 3228 MPASS(ret == 0); 3229 } 3230 3231 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 3232 vnet_pfsync_uninit, NULL); 3233 3234 static int 3235 pfsync_init(void) 3236 { 3237 int error; 3238 3239 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 3240 3241 #ifdef INET 3242 error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); 3243 if (error) 3244 return (error); 3245 #endif 3246 #ifdef INET6 3247 error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL); 3248 if (error) { 3249 ipproto_unregister(IPPROTO_PFSYNC); 3250 return (error); 3251 } 3252 #endif 3253 3254 return (0); 3255 } 3256 3257 static void 3258 pfsync_uninit(void) 3259 { 3260 pfsync_detach_ifnet_ptr = NULL; 3261 3262 #ifdef INET 3263 ipproto_unregister(IPPROTO_PFSYNC); 3264 #endif 3265 #ifdef INET6 3266 ip6proto_unregister(IPPROTO_PFSYNC); 3267 #endif 3268 } 3269 3270 static int 3271 pfsync_modevent(module_t mod, int type, void *data) 3272 { 3273 int error = 0; 3274 3275 switch (type) { 3276 case MOD_LOAD: 3277 error = pfsync_init(); 3278 break; 3279 case MOD_UNLOAD: 3280 pfsync_uninit(); 3281 break; 3282 default: 3283 error = EINVAL; 3284 break; 3285 } 3286 3287 return (error); 3288 } 3289 3290 static moduledata_t pfsync_mod = { 3291 pfsyncname, 3292 pfsync_modevent, 3293 0 3294 }; 3295 3296 #define PFSYNC_MODVER 1 3297 3298 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 3299 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 3300 MODULE_VERSION(pfsync, PFSYNC_MODVER); 3301 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 3302