1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 #include "opt_inet.h" 64 #include "opt_inet6.h" 65 #include "opt_pf.h" 66 67 #include <sys/param.h> 68 #include <sys/bus.h> 69 #include <sys/endian.h> 70 #include <sys/interrupt.h> 71 #include <sys/kernel.h> 72 #include <sys/lock.h> 73 #include <sys/mbuf.h> 74 #include <sys/module.h> 75 #include <sys/mutex.h> 76 #include <sys/nv.h> 77 #include <sys/priv.h> 78 #include <sys/smp.h> 79 #include <sys/socket.h> 80 #include <sys/sockio.h> 81 #include <sys/sysctl.h> 82 #include <sys/syslog.h> 83 84 #include <net/bpf.h> 85 #include <net/if.h> 86 #include <net/if_var.h> 87 #include <net/if_clone.h> 88 #include <net/if_private.h> 89 #include <net/if_types.h> 90 #include <net/vnet.h> 91 #include <net/pfvar.h> 92 #include <net/route.h> 93 #include <net/if_pfsync.h> 94 95 #include <netinet/if_ether.h> 96 #include <netinet/in.h> 97 #include <netinet/in_var.h> 98 #include <netinet6/in6_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip6.h> 101 #include <netinet/ip_carp.h> 102 #include <netinet/ip_var.h> 103 #include <netinet/tcp.h> 104 #include <netinet/tcp_fsm.h> 105 #include <netinet/tcp_seq.h> 106 107 #include <netinet/ip6.h> 108 #include <netinet6/ip6_var.h> 109 #include <netinet6/scope6_var.h> 110 111 #include <netpfil/pf/pfsync_nv.h> 112 113 struct pfsync_bucket; 114 struct pfsync_softc; 115 116 union inet_template { 117 struct ip ipv4; 118 struct ip6_hdr ipv6; 119 }; 120 121 #define PFSYNC_MINPKT ( \ 122 sizeof(union inet_template) + \ 123 sizeof(struct pfsync_header) + \ 124 sizeof(struct pfsync_subheader) ) 125 126 static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, 127 struct pfsync_state_peer *); 128 static int pfsync_in_clr(struct mbuf *, int, int, int, int); 129 static int pfsync_in_ins(struct mbuf *, int, int, int, int); 130 static int pfsync_in_iack(struct mbuf *, int, int, int, int); 131 static int pfsync_in_upd(struct mbuf *, int, int, int, int); 132 static int pfsync_in_upd_c(struct mbuf *, int, int, int, int); 133 static int pfsync_in_ureq(struct mbuf *, int, int, int, int); 134 static int pfsync_in_del_c(struct mbuf *, int, int, int, int); 135 static int pfsync_in_bus(struct mbuf *, int, int, int, int); 136 static int pfsync_in_tdb(struct mbuf *, int, int, int, int); 137 static int pfsync_in_eof(struct mbuf *, int, int, int, int); 138 static int pfsync_in_error(struct mbuf *, int, int, int, int); 139 140 static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { 141 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 142 pfsync_in_ins, /* PFSYNC_ACT_INS_1301 */ 143 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 144 pfsync_in_upd, /* PFSYNC_ACT_UPD_1301 */ 145 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 146 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 147 pfsync_in_error, /* PFSYNC_ACT_DEL */ 148 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 149 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 150 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 151 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 152 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 153 pfsync_in_eof, /* PFSYNC_ACT_EOF */ 154 pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ 155 pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ 156 }; 157 158 struct pfsync_q { 159 void (*write)(struct pf_kstate *, void *); 160 size_t len; 161 u_int8_t action; 162 }; 163 164 /* We have the following sync queues */ 165 enum pfsync_q_id { 166 PFSYNC_Q_INS_1301, 167 PFSYNC_Q_INS_1400, 168 PFSYNC_Q_IACK, 169 PFSYNC_Q_UPD_1301, 170 PFSYNC_Q_UPD_1400, 171 PFSYNC_Q_UPD_C, 172 PFSYNC_Q_DEL_C, 173 PFSYNC_Q_COUNT, 174 }; 175 176 /* Functions for building messages for given queue */ 177 static void pfsync_out_state_1301(struct pf_kstate *, void *); 178 static void pfsync_out_state_1400(struct pf_kstate *, void *); 179 static void pfsync_out_iack(struct pf_kstate *, void *); 180 static void pfsync_out_upd_c(struct pf_kstate *, void *); 181 static void pfsync_out_del_c(struct pf_kstate *, void *); 182 183 /* Attach those functions to queue */ 184 static struct pfsync_q pfsync_qs[] = { 185 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, 186 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, 187 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 188 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, 189 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, 190 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 191 { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 192 }; 193 194 /* Map queue to pf_kstate->sync_state */ 195 static u_int8_t pfsync_qid_sstate[] = { 196 PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ 197 PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ 198 PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ 199 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ 200 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ 201 PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ 202 PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ 203 }; 204 205 /* Map pf_kstate->sync_state to queue */ 206 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t); 207 208 static void pfsync_q_ins(struct pf_kstate *, int sync_state, bool); 209 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 210 211 static void pfsync_update_state(struct pf_kstate *); 212 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 213 214 struct pfsync_upd_req_item { 215 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 216 struct pfsync_upd_req ur_msg; 217 }; 218 219 struct pfsync_deferral { 220 struct pfsync_softc *pd_sc; 221 TAILQ_ENTRY(pfsync_deferral) pd_entry; 222 struct callout pd_tmo; 223 224 struct pf_kstate *pd_st; 225 struct mbuf *pd_m; 226 }; 227 228 struct pfsync_bucket 229 { 230 int b_id; 231 struct pfsync_softc *b_sc; 232 struct mtx b_mtx; 233 struct callout b_tmo; 234 int b_flags; 235 #define PFSYNCF_BUCKET_PUSH 0x00000001 236 237 size_t b_len; 238 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_Q_COUNT]; 239 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 240 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 241 u_int b_deferred; 242 uint8_t *b_plus; 243 size_t b_pluslen; 244 245 struct ifaltq b_snd; 246 }; 247 248 struct pfsync_softc { 249 /* Configuration */ 250 struct ifnet *sc_ifp; 251 struct ifnet *sc_sync_if; 252 struct ip_moptions sc_imo; 253 struct ip6_moptions sc_im6o; 254 struct sockaddr_storage sc_sync_peer; 255 uint32_t sc_flags; 256 uint8_t sc_maxupdates; 257 union inet_template sc_template; 258 struct mtx sc_mtx; 259 uint32_t sc_version; 260 261 /* Queued data */ 262 struct pfsync_bucket *sc_buckets; 263 264 /* Bulk update info */ 265 struct mtx sc_bulk_mtx; 266 uint32_t sc_ureq_sent; 267 int sc_bulk_tries; 268 uint32_t sc_ureq_received; 269 int sc_bulk_hashid; 270 uint64_t sc_bulk_stateid; 271 uint32_t sc_bulk_creatorid; 272 struct callout sc_bulk_tmo; 273 struct callout sc_bulkfail_tmo; 274 }; 275 276 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 277 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 278 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 279 280 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 281 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 282 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 283 284 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 285 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 286 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 287 288 #define PFSYNC_DEFER_TIMEOUT 20 289 290 static const char pfsyncname[] = "pfsync"; 291 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 292 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 293 #define V_pfsyncif VNET(pfsyncif) 294 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 295 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 296 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 297 #define V_pfsync_swi_ie VNET(pfsync_swi_ie) 298 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 299 #define V_pfsyncstats VNET(pfsyncstats) 300 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 301 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 302 VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; 303 #define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) 304 305 static void pfsync_timeout(void *); 306 static void pfsync_push(struct pfsync_bucket *); 307 static void pfsync_push_all(struct pfsync_softc *); 308 static void pfsyncintr(void *); 309 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 310 struct in_mfilter *, struct in6_mfilter *); 311 static void pfsync_multicast_cleanup(struct pfsync_softc *); 312 static void pfsync_pointers_init(void); 313 static void pfsync_pointers_uninit(void); 314 static int pfsync_init(void); 315 static void pfsync_uninit(void); 316 317 static unsigned long pfsync_buckets; 318 319 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 320 "PFSYNC"); 321 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 322 &VNET_NAME(pfsyncstats), pfsyncstats, 323 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 324 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 325 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 326 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 327 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 328 SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, 329 &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); 330 331 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 332 static void pfsync_clone_destroy(struct ifnet *); 333 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 334 struct pf_state_peer *); 335 static int pfsyncoutput(struct ifnet *, struct mbuf *, 336 const struct sockaddr *, struct route *); 337 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 338 339 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 340 static void pfsync_undefer(struct pfsync_deferral *, int); 341 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 342 static void pfsync_undefer_state(struct pf_kstate *, int); 343 static void pfsync_defer_tmo(void *); 344 345 static void pfsync_request_update(u_int32_t, u_int64_t); 346 static bool pfsync_update_state_req(struct pf_kstate *); 347 348 static void pfsync_drop_all(struct pfsync_softc *); 349 static void pfsync_drop(struct pfsync_softc *, int); 350 static void pfsync_sendout(int, int); 351 static void pfsync_send_plus(void *, size_t); 352 353 static void pfsync_bulk_start(void); 354 static void pfsync_bulk_status(u_int8_t); 355 static void pfsync_bulk_update(void *); 356 static void pfsync_bulk_fail(void *); 357 358 static void pfsync_detach_ifnet(struct ifnet *); 359 360 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 361 struct pfsync_kstatus *); 362 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 363 struct pfsync_softc *); 364 365 #ifdef IPSEC 366 static void pfsync_update_net_tdb(struct pfsync_tdb *); 367 #endif 368 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 369 struct pf_kstate *); 370 371 #define PFSYNC_MAX_BULKTRIES 12 372 373 VNET_DEFINE(struct if_clone *, pfsync_cloner); 374 #define V_pfsync_cloner VNET(pfsync_cloner) 375 376 const struct in6_addr in6addr_linklocal_pfsync_group = 377 {{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 378 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}}; 379 static int 380 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 381 { 382 struct pfsync_softc *sc; 383 struct ifnet *ifp; 384 struct pfsync_bucket *b; 385 int c; 386 enum pfsync_q_id q; 387 388 if (unit != 0) 389 return (EINVAL); 390 391 if (! pfsync_buckets) 392 pfsync_buckets = mp_ncpus * 2; 393 394 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 395 sc->sc_flags |= PFSYNCF_OK; 396 sc->sc_maxupdates = 128; 397 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 398 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 399 M_PFSYNC, M_ZERO | M_WAITOK); 400 for (c = 0; c < pfsync_buckets; c++) { 401 b = &sc->sc_buckets[c]; 402 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 403 404 b->b_id = c; 405 b->b_sc = sc; 406 b->b_len = PFSYNC_MINPKT; 407 408 for (q = 0; q < PFSYNC_Q_COUNT; q++) 409 TAILQ_INIT(&b->b_qs[q]); 410 411 TAILQ_INIT(&b->b_upd_req_list); 412 TAILQ_INIT(&b->b_deferrals); 413 414 callout_init(&b->b_tmo, 1); 415 416 b->b_snd.ifq_maxlen = ifqmaxlen; 417 } 418 419 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 420 if_initname(ifp, pfsyncname, unit); 421 ifp->if_softc = sc; 422 ifp->if_ioctl = pfsyncioctl; 423 ifp->if_output = pfsyncoutput; 424 ifp->if_hdrlen = sizeof(struct pfsync_header); 425 ifp->if_mtu = ETHERMTU; 426 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 427 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 428 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 429 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 430 431 if_attach(ifp); 432 433 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 434 435 V_pfsyncif = sc; 436 437 return (0); 438 } 439 440 static void 441 pfsync_clone_destroy(struct ifnet *ifp) 442 { 443 struct pfsync_softc *sc = ifp->if_softc; 444 struct pfsync_bucket *b; 445 int c, ret; 446 447 for (c = 0; c < pfsync_buckets; c++) { 448 b = &sc->sc_buckets[c]; 449 /* 450 * At this stage, everything should have already been 451 * cleared by pfsync_uninit(), and we have only to 452 * drain callouts. 453 */ 454 PFSYNC_BUCKET_LOCK(b); 455 while (b->b_deferred > 0) { 456 struct pfsync_deferral *pd = 457 TAILQ_FIRST(&b->b_deferrals); 458 459 ret = callout_stop(&pd->pd_tmo); 460 PFSYNC_BUCKET_UNLOCK(b); 461 if (ret > 0) { 462 pfsync_undefer(pd, 1); 463 } else { 464 callout_drain(&pd->pd_tmo); 465 } 466 PFSYNC_BUCKET_LOCK(b); 467 } 468 MPASS(b->b_deferred == 0); 469 MPASS(TAILQ_EMPTY(&b->b_deferrals)); 470 PFSYNC_BUCKET_UNLOCK(b); 471 472 free(b->b_plus, M_PFSYNC); 473 b->b_plus = NULL; 474 b->b_pluslen = 0; 475 476 callout_drain(&b->b_tmo); 477 } 478 479 callout_drain(&sc->sc_bulkfail_tmo); 480 callout_drain(&sc->sc_bulk_tmo); 481 482 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 483 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 484 bpfdetach(ifp); 485 if_detach(ifp); 486 487 pfsync_drop_all(sc); 488 489 if_free(ifp); 490 pfsync_multicast_cleanup(sc); 491 mtx_destroy(&sc->sc_mtx); 492 mtx_destroy(&sc->sc_bulk_mtx); 493 494 for (c = 0; c < pfsync_buckets; c++) { 495 b = &sc->sc_buckets[c]; 496 mtx_destroy(&b->b_mtx); 497 } 498 free(sc->sc_buckets, M_PFSYNC); 499 free(sc, M_PFSYNC); 500 501 V_pfsyncif = NULL; 502 } 503 504 static int 505 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 506 struct pf_state_peer *d) 507 { 508 if (s->scrub.scrub_flag && d->scrub == NULL) { 509 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 510 if (d->scrub == NULL) 511 return (ENOMEM); 512 } 513 514 return (0); 515 } 516 517 static int 518 pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) 519 { 520 struct pfsync_softc *sc = V_pfsyncif; 521 #ifndef __NO_STRICT_ALIGNMENT 522 struct pfsync_state_key key[2]; 523 #endif 524 struct pfsync_state_key *kw, *ks; 525 struct pf_kstate *st = NULL; 526 struct pf_state_key *skw = NULL, *sks = NULL; 527 struct pf_krule *r = NULL; 528 struct pfi_kkif *kif; 529 struct pfi_kkif *rt_kif = NULL; 530 struct pf_kpooladdr *rpool_first; 531 int error; 532 sa_family_t rt_af = 0; 533 uint8_t rt = 0; 534 int n = 0; 535 536 PF_RULES_RASSERT(); 537 538 if (sp->pfs_1301.creatorid == 0) { 539 if (V_pf_status.debug >= PF_DEBUG_MISC) 540 printf("%s: invalid creator id: %08x\n", __func__, 541 ntohl(sp->pfs_1301.creatorid)); 542 return (EINVAL); 543 } 544 545 if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { 546 if (V_pf_status.debug >= PF_DEBUG_MISC) 547 printf("%s: unknown interface: %s\n", __func__, 548 sp->pfs_1301.ifname); 549 if (flags & PFSYNC_SI_IOCTL) 550 return (EINVAL); 551 return (0); /* skip this state */ 552 } 553 554 /* 555 * If the ruleset checksums match or the state is coming from the ioctl, 556 * it's safe to associate the state with the rule of that number. 557 */ 558 if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && 559 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < 560 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { 561 TAILQ_FOREACH(r, pf_main_ruleset.rules[ 562 PF_RULESET_FILTER].active.ptr, entries) 563 if (ntohl(sp->pfs_1301.rule) == n++) 564 break; 565 } else 566 r = &V_pf_default_rule; 567 568 /* 569 * Check routing interface early on. Do it before allocating memory etc. 570 * because there is a high chance there will be a lot more such states. 571 */ 572 switch (msg_version) { 573 case PFSYNC_MSG_VERSION_1301: 574 /* 575 * On FreeBSD <= 13 the routing interface and routing operation 576 * are not sent over pfsync. If the ruleset is identical, 577 * though, we might be able to recover the routing information 578 * from the local ruleset. 579 */ 580 if (r != &V_pf_default_rule) { 581 struct pf_kpool *pool = &r->route; 582 583 /* Backwards compatibility. */ 584 if (TAILQ_EMPTY(&pool->list)) 585 pool = &r->rdr; 586 587 /* 588 * The ruleset is identical, try to recover. If the rule 589 * has a redirection pool with a single interface, there 590 * is a chance that this interface is identical as on 591 * the pfsync peer. If there's more than one interface, 592 * give up, as we can't be sure that we will pick the 593 * same one as the pfsync peer did. 594 */ 595 rpool_first = TAILQ_FIRST(&(pool->list)); 596 if ((rpool_first == NULL) || 597 (TAILQ_NEXT(rpool_first, entries) != NULL)) { 598 DPFPRINTF(PF_DEBUG_MISC, 599 "%s: can't recover routing information " 600 "because of empty or bad redirection pool", 601 __func__); 602 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 603 } 604 rt = r->rt; 605 rt_kif = rpool_first->kif; 606 /* 607 * Guess the AF of the route address, FreeBSD 13 does 608 * not support af-to so it should be safe. 609 */ 610 rt_af = r->af; 611 } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { 612 /* 613 * Ruleset different, routing *supposedly* requested, 614 * give up on recovering. 615 */ 616 DPFPRINTF(PF_DEBUG_MISC, 617 "%s: can't recover routing information " 618 "because of different ruleset", __func__); 619 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 620 } 621 break; 622 case PFSYNC_MSG_VERSION_1400: 623 /* 624 * On FreeBSD 14 and above we're not taking any chances. 625 * We use the information synced to us. 626 */ 627 if (sp->pfs_1400.rt) { 628 rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname); 629 if (rt_kif == NULL) { 630 DPFPRINTF(PF_DEBUG_MISC, 631 "%s: unknown route interface: %s", 632 __func__, sp->pfs_1400.rt_ifname); 633 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 634 } 635 rt = sp->pfs_1400.rt; 636 /* 637 * Guess the AF of the route address, FreeBSD 13 does 638 * not support af-to so it should be safe. 639 */ 640 rt_af = sp->pfs_1400.af; 641 } 642 break; 643 } 644 645 if ((r->max_states && 646 counter_u64_fetch(r->states_cur) >= r->max_states)) 647 goto cleanup; 648 649 /* 650 * XXXGL: consider M_WAITOK in ioctl path after. 651 */ 652 st = pf_alloc_state(M_NOWAIT); 653 if (__predict_false(st == NULL)) 654 goto cleanup; 655 656 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 657 goto cleanup; 658 659 #ifndef __NO_STRICT_ALIGNMENT 660 bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2); 661 kw = &key[PF_SK_WIRE]; 662 ks = &key[PF_SK_STACK]; 663 #else 664 kw = &sp->pfs_1301.key[PF_SK_WIRE]; 665 ks = &sp->pfs_1301.key[PF_SK_STACK]; 666 #endif 667 668 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) || 669 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) || 670 kw->port[0] != ks->port[0] || 671 kw->port[1] != ks->port[1]) { 672 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 673 if (sks == NULL) 674 goto cleanup; 675 } else 676 sks = skw; 677 678 /* allocate memory for scrub info */ 679 if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) || 680 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst)) 681 goto cleanup; 682 683 /* Copy to state key(s). */ 684 skw->addr[0] = kw->addr[0]; 685 skw->addr[1] = kw->addr[1]; 686 skw->port[0] = kw->port[0]; 687 skw->port[1] = kw->port[1]; 688 skw->proto = sp->pfs_1301.proto; 689 skw->af = sp->pfs_1301.af; 690 if (sks != skw) { 691 sks->addr[0] = ks->addr[0]; 692 sks->addr[1] = ks->addr[1]; 693 sks->port[0] = ks->port[0]; 694 sks->port[1] = ks->port[1]; 695 sks->proto = sp->pfs_1301.proto; 696 sks->af = sp->pfs_1301.af; 697 } 698 699 /* copy to state */ 700 bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, sizeof(st->act.rt_addr)); 701 st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000; 702 st->expire = pf_get_uptime(); 703 if (sp->pfs_1301.expire) { 704 uint32_t timeout; 705 706 timeout = r->timeout[sp->pfs_1301.timeout]; 707 if (!timeout) 708 timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout]; 709 710 /* sp->expire may have been adaptively scaled by export. */ 711 st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; 712 } 713 714 st->direction = sp->pfs_1301.direction; 715 st->act.log = sp->pfs_1301.log; 716 st->timeout = sp->pfs_1301.timeout; 717 718 st->act.rt = rt; 719 st->act.rt_kif = rt_kif; 720 st->act.rt_af = rt_af; 721 722 switch (msg_version) { 723 case PFSYNC_MSG_VERSION_1301: 724 st->state_flags = sp->pfs_1301.state_flags; 725 /* 726 * In FreeBSD 13 pfsync lacks many attributes. Copy them 727 * from the rule if possible. If rule can't be matched 728 * clear any set options as we can't recover their 729 * parameters. 730 */ 731 if (r == &V_pf_default_rule) { 732 st->state_flags &= ~PFSTATE_SETMASK; 733 } else { 734 /* 735 * Similar to pf_rule_to_actions(). This code 736 * won't set the actions properly if they come 737 * from multiple "match" rules as only rule 738 * creating the state is send over pfsync. 739 */ 740 st->act.qid = r->qid; 741 st->act.pqid = r->pqid; 742 st->act.rtableid = r->rtableid; 743 if (r->scrub_flags & PFSTATE_SETTOS) 744 st->act.set_tos = r->set_tos; 745 st->act.min_ttl = r->min_ttl; 746 st->act.max_mss = r->max_mss; 747 st->state_flags |= (r->scrub_flags & 748 (PFSTATE_NODF|PFSTATE_RANDOMID| 749 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP| 750 PFSTATE_SETPRIO)); 751 if (r->dnpipe || r->dnrpipe) { 752 if (r->free_flags & PFRULE_DN_IS_PIPE) 753 st->state_flags |= PFSTATE_DN_IS_PIPE; 754 else 755 st->state_flags &= ~PFSTATE_DN_IS_PIPE; 756 } 757 st->act.dnpipe = r->dnpipe; 758 st->act.dnrpipe = r->dnrpipe; 759 } 760 break; 761 case PFSYNC_MSG_VERSION_1400: 762 st->state_flags = ntohs(sp->pfs_1400.state_flags); 763 st->act.qid = ntohs(sp->pfs_1400.qid); 764 st->act.pqid = ntohs(sp->pfs_1400.pqid); 765 st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); 766 st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe); 767 st->act.rtableid = ntohl(sp->pfs_1400.rtableid); 768 st->act.min_ttl = sp->pfs_1400.min_ttl; 769 st->act.set_tos = sp->pfs_1400.set_tos; 770 st->act.max_mss = ntohs(sp->pfs_1400.max_mss); 771 st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; 772 st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; 773 break; 774 default: 775 panic("%s: Unsupported pfsync_msg_version %d", 776 __func__, msg_version); 777 } 778 779 if (! (st->act.rtableid == -1 || 780 (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) 781 goto cleanup; 782 783 st->id = sp->pfs_1301.id; 784 st->creatorid = sp->pfs_1301.creatorid; 785 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 786 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 787 788 st->rule = r; 789 st->nat_rule = NULL; 790 st->anchor = NULL; 791 792 st->pfsync_time = time_uptime; 793 st->sync_state = PFSYNC_S_NONE; 794 795 if (!(flags & PFSYNC_SI_IOCTL)) 796 st->state_flags |= PFSTATE_NOSYNC; 797 798 if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) 799 goto cleanup_state; 800 801 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 802 counter_u64_add(r->states_cur, 1); 803 counter_u64_add(r->states_tot, 1); 804 805 if (!(flags & PFSYNC_SI_IOCTL)) { 806 st->state_flags &= ~PFSTATE_NOSYNC; 807 if (st->state_flags & PFSTATE_ACK) { 808 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 809 PFSYNC_BUCKET_LOCK(b); 810 pfsync_q_ins(st, PFSYNC_S_IACK, true); 811 PFSYNC_BUCKET_UNLOCK(b); 812 813 pfsync_push_all(sc); 814 } 815 } 816 st->state_flags &= ~PFSTATE_ACK; 817 PF_STATE_UNLOCK(st); 818 819 return (0); 820 821 cleanup: 822 error = ENOMEM; 823 824 if (skw == sks) 825 sks = NULL; 826 uma_zfree(V_pf_state_key_z, skw); 827 uma_zfree(V_pf_state_key_z, sks); 828 829 cleanup_state: /* pf_state_insert() frees the state keys. */ 830 if (st) { 831 st->timeout = PFTM_UNLINKED; /* appease an assert */ 832 pf_free_state(st); 833 } 834 return (error); 835 } 836 837 #ifdef INET 838 static int 839 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 840 { 841 struct pfsync_softc *sc = V_pfsyncif; 842 struct mbuf *m = *mp; 843 struct ip *ip = mtod(m, struct ip *); 844 struct pfsync_header *ph; 845 struct pfsync_subheader subh; 846 847 int offset, len, flags = 0; 848 int rv; 849 uint16_t count; 850 851 PF_RULES_RLOCK_TRACKER; 852 853 *mp = NULL; 854 V_pfsyncstats.pfsyncs_ipackets++; 855 856 /* Verify that we have a sync interface configured. */ 857 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 858 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 859 goto done; 860 861 /* verify that the packet came in on the right interface */ 862 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 863 V_pfsyncstats.pfsyncs_badif++; 864 goto done; 865 } 866 867 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 868 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 869 /* verify that the IP TTL is 255. */ 870 if (ip->ip_ttl != PFSYNC_DFLTTL) { 871 V_pfsyncstats.pfsyncs_badttl++; 872 goto done; 873 } 874 875 offset = ip->ip_hl << 2; 876 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 877 V_pfsyncstats.pfsyncs_hdrops++; 878 goto done; 879 } 880 881 if (offset + sizeof(*ph) > m->m_len) { 882 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 883 V_pfsyncstats.pfsyncs_hdrops++; 884 return (IPPROTO_DONE); 885 } 886 ip = mtod(m, struct ip *); 887 } 888 ph = (struct pfsync_header *)((char *)ip + offset); 889 890 /* verify the version */ 891 if (ph->version != PFSYNC_VERSION) { 892 V_pfsyncstats.pfsyncs_badver++; 893 goto done; 894 } 895 896 len = ntohs(ph->len) + offset; 897 if (m->m_pkthdr.len < len) { 898 V_pfsyncstats.pfsyncs_badlen++; 899 goto done; 900 } 901 902 /* 903 * Trusting pf_chksum during packet processing, as well as seeking 904 * in interface name tree, require holding PF_RULES_RLOCK(). 905 */ 906 PF_RULES_RLOCK(); 907 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 908 flags = PFSYNC_SI_CKSUM; 909 910 offset += sizeof(*ph); 911 while (offset <= len - sizeof(subh)) { 912 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 913 offset += sizeof(subh); 914 915 if (subh.action >= PFSYNC_ACT_MAX) { 916 V_pfsyncstats.pfsyncs_badact++; 917 PF_RULES_RUNLOCK(); 918 goto done; 919 } 920 921 count = ntohs(subh.count); 922 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 923 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 924 if (rv == -1) { 925 PF_RULES_RUNLOCK(); 926 return (IPPROTO_DONE); 927 } 928 929 offset += rv; 930 } 931 PF_RULES_RUNLOCK(); 932 933 done: 934 m_freem(m); 935 return (IPPROTO_DONE); 936 } 937 #endif 938 939 #ifdef INET6 940 static int 941 pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused) 942 { 943 struct pfsync_softc *sc = V_pfsyncif; 944 struct mbuf *m = *mp; 945 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 946 struct pfsync_header *ph; 947 struct pfsync_subheader subh; 948 949 int offset, len, flags = 0; 950 int rv; 951 uint16_t count; 952 953 PF_RULES_RLOCK_TRACKER; 954 955 *mp = NULL; 956 V_pfsyncstats.pfsyncs_ipackets++; 957 958 /* Verify that we have a sync interface configured. */ 959 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 960 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 961 goto done; 962 963 /* verify that the packet came in on the right interface */ 964 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 965 V_pfsyncstats.pfsyncs_badif++; 966 goto done; 967 } 968 969 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 970 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 971 /* verify that the IP TTL is 255. */ 972 if (ip6->ip6_hlim != PFSYNC_DFLTTL) { 973 V_pfsyncstats.pfsyncs_badttl++; 974 goto done; 975 } 976 977 978 offset = sizeof(*ip6); 979 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 980 V_pfsyncstats.pfsyncs_hdrops++; 981 goto done; 982 } 983 984 if (offset + sizeof(*ph) > m->m_len) { 985 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 986 V_pfsyncstats.pfsyncs_hdrops++; 987 return (IPPROTO_DONE); 988 } 989 ip6 = mtod(m, struct ip6_hdr *); 990 } 991 ph = (struct pfsync_header *)((char *)ip6 + offset); 992 993 /* verify the version */ 994 if (ph->version != PFSYNC_VERSION) { 995 V_pfsyncstats.pfsyncs_badver++; 996 goto done; 997 } 998 999 len = ntohs(ph->len) + offset; 1000 if (m->m_pkthdr.len < len) { 1001 V_pfsyncstats.pfsyncs_badlen++; 1002 goto done; 1003 } 1004 1005 /* 1006 * Trusting pf_chksum during packet processing, as well as seeking 1007 * in interface name tree, require holding PF_RULES_RLOCK(). 1008 */ 1009 PF_RULES_RLOCK(); 1010 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 1011 flags = PFSYNC_SI_CKSUM; 1012 1013 offset += sizeof(*ph); 1014 while (offset <= len - sizeof(subh)) { 1015 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 1016 offset += sizeof(subh); 1017 1018 if (subh.action >= PFSYNC_ACT_MAX) { 1019 V_pfsyncstats.pfsyncs_badact++; 1020 PF_RULES_RUNLOCK(); 1021 goto done; 1022 } 1023 1024 count = ntohs(subh.count); 1025 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 1026 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 1027 if (rv == -1) { 1028 PF_RULES_RUNLOCK(); 1029 return (IPPROTO_DONE); 1030 } 1031 1032 offset += rv; 1033 } 1034 PF_RULES_RUNLOCK(); 1035 1036 done: 1037 m_freem(m); 1038 return (IPPROTO_DONE); 1039 } 1040 #endif 1041 1042 static int 1043 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) 1044 { 1045 struct pfsync_clr *clr; 1046 struct mbuf *mp; 1047 int len = sizeof(*clr) * count; 1048 int i, offp; 1049 u_int32_t creatorid; 1050 1051 mp = m_pulldown(m, offset, len, &offp); 1052 if (mp == NULL) { 1053 V_pfsyncstats.pfsyncs_badlen++; 1054 return (-1); 1055 } 1056 clr = (struct pfsync_clr *)(mp->m_data + offp); 1057 1058 for (i = 0; i < count; i++) { 1059 creatorid = clr[i].creatorid; 1060 1061 if (clr[i].ifname[0] != '\0' && 1062 pfi_kkif_find(clr[i].ifname) == NULL) 1063 continue; 1064 1065 for (int i = 0; i <= V_pf_hashmask; i++) { 1066 struct pf_idhash *ih = &V_pf_idhash[i]; 1067 struct pf_kstate *s; 1068 relock: 1069 PF_HASHROW_LOCK(ih); 1070 LIST_FOREACH(s, &ih->states, entry) { 1071 if (s->creatorid == creatorid) { 1072 s->state_flags |= PFSTATE_NOSYNC; 1073 pf_remove_state(s); 1074 goto relock; 1075 } 1076 } 1077 PF_HASHROW_UNLOCK(ih); 1078 } 1079 } 1080 1081 return (len); 1082 } 1083 1084 static int 1085 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) 1086 { 1087 struct mbuf *mp; 1088 union pfsync_state_union *sa, *sp; 1089 int i, offp, total_len, msg_version, msg_len; 1090 1091 switch (action) { 1092 case PFSYNC_ACT_INS_1301: 1093 msg_len = sizeof(struct pfsync_state_1301); 1094 total_len = msg_len * count; 1095 msg_version = PFSYNC_MSG_VERSION_1301; 1096 break; 1097 case PFSYNC_ACT_INS_1400: 1098 msg_len = sizeof(struct pfsync_state_1400); 1099 total_len = msg_len * count; 1100 msg_version = PFSYNC_MSG_VERSION_1400; 1101 break; 1102 default: 1103 V_pfsyncstats.pfsyncs_badver++; 1104 return (-1); 1105 } 1106 1107 mp = m_pulldown(m, offset, total_len, &offp); 1108 if (mp == NULL) { 1109 V_pfsyncstats.pfsyncs_badlen++; 1110 return (-1); 1111 } 1112 sa = (union pfsync_state_union *)(mp->m_data + offp); 1113 1114 for (i = 0; i < count; i++) { 1115 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1116 1117 /* Check for invalid values. */ 1118 if (sp->pfs_1301.timeout >= PFTM_MAX || 1119 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1120 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || 1121 sp->pfs_1301.direction > PF_OUT || 1122 (sp->pfs_1301.af != AF_INET && 1123 sp->pfs_1301.af != AF_INET6)) { 1124 if (V_pf_status.debug >= PF_DEBUG_MISC) 1125 printf("%s: invalid value\n", __func__); 1126 V_pfsyncstats.pfsyncs_badval++; 1127 continue; 1128 } 1129 1130 if (pfsync_state_import(sp, flags, msg_version) != 0) 1131 V_pfsyncstats.pfsyncs_badact++; 1132 } 1133 1134 return (total_len); 1135 } 1136 1137 static int 1138 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) 1139 { 1140 struct pfsync_ins_ack *ia, *iaa; 1141 struct pf_kstate *st; 1142 1143 struct mbuf *mp; 1144 int len = count * sizeof(*ia); 1145 int offp, i; 1146 1147 mp = m_pulldown(m, offset, len, &offp); 1148 if (mp == NULL) { 1149 V_pfsyncstats.pfsyncs_badlen++; 1150 return (-1); 1151 } 1152 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 1153 1154 for (i = 0; i < count; i++) { 1155 ia = &iaa[i]; 1156 1157 st = pf_find_state_byid(ia->id, ia->creatorid); 1158 if (st == NULL) 1159 continue; 1160 1161 if (st->state_flags & PFSTATE_ACK) { 1162 pfsync_undefer_state(st, 0); 1163 } 1164 PF_STATE_UNLOCK(st); 1165 } 1166 /* 1167 * XXX this is not yet implemented, but we know the size of the 1168 * message so we can skip it. 1169 */ 1170 1171 return (count * sizeof(struct pfsync_ins_ack)); 1172 } 1173 1174 static int 1175 pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, 1176 struct pfsync_state_peer *dst) 1177 { 1178 int sync = 0; 1179 1180 PF_STATE_LOCK_ASSERT(st); 1181 1182 /* 1183 * The state should never go backwards except 1184 * for syn-proxy states. Neither should the 1185 * sequence window slide backwards. 1186 */ 1187 if ((st->src.state > src->state && 1188 (st->src.state < PF_TCPS_PROXY_SRC || 1189 src->state >= PF_TCPS_PROXY_SRC)) || 1190 1191 (st->src.state == src->state && 1192 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 1193 sync++; 1194 else 1195 pf_state_peer_ntoh(src, &st->src); 1196 1197 if ((st->dst.state > dst->state) || 1198 1199 (st->dst.state >= TCPS_SYN_SENT && 1200 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 1201 sync++; 1202 else 1203 pf_state_peer_ntoh(dst, &st->dst); 1204 1205 return (sync); 1206 } 1207 1208 static int 1209 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) 1210 { 1211 struct pfsync_softc *sc = V_pfsyncif; 1212 union pfsync_state_union *sa, *sp; 1213 struct pf_kstate *st; 1214 struct mbuf *mp; 1215 int sync, offp, i, total_len, msg_len, msg_version; 1216 1217 switch (action) { 1218 case PFSYNC_ACT_UPD_1301: 1219 msg_len = sizeof(struct pfsync_state_1301); 1220 total_len = msg_len * count; 1221 msg_version = PFSYNC_MSG_VERSION_1301; 1222 break; 1223 case PFSYNC_ACT_UPD_1400: 1224 msg_len = sizeof(struct pfsync_state_1400); 1225 total_len = msg_len * count; 1226 msg_version = PFSYNC_MSG_VERSION_1400; 1227 break; 1228 default: 1229 V_pfsyncstats.pfsyncs_badact++; 1230 return (-1); 1231 } 1232 1233 mp = m_pulldown(m, offset, total_len, &offp); 1234 if (mp == NULL) { 1235 V_pfsyncstats.pfsyncs_badlen++; 1236 return (-1); 1237 } 1238 sa = (union pfsync_state_union *)(mp->m_data + offp); 1239 1240 for (i = 0; i < count; i++) { 1241 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1242 1243 /* check for invalid values */ 1244 if (sp->pfs_1301.timeout >= PFTM_MAX || 1245 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1246 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { 1247 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1248 printf("pfsync_input: PFSYNC_ACT_UPD: " 1249 "invalid value\n"); 1250 } 1251 V_pfsyncstats.pfsyncs_badval++; 1252 continue; 1253 } 1254 1255 st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid); 1256 if (st == NULL) { 1257 /* insert the update */ 1258 if (pfsync_state_import(sp, flags, msg_version)) 1259 V_pfsyncstats.pfsyncs_badstate++; 1260 continue; 1261 } 1262 1263 if (st->state_flags & PFSTATE_ACK) { 1264 pfsync_undefer_state(st, 1); 1265 } 1266 1267 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1268 sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst); 1269 else { 1270 sync = 0; 1271 1272 /* 1273 * Non-TCP protocol state machine always go 1274 * forwards 1275 */ 1276 if (st->src.state > sp->pfs_1301.src.state) 1277 sync++; 1278 else 1279 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 1280 if (st->dst.state > sp->pfs_1301.dst.state) 1281 sync++; 1282 else 1283 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1284 } 1285 if (sync < 2) { 1286 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); 1287 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1288 st->expire = pf_get_uptime(); 1289 st->timeout = sp->pfs_1301.timeout; 1290 } 1291 st->pfsync_time = time_uptime; 1292 1293 if (sync) { 1294 V_pfsyncstats.pfsyncs_stale++; 1295 1296 pfsync_update_state(st); 1297 PF_STATE_UNLOCK(st); 1298 pfsync_push_all(sc); 1299 continue; 1300 } 1301 PF_STATE_UNLOCK(st); 1302 } 1303 1304 return (total_len); 1305 } 1306 1307 static int 1308 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action) 1309 { 1310 struct pfsync_softc *sc = V_pfsyncif; 1311 struct pfsync_upd_c *ua, *up; 1312 struct pf_kstate *st; 1313 int len = count * sizeof(*up); 1314 int sync; 1315 struct mbuf *mp; 1316 int offp, i; 1317 1318 mp = m_pulldown(m, offset, len, &offp); 1319 if (mp == NULL) { 1320 V_pfsyncstats.pfsyncs_badlen++; 1321 return (-1); 1322 } 1323 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 1324 1325 for (i = 0; i < count; i++) { 1326 up = &ua[i]; 1327 1328 /* check for invalid values */ 1329 if (up->timeout >= PFTM_MAX || 1330 up->src.state > PF_TCPS_PROXY_DST || 1331 up->dst.state > PF_TCPS_PROXY_DST) { 1332 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1333 printf("pfsync_input: " 1334 "PFSYNC_ACT_UPD_C: " 1335 "invalid value\n"); 1336 } 1337 V_pfsyncstats.pfsyncs_badval++; 1338 continue; 1339 } 1340 1341 st = pf_find_state_byid(up->id, up->creatorid); 1342 if (st == NULL) { 1343 /* We don't have this state. Ask for it. */ 1344 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1345 pfsync_request_update(up->creatorid, up->id); 1346 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1347 continue; 1348 } 1349 1350 if (st->state_flags & PFSTATE_ACK) { 1351 pfsync_undefer_state(st, 1); 1352 } 1353 1354 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1355 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1356 else { 1357 sync = 0; 1358 1359 /* 1360 * Non-TCP protocol state machine always go 1361 * forwards 1362 */ 1363 if (st->src.state > up->src.state) 1364 sync++; 1365 else 1366 pf_state_peer_ntoh(&up->src, &st->src); 1367 if (st->dst.state > up->dst.state) 1368 sync++; 1369 else 1370 pf_state_peer_ntoh(&up->dst, &st->dst); 1371 } 1372 if (sync < 2) { 1373 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1374 pf_state_peer_ntoh(&up->dst, &st->dst); 1375 st->expire = pf_get_uptime(); 1376 st->timeout = up->timeout; 1377 } 1378 st->pfsync_time = time_uptime; 1379 1380 if (sync) { 1381 V_pfsyncstats.pfsyncs_stale++; 1382 1383 pfsync_update_state(st); 1384 PF_STATE_UNLOCK(st); 1385 pfsync_push_all(sc); 1386 continue; 1387 } 1388 PF_STATE_UNLOCK(st); 1389 } 1390 1391 return (len); 1392 } 1393 1394 static int 1395 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action) 1396 { 1397 struct pfsync_upd_req *ur, *ura; 1398 struct mbuf *mp; 1399 int len = count * sizeof(*ur); 1400 int i, offp; 1401 1402 struct pf_kstate *st; 1403 1404 mp = m_pulldown(m, offset, len, &offp); 1405 if (mp == NULL) { 1406 V_pfsyncstats.pfsyncs_badlen++; 1407 return (-1); 1408 } 1409 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1410 1411 for (i = 0; i < count; i++) { 1412 ur = &ura[i]; 1413 1414 if (ur->id == 0 && ur->creatorid == 0) 1415 pfsync_bulk_start(); 1416 else { 1417 st = pf_find_state_byid(ur->id, ur->creatorid); 1418 if (st == NULL) { 1419 V_pfsyncstats.pfsyncs_badstate++; 1420 continue; 1421 } 1422 if (st->state_flags & PFSTATE_NOSYNC) { 1423 PF_STATE_UNLOCK(st); 1424 continue; 1425 } 1426 1427 pfsync_update_state_req(st); 1428 PF_STATE_UNLOCK(st); 1429 } 1430 } 1431 1432 return (len); 1433 } 1434 1435 static int 1436 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action) 1437 { 1438 struct mbuf *mp; 1439 struct pfsync_del_c *sa, *sp; 1440 struct pf_kstate *st; 1441 int len = count * sizeof(*sp); 1442 int offp, i; 1443 1444 mp = m_pulldown(m, offset, len, &offp); 1445 if (mp == NULL) { 1446 V_pfsyncstats.pfsyncs_badlen++; 1447 return (-1); 1448 } 1449 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1450 1451 for (i = 0; i < count; i++) { 1452 sp = &sa[i]; 1453 1454 st = pf_find_state_byid(sp->id, sp->creatorid); 1455 if (st == NULL) { 1456 V_pfsyncstats.pfsyncs_badstate++; 1457 continue; 1458 } 1459 1460 st->state_flags |= PFSTATE_NOSYNC; 1461 pf_remove_state(st); 1462 } 1463 1464 return (len); 1465 } 1466 1467 static int 1468 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action) 1469 { 1470 struct pfsync_softc *sc = V_pfsyncif; 1471 struct pfsync_bus *bus; 1472 struct mbuf *mp; 1473 int len = count * sizeof(*bus); 1474 int offp; 1475 1476 PFSYNC_BLOCK(sc); 1477 1478 /* If we're not waiting for a bulk update, who cares. */ 1479 if (sc->sc_ureq_sent == 0) { 1480 PFSYNC_BUNLOCK(sc); 1481 return (len); 1482 } 1483 1484 mp = m_pulldown(m, offset, len, &offp); 1485 if (mp == NULL) { 1486 PFSYNC_BUNLOCK(sc); 1487 V_pfsyncstats.pfsyncs_badlen++; 1488 return (-1); 1489 } 1490 bus = (struct pfsync_bus *)(mp->m_data + offp); 1491 1492 switch (bus->status) { 1493 case PFSYNC_BUS_START: 1494 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1495 V_pf_limits[PF_LIMIT_STATES].limit / 1496 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1497 sizeof(union pfsync_state_union)), 1498 pfsync_bulk_fail, sc); 1499 if (V_pf_status.debug >= PF_DEBUG_MISC) 1500 printf("pfsync: received bulk update start\n"); 1501 break; 1502 1503 case PFSYNC_BUS_END: 1504 if (time_uptime - ntohl(bus->endtime) >= 1505 sc->sc_ureq_sent) { 1506 /* that's it, we're happy */ 1507 sc->sc_ureq_sent = 0; 1508 sc->sc_bulk_tries = 0; 1509 callout_stop(&sc->sc_bulkfail_tmo); 1510 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1511 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1512 "pfsync bulk done"); 1513 sc->sc_flags |= PFSYNCF_OK; 1514 if (V_pf_status.debug >= PF_DEBUG_MISC) 1515 printf("pfsync: received valid " 1516 "bulk update end\n"); 1517 } else { 1518 if (V_pf_status.debug >= PF_DEBUG_MISC) 1519 printf("pfsync: received invalid " 1520 "bulk update end: bad timestamp\n"); 1521 } 1522 break; 1523 } 1524 PFSYNC_BUNLOCK(sc); 1525 1526 return (len); 1527 } 1528 1529 static int 1530 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action) 1531 { 1532 int len = count * sizeof(struct pfsync_tdb); 1533 1534 #if defined(IPSEC) 1535 struct pfsync_tdb *tp; 1536 struct mbuf *mp; 1537 int offp; 1538 int i; 1539 int s; 1540 1541 mp = m_pulldown(m, offset, len, &offp); 1542 if (mp == NULL) { 1543 V_pfsyncstats.pfsyncs_badlen++; 1544 return (-1); 1545 } 1546 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1547 1548 for (i = 0; i < count; i++) 1549 pfsync_update_net_tdb(&tp[i]); 1550 #endif 1551 1552 return (len); 1553 } 1554 1555 #if defined(IPSEC) 1556 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1557 static void 1558 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1559 { 1560 struct tdb *tdb; 1561 int s; 1562 1563 /* check for invalid values */ 1564 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1565 (pt->dst.sa.sa_family != AF_INET && 1566 pt->dst.sa.sa_family != AF_INET6)) 1567 goto bad; 1568 1569 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1570 if (tdb) { 1571 pt->rpl = ntohl(pt->rpl); 1572 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1573 1574 /* Neither replay nor byte counter should ever decrease. */ 1575 if (pt->rpl < tdb->tdb_rpl || 1576 pt->cur_bytes < tdb->tdb_cur_bytes) { 1577 goto bad; 1578 } 1579 1580 tdb->tdb_rpl = pt->rpl; 1581 tdb->tdb_cur_bytes = pt->cur_bytes; 1582 } 1583 return; 1584 1585 bad: 1586 if (V_pf_status.debug >= PF_DEBUG_MISC) 1587 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1588 "invalid value\n"); 1589 V_pfsyncstats.pfsyncs_badstate++; 1590 return; 1591 } 1592 #endif 1593 1594 static int 1595 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action) 1596 { 1597 /* check if we are at the right place in the packet */ 1598 if (offset != m->m_pkthdr.len) 1599 V_pfsyncstats.pfsyncs_badlen++; 1600 1601 /* we're done. free and let the caller return */ 1602 m_freem(m); 1603 return (-1); 1604 } 1605 1606 static int 1607 pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action) 1608 { 1609 V_pfsyncstats.pfsyncs_badact++; 1610 1611 m_freem(m); 1612 return (-1); 1613 } 1614 1615 static int 1616 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1617 struct route *rt) 1618 { 1619 m_freem(m); 1620 return (0); 1621 } 1622 1623 /* ARGSUSED */ 1624 static int 1625 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1626 { 1627 struct pfsync_softc *sc = ifp->if_softc; 1628 struct ifreq *ifr = (struct ifreq *)data; 1629 struct pfsyncreq pfsyncr; 1630 size_t nvbuflen; 1631 int error; 1632 int c; 1633 1634 switch (cmd) { 1635 case SIOCSIFFLAGS: 1636 PFSYNC_LOCK(sc); 1637 if (ifp->if_flags & IFF_UP) { 1638 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1639 PFSYNC_UNLOCK(sc); 1640 pfsync_pointers_init(); 1641 } else { 1642 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1643 PFSYNC_UNLOCK(sc); 1644 pfsync_pointers_uninit(); 1645 } 1646 break; 1647 case SIOCSIFMTU: 1648 if (!sc->sc_sync_if || 1649 ifr->ifr_mtu <= PFSYNC_MINPKT || 1650 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1651 return (EINVAL); 1652 if (ifr->ifr_mtu < ifp->if_mtu) { 1653 for (c = 0; c < pfsync_buckets; c++) { 1654 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1655 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1656 pfsync_sendout(1, c); 1657 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1658 } 1659 } 1660 ifp->if_mtu = ifr->ifr_mtu; 1661 break; 1662 case SIOCGETPFSYNC: 1663 bzero(&pfsyncr, sizeof(pfsyncr)); 1664 PFSYNC_LOCK(sc); 1665 if (sc->sc_sync_if) { 1666 strlcpy(pfsyncr.pfsyncr_syncdev, 1667 sc->sc_sync_if->if_xname, IFNAMSIZ); 1668 } 1669 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1670 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1671 pfsyncr.pfsyncr_defer = sc->sc_flags; 1672 PFSYNC_UNLOCK(sc); 1673 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1674 sizeof(pfsyncr))); 1675 1676 case SIOCGETPFSYNCNV: 1677 { 1678 nvlist_t *nvl_syncpeer; 1679 nvlist_t *nvl = nvlist_create(0); 1680 1681 if (nvl == NULL) 1682 return (ENOMEM); 1683 1684 if (sc->sc_sync_if) 1685 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1686 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1687 nvlist_add_number(nvl, "flags", sc->sc_flags); 1688 nvlist_add_number(nvl, "version", sc->sc_version); 1689 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1690 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1691 1692 void *packed = NULL; 1693 packed = nvlist_pack(nvl, &nvbuflen); 1694 if (packed == NULL) { 1695 free(packed, M_NVLIST); 1696 nvlist_destroy(nvl); 1697 return (ENOMEM); 1698 } 1699 1700 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1701 ifr->ifr_cap_nv.length = nvbuflen; 1702 ifr->ifr_cap_nv.buffer = NULL; 1703 free(packed, M_NVLIST); 1704 nvlist_destroy(nvl); 1705 return (EFBIG); 1706 } 1707 1708 ifr->ifr_cap_nv.length = nvbuflen; 1709 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1710 1711 nvlist_destroy(nvl); 1712 nvlist_destroy(nvl_syncpeer); 1713 free(packed, M_NVLIST); 1714 break; 1715 } 1716 1717 case SIOCSETPFSYNC: 1718 { 1719 struct pfsync_kstatus status; 1720 1721 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1722 return (error); 1723 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1724 sizeof(pfsyncr)))) 1725 return (error); 1726 1727 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1728 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1729 1730 error = pfsync_kstatus_to_softc(&status, sc); 1731 return (error); 1732 } 1733 case SIOCSETPFSYNCNV: 1734 { 1735 struct pfsync_kstatus status; 1736 void *data; 1737 nvlist_t *nvl; 1738 1739 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1740 return (error); 1741 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1742 return (EINVAL); 1743 1744 data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); 1745 1746 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1747 ifr->ifr_cap_nv.length)) != 0) { 1748 free(data, M_TEMP); 1749 return (error); 1750 } 1751 1752 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1753 free(data, M_TEMP); 1754 return (EINVAL); 1755 } 1756 1757 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1758 pfsync_nvstatus_to_kstatus(nvl, &status); 1759 1760 nvlist_destroy(nvl); 1761 free(data, M_TEMP); 1762 1763 error = pfsync_kstatus_to_softc(&status, sc); 1764 return (error); 1765 } 1766 default: 1767 return (ENOTTY); 1768 } 1769 1770 return (0); 1771 } 1772 1773 static void 1774 pfsync_out_state_1301(struct pf_kstate *st, void *buf) 1775 { 1776 union pfsync_state_union *sp = buf; 1777 1778 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301); 1779 } 1780 1781 static void 1782 pfsync_out_state_1400(struct pf_kstate *st, void *buf) 1783 { 1784 union pfsync_state_union *sp = buf; 1785 1786 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400); 1787 } 1788 1789 static void 1790 pfsync_out_iack(struct pf_kstate *st, void *buf) 1791 { 1792 struct pfsync_ins_ack *iack = buf; 1793 1794 iack->id = st->id; 1795 iack->creatorid = st->creatorid; 1796 } 1797 1798 static void 1799 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1800 { 1801 struct pfsync_upd_c *up = buf; 1802 1803 bzero(up, sizeof(*up)); 1804 up->id = st->id; 1805 pf_state_peer_hton(&st->src, &up->src); 1806 pf_state_peer_hton(&st->dst, &up->dst); 1807 up->creatorid = st->creatorid; 1808 up->timeout = st->timeout; 1809 } 1810 1811 static void 1812 pfsync_out_del_c(struct pf_kstate *st, void *buf) 1813 { 1814 struct pfsync_del_c *dp = buf; 1815 1816 dp->id = st->id; 1817 dp->creatorid = st->creatorid; 1818 st->state_flags |= PFSTATE_NOSYNC; 1819 } 1820 1821 static void 1822 pfsync_drop_all(struct pfsync_softc *sc) 1823 { 1824 struct pfsync_bucket *b; 1825 int c; 1826 1827 for (c = 0; c < pfsync_buckets; c++) { 1828 b = &sc->sc_buckets[c]; 1829 1830 PFSYNC_BUCKET_LOCK(b); 1831 pfsync_drop(sc, c); 1832 PFSYNC_BUCKET_UNLOCK(b); 1833 } 1834 } 1835 1836 static void 1837 pfsync_drop(struct pfsync_softc *sc, int c) 1838 { 1839 struct pf_kstate *st, *next; 1840 struct pfsync_upd_req_item *ur; 1841 struct pfsync_bucket *b; 1842 enum pfsync_q_id q; 1843 1844 b = &sc->sc_buckets[c]; 1845 PFSYNC_BUCKET_LOCK_ASSERT(b); 1846 1847 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1848 if (TAILQ_EMPTY(&b->b_qs[q])) 1849 continue; 1850 1851 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1852 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1853 ("%s: st->sync_state %d == q %d", 1854 __func__, st->sync_state, q)); 1855 st->sync_state = PFSYNC_S_NONE; 1856 pf_release_state(st); 1857 } 1858 TAILQ_INIT(&b->b_qs[q]); 1859 } 1860 1861 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1862 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1863 free(ur, M_PFSYNC); 1864 } 1865 1866 b->b_len = PFSYNC_MINPKT; 1867 free(b->b_plus, M_PFSYNC); 1868 b->b_plus = NULL; 1869 b->b_pluslen = 0; 1870 } 1871 1872 static void 1873 pfsync_sendout(int schedswi, int c) 1874 { 1875 struct pfsync_softc *sc = V_pfsyncif; 1876 struct ifnet *ifp = sc->sc_ifp; 1877 struct mbuf *m; 1878 struct pfsync_header *ph; 1879 struct pfsync_subheader *subh; 1880 struct pf_kstate *st, *st_next; 1881 struct pfsync_upd_req_item *ur; 1882 struct pfsync_bucket *b = &sc->sc_buckets[c]; 1883 size_t len; 1884 int aflen, offset, count = 0; 1885 enum pfsync_q_id q; 1886 1887 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1888 KASSERT(b->b_len > PFSYNC_MINPKT, 1889 ("%s: sc_len %zu", __func__, b->b_len)); 1890 PFSYNC_BUCKET_LOCK_ASSERT(b); 1891 1892 if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) { 1893 pfsync_drop(sc, c); 1894 return; 1895 } 1896 1897 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1898 if (m == NULL) { 1899 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1900 V_pfsyncstats.pfsyncs_onomem++; 1901 return; 1902 } 1903 m->m_data += max_linkhdr; 1904 bzero(m->m_data, b->b_len); 1905 1906 len = b->b_len; 1907 1908 /* build the ip header */ 1909 switch (sc->sc_sync_peer.ss_family) { 1910 #ifdef INET 1911 case AF_INET: 1912 { 1913 struct ip *ip; 1914 1915 ip = mtod(m, struct ip *); 1916 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 1917 aflen = offset = sizeof(*ip); 1918 1919 len -= sizeof(union inet_template) - sizeof(struct ip); 1920 ip->ip_len = htons(len); 1921 ip_fillid(ip, V_ip_random_id); 1922 break; 1923 } 1924 #endif 1925 #ifdef INET6 1926 case AF_INET6: 1927 { 1928 struct ip6_hdr *ip6; 1929 1930 ip6 = mtod(m, struct ip6_hdr *); 1931 bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6)); 1932 aflen = offset = sizeof(*ip6); 1933 1934 len -= sizeof(union inet_template) - sizeof(struct ip6_hdr); 1935 ip6->ip6_plen = htons(len); 1936 break; 1937 } 1938 #endif 1939 default: 1940 m_freem(m); 1941 pfsync_drop(sc, c); 1942 return; 1943 } 1944 m->m_len = m->m_pkthdr.len = len; 1945 1946 /* build the pfsync header */ 1947 ph = (struct pfsync_header *)(m->m_data + offset); 1948 offset += sizeof(*ph); 1949 1950 ph->version = PFSYNC_VERSION; 1951 ph->len = htons(len - aflen); 1952 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1953 1954 /* walk the queues */ 1955 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1956 if (TAILQ_EMPTY(&b->b_qs[q])) 1957 continue; 1958 1959 subh = (struct pfsync_subheader *)(m->m_data + offset); 1960 offset += sizeof(*subh); 1961 1962 count = 0; 1963 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 1964 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1965 ("%s: st->sync_state == q", 1966 __func__)); 1967 /* 1968 * XXXGL: some of write methods do unlocked reads 1969 * of state data :( 1970 */ 1971 pfsync_qs[q].write(st, m->m_data + offset); 1972 offset += pfsync_qs[q].len; 1973 st->sync_state = PFSYNC_S_NONE; 1974 pf_release_state(st); 1975 count++; 1976 } 1977 TAILQ_INIT(&b->b_qs[q]); 1978 1979 subh->action = pfsync_qs[q].action; 1980 subh->count = htons(count); 1981 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1982 } 1983 1984 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 1985 subh = (struct pfsync_subheader *)(m->m_data + offset); 1986 offset += sizeof(*subh); 1987 1988 count = 0; 1989 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1990 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1991 1992 bcopy(&ur->ur_msg, m->m_data + offset, 1993 sizeof(ur->ur_msg)); 1994 offset += sizeof(ur->ur_msg); 1995 free(ur, M_PFSYNC); 1996 count++; 1997 } 1998 1999 subh->action = PFSYNC_ACT_UPD_REQ; 2000 subh->count = htons(count); 2001 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 2002 } 2003 2004 /* has someone built a custom region for us to add? */ 2005 if (b->b_plus != NULL) { 2006 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 2007 offset += b->b_pluslen; 2008 2009 free(b->b_plus, M_PFSYNC); 2010 b->b_plus = NULL; 2011 b->b_pluslen = 0; 2012 } 2013 2014 subh = (struct pfsync_subheader *)(m->m_data + offset); 2015 offset += sizeof(*subh); 2016 2017 subh->action = PFSYNC_ACT_EOF; 2018 subh->count = htons(1); 2019 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 2020 2021 /* we're done, let's put it on the wire */ 2022 if (bpf_peers_present(ifp->if_bpf)) { 2023 m->m_data += aflen; 2024 m->m_len = m->m_pkthdr.len = len - aflen; 2025 bpf_mtap(ifp->if_bpf, m); 2026 m->m_data -= aflen; 2027 m->m_len = m->m_pkthdr.len = len; 2028 } 2029 2030 if (sc->sc_sync_if == NULL) { 2031 b->b_len = PFSYNC_MINPKT; 2032 m_freem(m); 2033 return; 2034 } 2035 2036 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 2037 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 2038 b->b_len = PFSYNC_MINPKT; 2039 2040 if (!_IF_QFULL(&b->b_snd)) 2041 _IF_ENQUEUE(&b->b_snd, m); 2042 else { 2043 m_freem(m); 2044 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 2045 } 2046 if (schedswi) 2047 swi_sched(V_pfsync_swi_cookie, 0); 2048 } 2049 2050 static void 2051 pfsync_insert_state(struct pf_kstate *st) 2052 { 2053 struct pfsync_softc *sc = V_pfsyncif; 2054 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2055 2056 if (st->state_flags & PFSTATE_NOSYNC) 2057 return; 2058 2059 if ((st->rule->rule_flag & PFRULE_NOSYNC) || 2060 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 2061 st->state_flags |= PFSTATE_NOSYNC; 2062 return; 2063 } 2064 2065 KASSERT(st->sync_state == PFSYNC_S_NONE, 2066 ("%s: st->sync_state %u", __func__, st->sync_state)); 2067 2068 PFSYNC_BUCKET_LOCK(b); 2069 if (b->b_len == PFSYNC_MINPKT) 2070 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2071 2072 pfsync_q_ins(st, PFSYNC_S_INS, true); 2073 PFSYNC_BUCKET_UNLOCK(b); 2074 2075 st->sync_updates = 0; 2076 } 2077 2078 static int 2079 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 2080 { 2081 struct pfsync_softc *sc = V_pfsyncif; 2082 struct pfsync_deferral *pd; 2083 struct pfsync_bucket *b; 2084 2085 if (m->m_flags & (M_BCAST|M_MCAST)) 2086 return (0); 2087 2088 if (sc == NULL) 2089 return (0); 2090 2091 b = pfsync_get_bucket(sc, st); 2092 2093 PFSYNC_LOCK(sc); 2094 2095 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 2096 !(sc->sc_flags & PFSYNCF_DEFER)) { 2097 PFSYNC_UNLOCK(sc); 2098 return (0); 2099 } 2100 2101 PFSYNC_BUCKET_LOCK(b); 2102 PFSYNC_UNLOCK(sc); 2103 2104 if (b->b_deferred >= 128) 2105 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 2106 2107 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 2108 if (pd == NULL) { 2109 PFSYNC_BUCKET_UNLOCK(b); 2110 return (0); 2111 } 2112 b->b_deferred++; 2113 2114 m->m_flags |= M_SKIP_FIREWALL; 2115 st->state_flags |= PFSTATE_ACK; 2116 2117 pd->pd_sc = sc; 2118 pd->pd_st = st; 2119 pf_ref_state(st); 2120 pd->pd_m = m; 2121 2122 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 2123 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 2124 callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, 2125 pfsync_defer_tmo, pd); 2126 2127 pfsync_push(b); 2128 PFSYNC_BUCKET_UNLOCK(b); 2129 2130 return (1); 2131 } 2132 2133 static void 2134 pfsync_undefer(struct pfsync_deferral *pd, int drop) 2135 { 2136 struct pfsync_softc *sc = pd->pd_sc; 2137 struct mbuf *m = pd->pd_m; 2138 struct pf_kstate *st = pd->pd_st; 2139 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2140 2141 PFSYNC_BUCKET_LOCK_ASSERT(b); 2142 2143 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2144 b->b_deferred--; 2145 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2146 free(pd, M_PFSYNC); 2147 pf_release_state(st); 2148 2149 if (drop) 2150 m_freem(m); 2151 else { 2152 _IF_ENQUEUE(&b->b_snd, m); 2153 pfsync_push(b); 2154 } 2155 } 2156 2157 static void 2158 pfsync_defer_tmo(void *arg) 2159 { 2160 struct epoch_tracker et; 2161 struct pfsync_deferral *pd = arg; 2162 struct pfsync_softc *sc = pd->pd_sc; 2163 struct mbuf *m = pd->pd_m; 2164 struct pf_kstate *st = pd->pd_st; 2165 struct pfsync_bucket *b; 2166 2167 CURVNET_SET(sc->sc_ifp->if_vnet); 2168 2169 b = pfsync_get_bucket(sc, st); 2170 2171 PFSYNC_BUCKET_LOCK_ASSERT(b); 2172 2173 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2174 b->b_deferred--; 2175 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2176 PFSYNC_BUCKET_UNLOCK(b); 2177 free(pd, M_PFSYNC); 2178 2179 if (sc->sc_sync_if == NULL) { 2180 pf_release_state(st); 2181 m_freem(m); 2182 CURVNET_RESTORE(); 2183 return; 2184 } 2185 2186 NET_EPOCH_ENTER(et); 2187 2188 pfsync_tx(sc, m); 2189 2190 pf_release_state(st); 2191 2192 CURVNET_RESTORE(); 2193 NET_EPOCH_EXIT(et); 2194 } 2195 2196 static void 2197 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 2198 { 2199 struct pfsync_softc *sc = V_pfsyncif; 2200 struct pfsync_deferral *pd; 2201 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2202 2203 PFSYNC_BUCKET_LOCK_ASSERT(b); 2204 2205 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 2206 if (pd->pd_st == st) { 2207 if (callout_stop(&pd->pd_tmo) > 0) 2208 pfsync_undefer(pd, drop); 2209 2210 return; 2211 } 2212 } 2213 2214 panic("%s: unable to find deferred state", __func__); 2215 } 2216 2217 static void 2218 pfsync_undefer_state(struct pf_kstate *st, int drop) 2219 { 2220 struct pfsync_softc *sc = V_pfsyncif; 2221 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2222 2223 PFSYNC_BUCKET_LOCK(b); 2224 pfsync_undefer_state_locked(st, drop); 2225 PFSYNC_BUCKET_UNLOCK(b); 2226 } 2227 2228 static struct pfsync_bucket* 2229 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 2230 { 2231 int c = PF_IDHASH(st) % pfsync_buckets; 2232 return &sc->sc_buckets[c]; 2233 } 2234 2235 static void 2236 pfsync_update_state(struct pf_kstate *st) 2237 { 2238 struct pfsync_softc *sc = V_pfsyncif; 2239 bool sync = false, ref = true; 2240 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2241 2242 PF_STATE_LOCK_ASSERT(st); 2243 PFSYNC_BUCKET_LOCK(b); 2244 2245 if (st->state_flags & PFSTATE_ACK) 2246 pfsync_undefer_state_locked(st, 0); 2247 if (st->state_flags & PFSTATE_NOSYNC) { 2248 if (st->sync_state != PFSYNC_S_NONE) 2249 pfsync_q_del(st, true, b); 2250 PFSYNC_BUCKET_UNLOCK(b); 2251 return; 2252 } 2253 2254 if (b->b_len == PFSYNC_MINPKT) 2255 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2256 2257 switch (st->sync_state) { 2258 case PFSYNC_S_UPD_C: 2259 case PFSYNC_S_UPD: 2260 case PFSYNC_S_INS: 2261 /* we're already handling it */ 2262 2263 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2264 st->sync_updates++; 2265 if (st->sync_updates >= sc->sc_maxupdates) 2266 sync = true; 2267 } 2268 break; 2269 2270 case PFSYNC_S_IACK: 2271 pfsync_q_del(st, false, b); 2272 ref = false; 2273 /* FALLTHROUGH */ 2274 2275 case PFSYNC_S_NONE: 2276 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 2277 st->sync_updates = 0; 2278 break; 2279 2280 default: 2281 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2282 } 2283 2284 if (sync || (time_uptime - st->pfsync_time) < 2) 2285 pfsync_push(b); 2286 2287 PFSYNC_BUCKET_UNLOCK(b); 2288 } 2289 2290 static void 2291 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 2292 { 2293 struct pfsync_softc *sc = V_pfsyncif; 2294 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2295 struct pfsync_upd_req_item *item; 2296 size_t nlen = sizeof(struct pfsync_upd_req); 2297 2298 PFSYNC_BUCKET_LOCK_ASSERT(b); 2299 2300 /* 2301 * This code does a bit to prevent multiple update requests for the 2302 * same state being generated. It searches current subheader queue, 2303 * but it doesn't lookup into queue of already packed datagrams. 2304 */ 2305 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 2306 if (item->ur_msg.id == id && 2307 item->ur_msg.creatorid == creatorid) 2308 return; 2309 2310 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 2311 if (item == NULL) 2312 return; /* XXX stats */ 2313 2314 item->ur_msg.id = id; 2315 item->ur_msg.creatorid = creatorid; 2316 2317 if (TAILQ_EMPTY(&b->b_upd_req_list)) 2318 nlen += sizeof(struct pfsync_subheader); 2319 2320 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2321 pfsync_sendout(0, 0); 2322 2323 nlen = sizeof(struct pfsync_subheader) + 2324 sizeof(struct pfsync_upd_req); 2325 } 2326 2327 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 2328 b->b_len += nlen; 2329 2330 pfsync_push(b); 2331 } 2332 2333 static bool 2334 pfsync_update_state_req(struct pf_kstate *st) 2335 { 2336 struct pfsync_softc *sc = V_pfsyncif; 2337 bool ref = true, full = false; 2338 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2339 2340 PF_STATE_LOCK_ASSERT(st); 2341 PFSYNC_BUCKET_LOCK(b); 2342 2343 if (st->state_flags & PFSTATE_NOSYNC) { 2344 if (st->sync_state != PFSYNC_S_NONE) 2345 pfsync_q_del(st, true, b); 2346 PFSYNC_BUCKET_UNLOCK(b); 2347 return (full); 2348 } 2349 2350 switch (st->sync_state) { 2351 case PFSYNC_S_UPD_C: 2352 case PFSYNC_S_IACK: 2353 pfsync_q_del(st, false, b); 2354 ref = false; 2355 /* FALLTHROUGH */ 2356 2357 case PFSYNC_S_NONE: 2358 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2359 pfsync_push(b); 2360 break; 2361 2362 case PFSYNC_S_INS: 2363 case PFSYNC_S_UPD: 2364 case PFSYNC_S_DEL_C: 2365 /* we're already handling it */ 2366 break; 2367 2368 default: 2369 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2370 } 2371 2372 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union)) 2373 full = true; 2374 2375 PFSYNC_BUCKET_UNLOCK(b); 2376 2377 return (full); 2378 } 2379 2380 static void 2381 pfsync_delete_state(struct pf_kstate *st) 2382 { 2383 struct pfsync_softc *sc = V_pfsyncif; 2384 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2385 bool ref = true; 2386 2387 PFSYNC_BUCKET_LOCK(b); 2388 if (st->state_flags & PFSTATE_ACK) 2389 pfsync_undefer_state_locked(st, 1); 2390 if (st->state_flags & PFSTATE_NOSYNC) { 2391 if (st->sync_state != PFSYNC_S_NONE) 2392 pfsync_q_del(st, true, b); 2393 PFSYNC_BUCKET_UNLOCK(b); 2394 return; 2395 } 2396 2397 if (b->b_len == PFSYNC_MINPKT) 2398 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2399 2400 switch (st->sync_state) { 2401 case PFSYNC_S_INS: 2402 /* We never got to tell the world so just forget about it. */ 2403 pfsync_q_del(st, true, b); 2404 break; 2405 2406 case PFSYNC_S_UPD_C: 2407 case PFSYNC_S_UPD: 2408 case PFSYNC_S_IACK: 2409 pfsync_q_del(st, false, b); 2410 ref = false; 2411 /* FALLTHROUGH */ 2412 2413 case PFSYNC_S_NONE: 2414 pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); 2415 break; 2416 2417 default: 2418 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2419 } 2420 2421 PFSYNC_BUCKET_UNLOCK(b); 2422 } 2423 2424 static void 2425 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2426 { 2427 struct { 2428 struct pfsync_subheader subh; 2429 struct pfsync_clr clr; 2430 } __packed r; 2431 2432 bzero(&r, sizeof(r)); 2433 2434 r.subh.action = PFSYNC_ACT_CLR; 2435 r.subh.count = htons(1); 2436 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2437 2438 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2439 r.clr.creatorid = creatorid; 2440 2441 pfsync_send_plus(&r, sizeof(r)); 2442 } 2443 2444 static enum pfsync_q_id 2445 pfsync_sstate_to_qid(u_int8_t sync_state) 2446 { 2447 struct pfsync_softc *sc = V_pfsyncif; 2448 2449 switch (sync_state) { 2450 case PFSYNC_S_INS: 2451 switch (sc->sc_version) { 2452 case PFSYNC_MSG_VERSION_1301: 2453 return PFSYNC_Q_INS_1301; 2454 case PFSYNC_MSG_VERSION_1400: 2455 return PFSYNC_Q_INS_1400; 2456 } 2457 break; 2458 case PFSYNC_S_IACK: 2459 return PFSYNC_Q_IACK; 2460 case PFSYNC_S_UPD: 2461 switch (sc->sc_version) { 2462 case PFSYNC_MSG_VERSION_1301: 2463 return PFSYNC_Q_UPD_1301; 2464 case PFSYNC_MSG_VERSION_1400: 2465 return PFSYNC_Q_UPD_1400; 2466 } 2467 break; 2468 case PFSYNC_S_UPD_C: 2469 return PFSYNC_Q_UPD_C; 2470 case PFSYNC_S_DEL_C: 2471 return PFSYNC_Q_DEL_C; 2472 default: 2473 panic("%s: Unsupported st->sync_state 0x%02x", 2474 __func__, sync_state); 2475 } 2476 2477 panic("%s: Unsupported pfsync_msg_version %d", 2478 __func__, sc->sc_version); 2479 } 2480 2481 static void 2482 pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref) 2483 { 2484 enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state); 2485 struct pfsync_softc *sc = V_pfsyncif; 2486 size_t nlen = pfsync_qs[q].len; 2487 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2488 2489 PFSYNC_BUCKET_LOCK_ASSERT(b); 2490 2491 KASSERT(st->sync_state == PFSYNC_S_NONE, 2492 ("%s: st->sync_state %u", __func__, st->sync_state)); 2493 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2494 b->b_len)); 2495 2496 if (TAILQ_EMPTY(&b->b_qs[q])) 2497 nlen += sizeof(struct pfsync_subheader); 2498 2499 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2500 pfsync_sendout(1, b->b_id); 2501 2502 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2503 } 2504 2505 b->b_len += nlen; 2506 st->sync_state = pfsync_qid_sstate[q]; 2507 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2508 if (ref) 2509 pf_ref_state(st); 2510 } 2511 2512 static void 2513 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2514 { 2515 enum pfsync_q_id q; 2516 2517 PFSYNC_BUCKET_LOCK_ASSERT(b); 2518 KASSERT(st->sync_state != PFSYNC_S_NONE, 2519 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2520 2521 q = pfsync_sstate_to_qid(st->sync_state); 2522 b->b_len -= pfsync_qs[q].len; 2523 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2524 st->sync_state = PFSYNC_S_NONE; 2525 if (unref) 2526 pf_release_state(st); 2527 2528 if (TAILQ_EMPTY(&b->b_qs[q])) 2529 b->b_len -= sizeof(struct pfsync_subheader); 2530 } 2531 2532 static void 2533 pfsync_bulk_start(void) 2534 { 2535 struct pfsync_softc *sc = V_pfsyncif; 2536 2537 if (V_pf_status.debug >= PF_DEBUG_MISC) 2538 printf("pfsync: received bulk update request\n"); 2539 2540 PFSYNC_BLOCK(sc); 2541 2542 sc->sc_ureq_received = time_uptime; 2543 sc->sc_bulk_hashid = 0; 2544 sc->sc_bulk_stateid = 0; 2545 pfsync_bulk_status(PFSYNC_BUS_START); 2546 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2547 PFSYNC_BUNLOCK(sc); 2548 } 2549 2550 static void 2551 pfsync_bulk_update(void *arg) 2552 { 2553 struct pfsync_softc *sc = arg; 2554 struct pf_kstate *s; 2555 int i; 2556 2557 PFSYNC_BLOCK_ASSERT(sc); 2558 CURVNET_SET(sc->sc_ifp->if_vnet); 2559 2560 /* 2561 * Start with last state from previous invocation. 2562 * It may had gone, in this case start from the 2563 * hash slot. 2564 */ 2565 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2566 2567 if (s != NULL) 2568 i = PF_IDHASH(s); 2569 else 2570 i = sc->sc_bulk_hashid; 2571 2572 for (; i <= V_pf_hashmask; i++) { 2573 struct pf_idhash *ih = &V_pf_idhash[i]; 2574 2575 if (s != NULL) 2576 PF_HASHROW_ASSERT(ih); 2577 else { 2578 PF_HASHROW_LOCK(ih); 2579 s = LIST_FIRST(&ih->states); 2580 } 2581 2582 for (; s; s = LIST_NEXT(s, entry)) { 2583 if (s->sync_state == PFSYNC_S_NONE && 2584 s->timeout < PFTM_MAX && 2585 s->pfsync_time <= sc->sc_ureq_received) { 2586 if (pfsync_update_state_req(s)) { 2587 /* We've filled a packet. */ 2588 sc->sc_bulk_hashid = i; 2589 sc->sc_bulk_stateid = s->id; 2590 sc->sc_bulk_creatorid = s->creatorid; 2591 PF_HASHROW_UNLOCK(ih); 2592 callout_reset(&sc->sc_bulk_tmo, 1, 2593 pfsync_bulk_update, sc); 2594 goto full; 2595 } 2596 } 2597 } 2598 PF_HASHROW_UNLOCK(ih); 2599 } 2600 2601 /* We're done. */ 2602 pfsync_bulk_status(PFSYNC_BUS_END); 2603 full: 2604 CURVNET_RESTORE(); 2605 } 2606 2607 static void 2608 pfsync_bulk_status(u_int8_t status) 2609 { 2610 struct { 2611 struct pfsync_subheader subh; 2612 struct pfsync_bus bus; 2613 } __packed r; 2614 2615 struct pfsync_softc *sc = V_pfsyncif; 2616 2617 bzero(&r, sizeof(r)); 2618 2619 r.subh.action = PFSYNC_ACT_BUS; 2620 r.subh.count = htons(1); 2621 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2622 2623 r.bus.creatorid = V_pf_status.hostid; 2624 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2625 r.bus.status = status; 2626 2627 pfsync_send_plus(&r, sizeof(r)); 2628 } 2629 2630 static void 2631 pfsync_bulk_fail(void *arg) 2632 { 2633 struct pfsync_softc *sc = arg; 2634 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2635 2636 CURVNET_SET(sc->sc_ifp->if_vnet); 2637 2638 PFSYNC_BLOCK_ASSERT(sc); 2639 2640 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2641 /* Try again */ 2642 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2643 pfsync_bulk_fail, V_pfsyncif); 2644 PFSYNC_BUCKET_LOCK(b); 2645 pfsync_request_update(0, 0); 2646 PFSYNC_BUCKET_UNLOCK(b); 2647 } else { 2648 /* Pretend like the transfer was ok. */ 2649 sc->sc_ureq_sent = 0; 2650 sc->sc_bulk_tries = 0; 2651 PFSYNC_LOCK(sc); 2652 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2653 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2654 "pfsync bulk fail"); 2655 sc->sc_flags |= PFSYNCF_OK; 2656 PFSYNC_UNLOCK(sc); 2657 if (V_pf_status.debug >= PF_DEBUG_MISC) 2658 printf("pfsync: failed to receive bulk update\n"); 2659 } 2660 2661 CURVNET_RESTORE(); 2662 } 2663 2664 static void 2665 pfsync_send_plus(void *plus, size_t pluslen) 2666 { 2667 struct pfsync_softc *sc = V_pfsyncif; 2668 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2669 uint8_t *newplus; 2670 2671 PFSYNC_BUCKET_LOCK(b); 2672 2673 if (b->b_len + pluslen > sc->sc_ifp->if_mtu) 2674 pfsync_sendout(1, b->b_id); 2675 2676 newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT); 2677 if (newplus == NULL) 2678 goto out; 2679 2680 if (b->b_plus != NULL) { 2681 memcpy(newplus, b->b_plus, b->b_pluslen); 2682 free(b->b_plus, M_PFSYNC); 2683 } else { 2684 MPASS(b->b_pluslen == 0); 2685 } 2686 memcpy(newplus + b->b_pluslen, plus, pluslen); 2687 2688 b->b_plus = newplus; 2689 b->b_pluslen += pluslen; 2690 b->b_len += pluslen; 2691 2692 pfsync_sendout(1, b->b_id); 2693 2694 out: 2695 PFSYNC_BUCKET_UNLOCK(b); 2696 } 2697 2698 static void 2699 pfsync_timeout(void *arg) 2700 { 2701 struct pfsync_bucket *b = arg; 2702 2703 CURVNET_SET(b->b_sc->sc_ifp->if_vnet); 2704 PFSYNC_BUCKET_LOCK(b); 2705 pfsync_push(b); 2706 PFSYNC_BUCKET_UNLOCK(b); 2707 CURVNET_RESTORE(); 2708 } 2709 2710 static void 2711 pfsync_push(struct pfsync_bucket *b) 2712 { 2713 2714 PFSYNC_BUCKET_LOCK_ASSERT(b); 2715 2716 b->b_flags |= PFSYNCF_BUCKET_PUSH; 2717 swi_sched(V_pfsync_swi_cookie, 0); 2718 } 2719 2720 static void 2721 pfsync_push_all(struct pfsync_softc *sc) 2722 { 2723 int c; 2724 struct pfsync_bucket *b; 2725 2726 for (c = 0; c < pfsync_buckets; c++) { 2727 b = &sc->sc_buckets[c]; 2728 2729 PFSYNC_BUCKET_LOCK(b); 2730 pfsync_push(b); 2731 PFSYNC_BUCKET_UNLOCK(b); 2732 } 2733 } 2734 2735 static void 2736 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) 2737 { 2738 struct ip *ip; 2739 int af, error = 0; 2740 2741 ip = mtod(m, struct ip *); 2742 MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); 2743 2744 af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; 2745 2746 /* 2747 * We distinguish between a deferral packet and our 2748 * own pfsync packet based on M_SKIP_FIREWALL 2749 * flag. This is XXX. 2750 */ 2751 switch (af) { 2752 #ifdef INET 2753 case AF_INET: 2754 if (m->m_flags & M_SKIP_FIREWALL) { 2755 error = ip_output(m, NULL, NULL, 0, 2756 NULL, NULL); 2757 } else { 2758 error = ip_output(m, NULL, NULL, 2759 IP_RAWOUTPUT, &sc->sc_imo, NULL); 2760 } 2761 break; 2762 #endif 2763 #ifdef INET6 2764 case AF_INET6: 2765 if (m->m_flags & M_SKIP_FIREWALL) { 2766 error = ip6_output(m, NULL, NULL, 0, 2767 NULL, NULL, NULL); 2768 } else { 2769 error = ip6_output(m, NULL, NULL, 0, 2770 &sc->sc_im6o, NULL, NULL); 2771 } 2772 break; 2773 #endif 2774 } 2775 2776 if (error == 0) 2777 V_pfsyncstats.pfsyncs_opackets++; 2778 else 2779 V_pfsyncstats.pfsyncs_oerrors++; 2780 2781 } 2782 2783 static void 2784 pfsyncintr(void *arg) 2785 { 2786 struct epoch_tracker et; 2787 struct pfsync_softc *sc = arg; 2788 struct pfsync_bucket *b; 2789 struct mbuf *m, *n; 2790 int c; 2791 2792 NET_EPOCH_ENTER(et); 2793 CURVNET_SET(sc->sc_ifp->if_vnet); 2794 2795 for (c = 0; c < pfsync_buckets; c++) { 2796 b = &sc->sc_buckets[c]; 2797 2798 PFSYNC_BUCKET_LOCK(b); 2799 if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { 2800 pfsync_sendout(0, b->b_id); 2801 b->b_flags &= ~PFSYNCF_BUCKET_PUSH; 2802 } 2803 _IF_DEQUEUE_ALL(&b->b_snd, m); 2804 PFSYNC_BUCKET_UNLOCK(b); 2805 2806 for (; m != NULL; m = n) { 2807 n = m->m_nextpkt; 2808 m->m_nextpkt = NULL; 2809 2810 pfsync_tx(sc, m); 2811 } 2812 } 2813 CURVNET_RESTORE(); 2814 NET_EPOCH_EXIT(et); 2815 } 2816 2817 static int 2818 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, 2819 struct in_mfilter* imf, struct in6_mfilter* im6f) 2820 { 2821 #ifdef INET 2822 struct ip_moptions *imo = &sc->sc_imo; 2823 #endif 2824 #ifdef INET6 2825 struct ip6_moptions *im6o = &sc->sc_im6o; 2826 struct sockaddr_in6 *syncpeer_sa6 = NULL; 2827 #endif 2828 2829 if (!(ifp->if_flags & IFF_MULTICAST)) 2830 return (EADDRNOTAVAIL); 2831 2832 switch (sc->sc_sync_peer.ss_family) { 2833 #ifdef INET 2834 case AF_INET: 2835 { 2836 int error; 2837 2838 ip_mfilter_init(&imo->imo_head); 2839 imo->imo_multicast_vif = -1; 2840 if ((error = in_joingroup(ifp, 2841 &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, 2842 &imf->imf_inm)) != 0) 2843 return (error); 2844 2845 ip_mfilter_insert(&imo->imo_head, imf); 2846 imo->imo_multicast_ifp = ifp; 2847 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2848 imo->imo_multicast_loop = 0; 2849 break; 2850 } 2851 #endif 2852 #ifdef INET6 2853 case AF_INET6: 2854 { 2855 int error; 2856 2857 syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer; 2858 if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL))) 2859 return (error); 2860 2861 ip6_mfilter_init(&im6o->im6o_head); 2862 if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL, 2863 &(im6f->im6f_in6m), 0)) != 0) 2864 return (error); 2865 2866 ip6_mfilter_insert(&im6o->im6o_head, im6f); 2867 im6o->im6o_multicast_ifp = ifp; 2868 im6o->im6o_multicast_hlim = PFSYNC_DFLTTL; 2869 im6o->im6o_multicast_loop = 0; 2870 break; 2871 } 2872 #endif 2873 } 2874 2875 return (0); 2876 } 2877 2878 static void 2879 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2880 { 2881 #ifdef INET 2882 struct ip_moptions *imo = &sc->sc_imo; 2883 struct in_mfilter *imf; 2884 2885 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2886 ip_mfilter_remove(&imo->imo_head, imf); 2887 in_leavegroup(imf->imf_inm, NULL); 2888 ip_mfilter_free(imf); 2889 } 2890 imo->imo_multicast_ifp = NULL; 2891 #endif 2892 2893 #ifdef INET6 2894 struct ip6_moptions *im6o = &sc->sc_im6o; 2895 struct in6_mfilter *im6f; 2896 2897 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 2898 ip6_mfilter_remove(&im6o->im6o_head, im6f); 2899 in6_leavegroup(im6f->im6f_in6m, NULL); 2900 ip6_mfilter_free(im6f); 2901 } 2902 im6o->im6o_multicast_ifp = NULL; 2903 #endif 2904 } 2905 2906 void 2907 pfsync_detach_ifnet(struct ifnet *ifp) 2908 { 2909 struct pfsync_softc *sc = V_pfsyncif; 2910 2911 if (sc == NULL) 2912 return; 2913 2914 PFSYNC_LOCK(sc); 2915 2916 if (sc->sc_sync_if == ifp) { 2917 /* We don't need mutlicast cleanup here, because the interface 2918 * is going away. We do need to ensure we don't try to do 2919 * cleanup later. 2920 */ 2921 ip_mfilter_init(&sc->sc_imo.imo_head); 2922 sc->sc_imo.imo_multicast_ifp = NULL; 2923 sc->sc_im6o.im6o_multicast_ifp = NULL; 2924 sc->sc_sync_if = NULL; 2925 } 2926 2927 PFSYNC_UNLOCK(sc); 2928 } 2929 2930 static int 2931 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 2932 { 2933 struct sockaddr_storage sa; 2934 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 2935 status->flags = pfsyncr->pfsyncr_defer; 2936 2937 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 2938 2939 memset(&sa, 0, sizeof(sa)); 2940 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 2941 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 2942 in->sin_family = AF_INET; 2943 in->sin_len = sizeof(*in); 2944 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 2945 } 2946 status->syncpeer = sa; 2947 2948 return 0; 2949 } 2950 2951 static int 2952 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 2953 { 2954 struct ifnet *sifp; 2955 struct in_mfilter *imf = NULL; 2956 struct in6_mfilter *im6f = NULL; 2957 int error; 2958 int c; 2959 2960 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 2961 return (EINVAL); 2962 2963 if (status->syncdev[0] == '\0') 2964 sifp = NULL; 2965 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 2966 return (EINVAL); 2967 2968 switch (status->syncpeer.ss_family) { 2969 #ifdef INET 2970 case AF_UNSPEC: 2971 case AF_INET: { 2972 struct sockaddr_in *status_sin; 2973 status_sin = (struct sockaddr_in *)&(status->syncpeer); 2974 if (sifp != NULL) { 2975 if (status_sin->sin_addr.s_addr == 0 || 2976 status_sin->sin_addr.s_addr == 2977 htonl(INADDR_PFSYNC_GROUP)) { 2978 status_sin->sin_family = AF_INET; 2979 status_sin->sin_len = sizeof(*status_sin); 2980 status_sin->sin_addr.s_addr = 2981 htonl(INADDR_PFSYNC_GROUP); 2982 } 2983 2984 if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) { 2985 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 2986 } 2987 } 2988 break; 2989 } 2990 #endif 2991 #ifdef INET6 2992 case AF_INET6: { 2993 struct sockaddr_in6 *status_sin6; 2994 status_sin6 = (struct sockaddr_in6*)&(status->syncpeer); 2995 if (sifp != NULL) { 2996 if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) || 2997 IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr, 2998 &in6addr_linklocal_pfsync_group)) { 2999 status_sin6->sin6_family = AF_INET6; 3000 status_sin6->sin6_len = sizeof(*status_sin6); 3001 status_sin6->sin6_addr = 3002 in6addr_linklocal_pfsync_group; 3003 } 3004 3005 if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) { 3006 im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0); 3007 } 3008 } 3009 break; 3010 } 3011 #endif 3012 } 3013 3014 PFSYNC_LOCK(sc); 3015 3016 switch (status->version) { 3017 case PFSYNC_MSG_VERSION_UNSPECIFIED: 3018 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 3019 break; 3020 case PFSYNC_MSG_VERSION_1301: 3021 case PFSYNC_MSG_VERSION_1400: 3022 sc->sc_version = status->version; 3023 break; 3024 default: 3025 PFSYNC_UNLOCK(sc); 3026 return (EINVAL); 3027 } 3028 3029 switch (status->syncpeer.ss_family) { 3030 case AF_INET: { 3031 struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer); 3032 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 3033 sc_sin->sin_family = AF_INET; 3034 sc_sin->sin_len = sizeof(*sc_sin); 3035 if (status_sin->sin_addr.s_addr == 0) { 3036 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 3037 } else { 3038 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 3039 } 3040 break; 3041 } 3042 case AF_INET6: { 3043 struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer); 3044 struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer; 3045 sc_sin->sin6_family = AF_INET6; 3046 sc_sin->sin6_len = sizeof(*sc_sin); 3047 if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) { 3048 sc_sin->sin6_addr = in6addr_linklocal_pfsync_group; 3049 } else { 3050 sc_sin->sin6_addr = status_sin->sin6_addr; 3051 } 3052 break; 3053 } 3054 } 3055 3056 sc->sc_maxupdates = status->maxupdates; 3057 if (status->flags & PFSYNCF_DEFER) { 3058 sc->sc_flags |= PFSYNCF_DEFER; 3059 V_pfsync_defer_ptr = pfsync_defer; 3060 } else { 3061 sc->sc_flags &= ~PFSYNCF_DEFER; 3062 V_pfsync_defer_ptr = NULL; 3063 } 3064 3065 if (sifp == NULL) { 3066 if (sc->sc_sync_if) 3067 if_rele(sc->sc_sync_if); 3068 sc->sc_sync_if = NULL; 3069 pfsync_multicast_cleanup(sc); 3070 PFSYNC_UNLOCK(sc); 3071 return (0); 3072 } 3073 3074 for (c = 0; c < pfsync_buckets; c++) { 3075 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 3076 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 3077 (sifp->if_mtu < sc->sc_ifp->if_mtu || 3078 (sc->sc_sync_if != NULL && 3079 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 3080 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 3081 pfsync_sendout(1, c); 3082 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 3083 } 3084 3085 pfsync_multicast_cleanup(sc); 3086 3087 if (((sc->sc_sync_peer.ss_family == AF_INET) && 3088 IN_MULTICAST(ntohl(((struct sockaddr_in *) 3089 &sc->sc_sync_peer)->sin_addr.s_addr))) || 3090 ((sc->sc_sync_peer.ss_family == AF_INET6) && 3091 IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*) 3092 &sc->sc_sync_peer)->sin6_addr))) { 3093 error = pfsync_multicast_setup(sc, sifp, imf, im6f); 3094 if (error) { 3095 if_rele(sifp); 3096 PFSYNC_UNLOCK(sc); 3097 #ifdef INET 3098 if (imf != NULL) 3099 ip_mfilter_free(imf); 3100 #endif 3101 #ifdef INET6 3102 if (im6f != NULL) 3103 ip6_mfilter_free(im6f); 3104 #endif 3105 return (error); 3106 } 3107 } 3108 if (sc->sc_sync_if) 3109 if_rele(sc->sc_sync_if); 3110 sc->sc_sync_if = sifp; 3111 3112 switch (sc->sc_sync_peer.ss_family) { 3113 #ifdef INET 3114 case AF_INET: { 3115 struct ip *ip; 3116 ip = &sc->sc_template.ipv4; 3117 bzero(ip, sizeof(*ip)); 3118 ip->ip_v = IPVERSION; 3119 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 3120 ip->ip_tos = IPTOS_LOWDELAY; 3121 /* len and id are set later. */ 3122 ip->ip_off = htons(IP_DF); 3123 ip->ip_ttl = PFSYNC_DFLTTL; 3124 ip->ip_p = IPPROTO_PFSYNC; 3125 ip->ip_src.s_addr = INADDR_ANY; 3126 ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 3127 break; 3128 } 3129 #endif 3130 #ifdef INET6 3131 case AF_INET6: { 3132 struct ip6_hdr *ip6; 3133 ip6 = &sc->sc_template.ipv6; 3134 bzero(ip6, sizeof(*ip6)); 3135 ip6->ip6_vfc = IPV6_VERSION; 3136 ip6->ip6_hlim = PFSYNC_DFLTTL; 3137 ip6->ip6_nxt = IPPROTO_PFSYNC; 3138 ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr; 3139 3140 struct epoch_tracker et; 3141 NET_EPOCH_ENTER(et); 3142 in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0, 3143 sc->sc_sync_if, &ip6->ip6_src, NULL); 3144 NET_EPOCH_EXIT(et); 3145 break; 3146 } 3147 #endif 3148 } 3149 3150 /* Request a full state table update. */ 3151 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 3152 (*carp_demote_adj_p)(V_pfsync_carp_adj, 3153 "pfsync bulk start"); 3154 sc->sc_flags &= ~PFSYNCF_OK; 3155 if (V_pf_status.debug >= PF_DEBUG_MISC) 3156 printf("pfsync: requesting bulk update\n"); 3157 PFSYNC_UNLOCK(sc); 3158 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 3159 pfsync_request_update(0, 0); 3160 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 3161 PFSYNC_BLOCK(sc); 3162 sc->sc_ureq_sent = time_uptime; 3163 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); 3164 PFSYNC_BUNLOCK(sc); 3165 return (0); 3166 } 3167 3168 static void 3169 pfsync_pointers_init(void) 3170 { 3171 3172 PF_RULES_WLOCK(); 3173 V_pfsync_state_import_ptr = pfsync_state_import; 3174 V_pfsync_insert_state_ptr = pfsync_insert_state; 3175 V_pfsync_update_state_ptr = pfsync_update_state; 3176 V_pfsync_delete_state_ptr = pfsync_delete_state; 3177 V_pfsync_clear_states_ptr = pfsync_clear_states; 3178 V_pfsync_defer_ptr = pfsync_defer; 3179 PF_RULES_WUNLOCK(); 3180 } 3181 3182 static void 3183 pfsync_pointers_uninit(void) 3184 { 3185 3186 PF_RULES_WLOCK(); 3187 V_pfsync_state_import_ptr = NULL; 3188 V_pfsync_insert_state_ptr = NULL; 3189 V_pfsync_update_state_ptr = NULL; 3190 V_pfsync_delete_state_ptr = NULL; 3191 V_pfsync_clear_states_ptr = NULL; 3192 V_pfsync_defer_ptr = NULL; 3193 PF_RULES_WUNLOCK(); 3194 } 3195 3196 static void 3197 vnet_pfsync_init(const void *unused __unused) 3198 { 3199 int error; 3200 3201 V_pfsync_cloner = if_clone_simple(pfsyncname, 3202 pfsync_clone_create, pfsync_clone_destroy, 1); 3203 error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, 3204 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 3205 if (error) { 3206 if_clone_detach(V_pfsync_cloner); 3207 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 3208 } 3209 3210 pfsync_pointers_init(); 3211 } 3212 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 3213 vnet_pfsync_init, NULL); 3214 3215 static void 3216 vnet_pfsync_uninit(const void *unused __unused) 3217 { 3218 int ret __diagused; 3219 3220 pfsync_pointers_uninit(); 3221 3222 if_clone_detach(V_pfsync_cloner); 3223 ret = swi_remove(V_pfsync_swi_cookie); 3224 MPASS(ret == 0); 3225 ret = intr_event_destroy(V_pfsync_swi_ie); 3226 MPASS(ret == 0); 3227 } 3228 3229 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 3230 vnet_pfsync_uninit, NULL); 3231 3232 static int 3233 pfsync_init(void) 3234 { 3235 int error; 3236 3237 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 3238 3239 #ifdef INET 3240 error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); 3241 if (error) 3242 return (error); 3243 #endif 3244 #ifdef INET6 3245 error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL); 3246 if (error) { 3247 ipproto_unregister(IPPROTO_PFSYNC); 3248 return (error); 3249 } 3250 #endif 3251 3252 return (0); 3253 } 3254 3255 static void 3256 pfsync_uninit(void) 3257 { 3258 pfsync_detach_ifnet_ptr = NULL; 3259 3260 #ifdef INET 3261 ipproto_unregister(IPPROTO_PFSYNC); 3262 #endif 3263 #ifdef INET6 3264 ip6proto_unregister(IPPROTO_PFSYNC); 3265 #endif 3266 } 3267 3268 static int 3269 pfsync_modevent(module_t mod, int type, void *data) 3270 { 3271 int error = 0; 3272 3273 switch (type) { 3274 case MOD_LOAD: 3275 error = pfsync_init(); 3276 break; 3277 case MOD_UNLOAD: 3278 pfsync_uninit(); 3279 break; 3280 default: 3281 error = EINVAL; 3282 break; 3283 } 3284 3285 return (error); 3286 } 3287 3288 static moduledata_t pfsync_mod = { 3289 pfsyncname, 3290 pfsync_modevent, 3291 0 3292 }; 3293 3294 #define PFSYNC_MODVER 1 3295 3296 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 3297 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 3298 MODULE_VERSION(pfsync, PFSYNC_MODVER); 3299 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 3300