1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 #include "opt_inet.h" 64 #include "opt_inet6.h" 65 #include "opt_pf.h" 66 67 #include <sys/param.h> 68 #include <sys/bus.h> 69 #include <sys/endian.h> 70 #include <sys/interrupt.h> 71 #include <sys/kernel.h> 72 #include <sys/lock.h> 73 #include <sys/mbuf.h> 74 #include <sys/module.h> 75 #include <sys/mutex.h> 76 #include <sys/nv.h> 77 #include <sys/priv.h> 78 #include <sys/smp.h> 79 #include <sys/socket.h> 80 #include <sys/sockio.h> 81 #include <sys/sysctl.h> 82 #include <sys/syslog.h> 83 84 #include <net/bpf.h> 85 #include <net/if.h> 86 #include <net/if_var.h> 87 #include <net/if_clone.h> 88 #include <net/if_private.h> 89 #include <net/if_types.h> 90 #include <net/vnet.h> 91 #include <net/pfvar.h> 92 #include <net/route.h> 93 #include <net/if_pfsync.h> 94 95 #include <netinet/if_ether.h> 96 #include <netinet/in.h> 97 #include <netinet/in_var.h> 98 #include <netinet6/in6_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip6.h> 101 #include <netinet/ip_carp.h> 102 #include <netinet/ip_var.h> 103 #include <netinet/tcp.h> 104 #include <netinet/tcp_fsm.h> 105 #include <netinet/tcp_seq.h> 106 107 #include <netinet/ip6.h> 108 #include <netinet6/ip6_var.h> 109 #include <netinet6/scope6_var.h> 110 111 #include <netpfil/pf/pfsync_nv.h> 112 113 struct pfsync_bucket; 114 struct pfsync_softc; 115 116 union inet_template { 117 struct ip ipv4; 118 struct ip6_hdr ipv6; 119 }; 120 121 #define PFSYNC_MINPKT ( \ 122 sizeof(union inet_template) + \ 123 sizeof(struct pfsync_header) + \ 124 sizeof(struct pfsync_subheader) ) 125 126 static int pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *, 127 struct pf_state_peer_export *); 128 static int pfsync_in_clr(struct mbuf *, int, int, int, int); 129 static int pfsync_in_ins(struct mbuf *, int, int, int, int); 130 static int pfsync_in_iack(struct mbuf *, int, int, int, int); 131 static int pfsync_in_upd(struct mbuf *, int, int, int, int); 132 static int pfsync_in_upd_c(struct mbuf *, int, int, int, int); 133 static int pfsync_in_ureq(struct mbuf *, int, int, int, int); 134 static int pfsync_in_del_c(struct mbuf *, int, int, int, int); 135 static int pfsync_in_bus(struct mbuf *, int, int, int, int); 136 static int pfsync_in_tdb(struct mbuf *, int, int, int, int); 137 static int pfsync_in_eof(struct mbuf *, int, int, int, int); 138 static int pfsync_in_error(struct mbuf *, int, int, int, int); 139 140 static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { 141 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 142 pfsync_in_ins, /* PFSYNC_ACT_INS_1301 */ 143 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 144 pfsync_in_upd, /* PFSYNC_ACT_UPD_1301 */ 145 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 146 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 147 pfsync_in_error, /* PFSYNC_ACT_DEL */ 148 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 149 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 150 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 151 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 152 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 153 pfsync_in_eof, /* PFSYNC_ACT_EOF */ 154 pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ 155 pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ 156 pfsync_in_ins, /* PFSYNC_ACT_INS_1500 */ 157 pfsync_in_upd, /* PFSYNC_ACT_UPD_1500 */ 158 }; 159 160 struct pfsync_q { 161 void (*write)(struct pf_kstate *, void *); 162 size_t len; 163 u_int8_t action; 164 }; 165 166 /* We have the following sync queues */ 167 enum pfsync_q_id { 168 PFSYNC_Q_INS_1301, 169 PFSYNC_Q_INS_1400, 170 PFSYNC_Q_INS_1500, 171 PFSYNC_Q_IACK, 172 PFSYNC_Q_UPD_1301, 173 PFSYNC_Q_UPD_1400, 174 PFSYNC_Q_UPD_1500, 175 PFSYNC_Q_UPD_C, 176 PFSYNC_Q_DEL_C, 177 PFSYNC_Q_COUNT, 178 }; 179 180 /* Functions for building messages for given queue */ 181 static void pfsync_out_state_1301(struct pf_kstate *, void *); 182 static void pfsync_out_state_1400(struct pf_kstate *, void *); 183 static void pfsync_out_state_1500(struct pf_kstate *, void *); 184 static void pfsync_out_iack(struct pf_kstate *, void *); 185 static void pfsync_out_upd_c(struct pf_kstate *, void *); 186 static void pfsync_out_del_c(struct pf_kstate *, void *); 187 188 /* Attach those functions to queue */ 189 static struct pfsync_q pfsync_qs[] = { 190 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, 191 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, 192 { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_INS_1500 }, 193 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 194 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, 195 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, 196 { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_UPD_1500 }, 197 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 198 { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 199 }; 200 201 /* Map queue to pf_kstate->sync_state */ 202 static u_int8_t pfsync_qid_sstate[] = { 203 PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ 204 PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ 205 PFSYNC_S_INS, /* PFSYNC_Q_INS_1500 */ 206 PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ 207 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ 208 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ 209 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1500 */ 210 PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ 211 PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ 212 }; 213 214 /* Map pf_kstate->sync_state to queue */ 215 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t); 216 217 static void pfsync_q_ins(struct pf_kstate *, int sync_state, bool); 218 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 219 220 static void pfsync_update_state(struct pf_kstate *); 221 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 222 223 struct pfsync_upd_req_item { 224 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 225 struct pfsync_upd_req ur_msg; 226 }; 227 228 struct pfsync_deferral { 229 struct pfsync_softc *pd_sc; 230 TAILQ_ENTRY(pfsync_deferral) pd_entry; 231 struct callout pd_tmo; 232 233 struct pf_kstate *pd_st; 234 struct mbuf *pd_m; 235 }; 236 237 struct pfsync_bucket 238 { 239 int b_id; 240 struct pfsync_softc *b_sc; 241 struct mtx b_mtx; 242 struct callout b_tmo; 243 int b_flags; 244 #define PFSYNCF_BUCKET_PUSH 0x00000001 245 246 size_t b_len; 247 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_Q_COUNT]; 248 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 249 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 250 u_int b_deferred; 251 uint8_t *b_plus; 252 size_t b_pluslen; 253 254 struct ifaltq b_snd; 255 }; 256 257 struct pfsync_softc { 258 /* Configuration */ 259 struct ifnet *sc_ifp; 260 struct ifnet *sc_sync_if; 261 struct ip_moptions sc_imo; 262 struct ip6_moptions sc_im6o; 263 struct sockaddr_storage sc_sync_peer; 264 uint32_t sc_flags; 265 uint8_t sc_maxupdates; 266 union inet_template sc_template; 267 struct mtx sc_mtx; 268 uint32_t sc_version; 269 270 /* Queued data */ 271 struct pfsync_bucket *sc_buckets; 272 273 /* Bulk update info */ 274 struct mtx sc_bulk_mtx; 275 uint32_t sc_ureq_sent; 276 int sc_bulk_tries; 277 uint32_t sc_ureq_received; 278 int sc_bulk_hashid; 279 uint64_t sc_bulk_stateid; 280 uint32_t sc_bulk_creatorid; 281 struct callout sc_bulk_tmo; 282 struct callout sc_bulkfail_tmo; 283 }; 284 285 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 286 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 287 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 288 289 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 290 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 291 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 292 293 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 294 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 295 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 296 297 #define PFSYNC_DEFER_TIMEOUT 20 298 299 static const char pfsyncname[] = "pfsync"; 300 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 301 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 302 #define V_pfsyncif VNET(pfsyncif) 303 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 304 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 305 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 306 #define V_pfsync_swi_ie VNET(pfsync_swi_ie) 307 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 308 #define V_pfsyncstats VNET(pfsyncstats) 309 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 310 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 311 VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; 312 #define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) 313 314 static void pfsync_timeout(void *); 315 static void pfsync_push(struct pfsync_bucket *); 316 static void pfsync_push_all(struct pfsync_softc *); 317 static void pfsyncintr(void *); 318 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 319 struct in_mfilter *, struct in6_mfilter *); 320 static void pfsync_multicast_cleanup(struct pfsync_softc *); 321 static void pfsync_pointers_init(void); 322 static void pfsync_pointers_uninit(void); 323 static int pfsync_init(void); 324 static void pfsync_uninit(void); 325 326 static unsigned long pfsync_buckets; 327 328 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 329 "PFSYNC"); 330 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 331 &VNET_NAME(pfsyncstats), pfsyncstats, 332 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 333 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 334 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 335 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 336 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 337 SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, 338 &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); 339 340 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 341 static void pfsync_clone_destroy(struct ifnet *); 342 static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *, 343 struct pf_state_peer *); 344 static int pfsyncoutput(struct ifnet *, struct mbuf *, 345 const struct sockaddr *, struct route *); 346 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 347 348 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 349 static void pfsync_undefer(struct pfsync_deferral *, int); 350 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 351 static void pfsync_undefer_state(struct pf_kstate *, int); 352 static void pfsync_defer_tmo(void *); 353 354 static void pfsync_request_update(u_int32_t, u_int64_t); 355 static bool pfsync_update_state_req(struct pf_kstate *); 356 357 static void pfsync_drop_all(struct pfsync_softc *); 358 static void pfsync_drop(struct pfsync_softc *, int); 359 static void pfsync_sendout(int, int); 360 static void pfsync_send_plus(void *, size_t); 361 362 static void pfsync_bulk_start(void); 363 static void pfsync_bulk_status(u_int8_t); 364 static void pfsync_bulk_update(void *); 365 static void pfsync_bulk_fail(void *); 366 367 static void pfsync_detach_ifnet(struct ifnet *); 368 369 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 370 struct pfsync_kstatus *); 371 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 372 struct pfsync_softc *); 373 374 #ifdef IPSEC 375 static void pfsync_update_net_tdb(struct pfsync_tdb *); 376 #endif 377 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 378 struct pf_kstate *); 379 380 #define PFSYNC_MAX_BULKTRIES 12 381 382 VNET_DEFINE(struct if_clone *, pfsync_cloner); 383 #define V_pfsync_cloner VNET(pfsync_cloner) 384 385 const struct in6_addr in6addr_linklocal_pfsync_group = 386 {{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}}; 388 static int 389 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 390 { 391 struct pfsync_softc *sc; 392 struct ifnet *ifp; 393 struct pfsync_bucket *b; 394 int c; 395 enum pfsync_q_id q; 396 397 if (unit != 0) 398 return (EINVAL); 399 400 if (! pfsync_buckets) 401 pfsync_buckets = mp_ncpus * 2; 402 403 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 404 sc->sc_flags |= PFSYNCF_OK; 405 sc->sc_maxupdates = 128; 406 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 407 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 408 M_PFSYNC, M_ZERO | M_WAITOK); 409 for (c = 0; c < pfsync_buckets; c++) { 410 b = &sc->sc_buckets[c]; 411 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 412 413 b->b_id = c; 414 b->b_sc = sc; 415 b->b_len = PFSYNC_MINPKT; 416 417 for (q = 0; q < PFSYNC_Q_COUNT; q++) 418 TAILQ_INIT(&b->b_qs[q]); 419 420 TAILQ_INIT(&b->b_upd_req_list); 421 TAILQ_INIT(&b->b_deferrals); 422 423 callout_init(&b->b_tmo, 1); 424 425 b->b_snd.ifq_maxlen = ifqmaxlen; 426 } 427 428 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 429 if_initname(ifp, pfsyncname, unit); 430 ifp->if_softc = sc; 431 ifp->if_ioctl = pfsyncioctl; 432 ifp->if_output = pfsyncoutput; 433 ifp->if_hdrlen = sizeof(struct pfsync_header); 434 ifp->if_mtu = ETHERMTU; 435 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 436 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 437 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 438 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 439 440 if_attach(ifp); 441 442 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 443 444 V_pfsyncif = sc; 445 446 return (0); 447 } 448 449 static void 450 pfsync_clone_destroy(struct ifnet *ifp) 451 { 452 struct pfsync_softc *sc = ifp->if_softc; 453 struct pfsync_bucket *b; 454 int c, ret; 455 456 for (c = 0; c < pfsync_buckets; c++) { 457 b = &sc->sc_buckets[c]; 458 /* 459 * At this stage, everything should have already been 460 * cleared by pfsync_uninit(), and we have only to 461 * drain callouts. 462 */ 463 PFSYNC_BUCKET_LOCK(b); 464 while (b->b_deferred > 0) { 465 struct pfsync_deferral *pd = 466 TAILQ_FIRST(&b->b_deferrals); 467 468 ret = callout_stop(&pd->pd_tmo); 469 PFSYNC_BUCKET_UNLOCK(b); 470 if (ret > 0) { 471 pfsync_undefer(pd, 1); 472 } else { 473 callout_drain(&pd->pd_tmo); 474 } 475 PFSYNC_BUCKET_LOCK(b); 476 } 477 MPASS(b->b_deferred == 0); 478 MPASS(TAILQ_EMPTY(&b->b_deferrals)); 479 PFSYNC_BUCKET_UNLOCK(b); 480 481 free(b->b_plus, M_PFSYNC); 482 b->b_plus = NULL; 483 b->b_pluslen = 0; 484 485 callout_drain(&b->b_tmo); 486 } 487 488 callout_drain(&sc->sc_bulkfail_tmo); 489 callout_drain(&sc->sc_bulk_tmo); 490 491 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 492 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 493 bpfdetach(ifp); 494 if_detach(ifp); 495 496 pfsync_drop_all(sc); 497 498 if_free(ifp); 499 pfsync_multicast_cleanup(sc); 500 mtx_destroy(&sc->sc_mtx); 501 mtx_destroy(&sc->sc_bulk_mtx); 502 503 for (c = 0; c < pfsync_buckets; c++) { 504 b = &sc->sc_buckets[c]; 505 mtx_destroy(&b->b_mtx); 506 } 507 free(sc->sc_buckets, M_PFSYNC); 508 free(sc, M_PFSYNC); 509 510 V_pfsyncif = NULL; 511 } 512 513 static int 514 pfsync_alloc_scrub_memory(struct pf_state_peer_export *s, 515 struct pf_state_peer *d) 516 { 517 if (s->scrub.scrub_flag && d->scrub == NULL) { 518 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 519 if (d->scrub == NULL) 520 return (ENOMEM); 521 } 522 523 return (0); 524 } 525 526 static int 527 pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) 528 { 529 struct pfsync_softc *sc = V_pfsyncif; 530 #ifndef __NO_STRICT_ALIGNMENT 531 struct pfsync_state_key key[2]; 532 #endif 533 struct pfsync_state_key *kw, *ks; 534 struct pf_kstate *st = NULL; 535 struct pf_state_key *skw = NULL, *sks = NULL; 536 struct pf_krule *r = NULL; 537 struct pfi_kkif *kif, *orig_kif; 538 struct pfi_kkif *rt_kif = NULL; 539 struct pf_kpooladdr *rpool_first; 540 int error; 541 int n = 0; 542 sa_family_t rt_af = 0; 543 uint8_t rt = 0; 544 sa_family_t wire_af, stack_af; 545 u_int8_t wire_proto, stack_proto; 546 547 PF_RULES_RASSERT(); 548 549 if (strnlen(sp->pfs_1301.ifname, IFNAMSIZ) == IFNAMSIZ) 550 return (EINVAL); 551 552 if (sp->pfs_1301.creatorid == 0) { 553 if (V_pf_status.debug >= PF_DEBUG_MISC) 554 printf("%s: invalid creator id: %08x\n", __func__, 555 ntohl(sp->pfs_1301.creatorid)); 556 return (EINVAL); 557 } 558 559 /* 560 * Check interfaces early on. Do it before allocating memory etc. 561 * Because there is a high chance there will be a lot more such states. 562 */ 563 if ((kif = orig_kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { 564 if (V_pf_status.debug >= PF_DEBUG_MISC) 565 printf("%s: unknown interface: %s\n", __func__, 566 sp->pfs_1301.ifname); 567 if (flags & PFSYNC_SI_IOCTL) 568 return (EINVAL); 569 return (0); /* skip this state */ 570 } 571 572 /* 573 * States created with floating interface policy can be synchronized to 574 * hosts with different interfaces, because they are bound to V_pfi_all. 575 * But s->orig_kif still points to a real interface. Don't abort 576 * importing the state if orig_kif does not exists on the importing host 577 * but the state is not interface-bound. 578 */ 579 if (msg_version == PFSYNC_MSG_VERSION_1500) { 580 orig_kif = pfi_kkif_find(sp->pfs_1500.orig_ifname); 581 if (orig_kif == NULL) { 582 if (kif == V_pfi_all) { 583 orig_kif = kif; 584 } else { 585 if (V_pf_status.debug >= PF_DEBUG_MISC) 586 printf("%s: unknown original interface:" 587 " %s\n", __func__, 588 sp->pfs_1500.orig_ifname); 589 if (flags & PFSYNC_SI_IOCTL) 590 return (EINVAL); 591 return (0); /* skip this state */ 592 } 593 } 594 } 595 596 /* 597 * If the ruleset checksums match or the state is coming from the ioctl, 598 * it's safe to associate the state with the rule of that number. 599 */ 600 if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && 601 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < 602 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { 603 TAILQ_FOREACH(r, pf_main_ruleset.rules[ 604 PF_RULESET_FILTER].active.ptr, entries) 605 if (ntohl(sp->pfs_1301.rule) == n++) 606 break; 607 } else 608 r = &V_pf_default_rule; 609 610 switch (msg_version) { 611 case PFSYNC_MSG_VERSION_1301: 612 /* 613 * On FreeBSD <= 13 the routing interface and routing operation 614 * are not sent over pfsync. If the ruleset is identical, 615 * though, we might be able to recover the routing information 616 * from the local ruleset. 617 */ 618 if (r != &V_pf_default_rule) { 619 struct pf_kpool *pool = &r->route; 620 621 /* Backwards compatibility. */ 622 if (TAILQ_EMPTY(&pool->list)) 623 pool = &r->rdr; 624 625 /* 626 * The ruleset is identical, try to recover. If the rule 627 * has a redirection pool with a single interface, there 628 * is a chance that this interface is identical as on 629 * the pfsync peer. If there's more than one interface, 630 * give up, as we can't be sure that we will pick the 631 * same one as the pfsync peer did. 632 */ 633 rpool_first = TAILQ_FIRST(&(pool->list)); 634 if ((rpool_first == NULL) || 635 (TAILQ_NEXT(rpool_first, entries) != NULL)) { 636 DPFPRINTF(PF_DEBUG_MISC, 637 "%s: can't recover routing information " 638 "because of empty or bad redirection pool", 639 __func__); 640 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 641 } 642 rt = r->rt; 643 rt_kif = rpool_first->kif; 644 /* 645 * Guess the AF of the route address, FreeBSD 13 does 646 * not support af-to nor prefer-ipv6-nexthop 647 * so it should be safe. 648 */ 649 rt_af = r->af; 650 } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { 651 /* 652 * Ruleset different, routing *supposedly* requested, 653 * give up on recovering. 654 */ 655 DPFPRINTF(PF_DEBUG_MISC, 656 "%s: can't recover routing information " 657 "because of different ruleset", __func__); 658 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 659 } 660 wire_af = stack_af = sp->pfs_1301.af; 661 wire_proto = stack_proto = sp->pfs_1301.proto; 662 break; 663 case PFSYNC_MSG_VERSION_1400: 664 /* 665 * On FreeBSD 14 we're not taking any chances. 666 * We use the information synced to us. 667 */ 668 if (sp->pfs_1400.rt) { 669 rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname); 670 if (rt_kif == NULL) { 671 DPFPRINTF(PF_DEBUG_MISC, 672 "%s: unknown route interface: %s", 673 __func__, sp->pfs_1400.rt_ifname); 674 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 675 } 676 rt = sp->pfs_1400.rt; 677 /* 678 * Guess the AF of the route address, FreeBSD 14 does 679 * not support af-to nor prefer-ipv6-nexthop 680 * so it should be safe. 681 */ 682 rt_af = sp->pfs_1400.af; 683 } 684 wire_af = stack_af = sp->pfs_1400.af; 685 wire_proto = stack_proto = sp->pfs_1400.proto; 686 break; 687 case PFSYNC_MSG_VERSION_1500: 688 /* 689 * On FreeBSD 15 and above we're not taking any chances. 690 * We use the information synced to us. 691 */ 692 if (sp->pfs_1500.rt) { 693 rt_kif = pfi_kkif_find(sp->pfs_1500.rt_ifname); 694 if (rt_kif == NULL) { 695 DPFPRINTF(PF_DEBUG_MISC, 696 "%s: unknown route interface: %s", 697 __func__, sp->pfs_1500.rt_ifname); 698 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 699 } 700 rt = sp->pfs_1500.rt; 701 rt_af = sp->pfs_1500.rt_af; 702 } 703 wire_af = sp->pfs_1500.wire_af; 704 stack_af = sp->pfs_1500.stack_af; 705 wire_proto = sp->pfs_1500.wire_proto; 706 stack_proto = sp->pfs_1500.stack_proto; 707 break; 708 } 709 710 if ((r->max_states && 711 counter_u64_fetch(r->states_cur) >= r->max_states)) 712 goto cleanup; 713 714 /* 715 * XXXGL: consider M_WAITOK in ioctl path after. 716 */ 717 st = pf_alloc_state(M_NOWAIT); 718 if (__predict_false(st == NULL)) 719 goto cleanup; 720 721 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 722 goto cleanup; 723 724 #ifndef __NO_STRICT_ALIGNMENT 725 bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2); 726 kw = &key[PF_SK_WIRE]; 727 ks = &key[PF_SK_STACK]; 728 #else 729 kw = &sp->pfs_1301.key[PF_SK_WIRE]; 730 ks = &sp->pfs_1301.key[PF_SK_STACK]; 731 #endif 732 733 if (wire_af != stack_af || 734 PF_ANEQ(&kw->addr[0], &ks->addr[0], wire_af) || 735 PF_ANEQ(&kw->addr[1], &ks->addr[1], wire_af) || 736 kw->port[0] != ks->port[0] || 737 kw->port[1] != ks->port[1]) { 738 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 739 if (sks == NULL) 740 goto cleanup; 741 } else 742 sks = skw; 743 744 /* allocate memory for scrub info */ 745 if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) || 746 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst)) 747 goto cleanup; 748 749 /* Copy to state key(s). */ 750 skw->addr[0] = kw->addr[0]; 751 skw->addr[1] = kw->addr[1]; 752 skw->port[0] = kw->port[0]; 753 skw->port[1] = kw->port[1]; 754 skw->proto = wire_proto; 755 skw->af = wire_af; 756 if (sks != skw) { 757 sks->addr[0] = ks->addr[0]; 758 sks->addr[1] = ks->addr[1]; 759 sks->port[0] = ks->port[0]; 760 sks->port[1] = ks->port[1]; 761 sks->proto = stack_proto; 762 sks->af = stack_af; 763 } 764 765 /* copy to state */ 766 st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000; 767 st->act.rt = rt; 768 st->act.rt_kif = rt_kif; 769 st->act.rt_af = rt_af; 770 771 switch (msg_version) { 772 case PFSYNC_MSG_VERSION_1301: 773 st->state_flags = sp->pfs_1301.state_flags; 774 st->direction = sp->pfs_1301.direction; 775 st->act.log = sp->pfs_1301.log; 776 st->timeout = sp->pfs_1301.timeout; 777 if (rt) 778 bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, 779 sizeof(st->act.rt_addr)); 780 /* 781 * In FreeBSD 13 pfsync lacks many attributes. Copy them 782 * from the rule if possible. If rule can't be matched 783 * clear any set options as we can't recover their 784 * parameters. 785 */ 786 if (r == &V_pf_default_rule) { 787 st->state_flags &= ~PFSTATE_SETMASK; 788 } else { 789 /* 790 * Similar to pf_rule_to_actions(). This code 791 * won't set the actions properly if they come 792 * from multiple "match" rules as only rule 793 * creating the state is send over pfsync. 794 */ 795 st->act.qid = r->qid; 796 st->act.pqid = r->pqid; 797 st->act.rtableid = r->rtableid; 798 if (r->scrub_flags & PFSTATE_SETTOS) 799 st->act.set_tos = r->set_tos; 800 st->act.min_ttl = r->min_ttl; 801 st->act.max_mss = r->max_mss; 802 st->state_flags |= (r->scrub_flags & 803 (PFSTATE_NODF|PFSTATE_RANDOMID| 804 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP| 805 PFSTATE_SETPRIO)); 806 if (r->dnpipe || r->dnrpipe) { 807 if (r->free_flags & PFRULE_DN_IS_PIPE) 808 st->state_flags |= PFSTATE_DN_IS_PIPE; 809 else 810 st->state_flags &= ~PFSTATE_DN_IS_PIPE; 811 } 812 st->act.dnpipe = r->dnpipe; 813 st->act.dnrpipe = r->dnrpipe; 814 } 815 break; 816 case PFSYNC_MSG_VERSION_1400: 817 st->state_flags = ntohs(sp->pfs_1400.state_flags); 818 st->direction = sp->pfs_1400.direction; 819 st->act.log = sp->pfs_1400.log; 820 st->timeout = sp->pfs_1400.timeout; 821 st->act.qid = ntohs(sp->pfs_1400.qid); 822 st->act.pqid = ntohs(sp->pfs_1400.pqid); 823 st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); 824 st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe); 825 st->act.rtableid = ntohl(sp->pfs_1400.rtableid); 826 st->act.min_ttl = sp->pfs_1400.min_ttl; 827 st->act.set_tos = sp->pfs_1400.set_tos; 828 st->act.max_mss = ntohs(sp->pfs_1400.max_mss); 829 st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; 830 st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; 831 if (rt) 832 bcopy(&sp->pfs_1400.rt_addr, &st->act.rt_addr, 833 sizeof(st->act.rt_addr)); 834 break; 835 case PFSYNC_MSG_VERSION_1500: 836 st->state_flags = ntohs(sp->pfs_1500.state_flags); 837 st->direction = sp->pfs_1500.direction; 838 st->act.log = sp->pfs_1500.log; 839 st->timeout = sp->pfs_1500.timeout; 840 st->act.qid = ntohs(sp->pfs_1500.qid); 841 st->act.pqid = ntohs(sp->pfs_1500.pqid); 842 st->act.dnpipe = ntohs(sp->pfs_1500.dnpipe); 843 st->act.dnrpipe = ntohs(sp->pfs_1500.dnrpipe); 844 st->act.rtableid = ntohl(sp->pfs_1500.rtableid); 845 st->act.min_ttl = sp->pfs_1500.min_ttl; 846 st->act.set_tos = sp->pfs_1500.set_tos; 847 st->act.max_mss = ntohs(sp->pfs_1500.max_mss); 848 st->act.set_prio[0] = sp->pfs_1500.set_prio[0]; 849 st->act.set_prio[1] = sp->pfs_1500.set_prio[1]; 850 if (rt) 851 bcopy(&sp->pfs_1500.rt_addr, &st->act.rt_addr, 852 sizeof(st->act.rt_addr)); 853 if (sp->pfs_1500.tagname[0] != 0) 854 st->tag = pf_tagname2tag(sp->pfs_1500.tagname); 855 break; 856 default: 857 panic("%s: Unsupported pfsync_msg_version %d", 858 __func__, msg_version); 859 } 860 861 st->expire = pf_get_uptime(); 862 if (sp->pfs_1301.expire) { 863 uint32_t timeout; 864 timeout = r->timeout[st->timeout]; 865 if (!timeout) 866 timeout = V_pf_default_rule.timeout[st->timeout]; 867 868 /* sp->expire may have been adaptively scaled by export. */ 869 st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; 870 } 871 872 if (! (st->act.rtableid == -1 || 873 (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) 874 goto cleanup; 875 876 st->id = sp->pfs_1301.id; 877 st->creatorid = sp->pfs_1301.creatorid; 878 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 879 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 880 881 st->rule = r; 882 st->nat_rule = NULL; 883 st->anchor = NULL; 884 885 st->pfsync_time = time_uptime; 886 st->sync_state = PFSYNC_S_NONE; 887 888 if (!(flags & PFSYNC_SI_IOCTL)) 889 st->state_flags |= PFSTATE_NOSYNC; 890 891 if ((error = pf_state_insert(kif, orig_kif, skw, sks, st)) != 0) 892 goto cleanup_state; 893 894 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 895 counter_u64_add(r->states_cur, 1); 896 counter_u64_add(r->states_tot, 1); 897 898 if (!(flags & PFSYNC_SI_IOCTL)) { 899 st->state_flags &= ~PFSTATE_NOSYNC; 900 if (st->state_flags & PFSTATE_ACK) { 901 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 902 PFSYNC_BUCKET_LOCK(b); 903 pfsync_q_ins(st, PFSYNC_S_IACK, true); 904 PFSYNC_BUCKET_UNLOCK(b); 905 906 pfsync_push_all(sc); 907 } 908 } 909 st->state_flags &= ~PFSTATE_ACK; 910 PF_STATE_UNLOCK(st); 911 912 return (0); 913 914 cleanup: 915 error = ENOMEM; 916 917 if (skw == sks) 918 sks = NULL; 919 uma_zfree(V_pf_state_key_z, skw); 920 uma_zfree(V_pf_state_key_z, sks); 921 922 cleanup_state: /* pf_state_insert() frees the state keys. */ 923 if (st) { 924 st->timeout = PFTM_UNLINKED; /* appease an assert */ 925 pf_free_state(st); 926 } 927 return (error); 928 } 929 930 #ifdef INET 931 static int 932 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 933 { 934 struct pfsync_softc *sc = V_pfsyncif; 935 struct mbuf *m = *mp; 936 struct ip *ip = mtod(m, struct ip *); 937 struct pfsync_header *ph; 938 struct pfsync_subheader subh; 939 940 int offset, len, flags = 0; 941 int rv; 942 uint16_t count; 943 944 PF_RULES_RLOCK_TRACKER; 945 946 *mp = NULL; 947 V_pfsyncstats.pfsyncs_ipackets++; 948 949 /* Verify that we have a sync interface configured. */ 950 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 951 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 952 goto done; 953 954 /* verify that the packet came in on the right interface */ 955 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 956 V_pfsyncstats.pfsyncs_badif++; 957 goto done; 958 } 959 960 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 961 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 962 /* verify that the IP TTL is 255. */ 963 if (ip->ip_ttl != PFSYNC_DFLTTL) { 964 V_pfsyncstats.pfsyncs_badttl++; 965 goto done; 966 } 967 968 offset = ip->ip_hl << 2; 969 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 970 V_pfsyncstats.pfsyncs_hdrops++; 971 goto done; 972 } 973 974 if (offset + sizeof(*ph) > m->m_len) { 975 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 976 V_pfsyncstats.pfsyncs_hdrops++; 977 return (IPPROTO_DONE); 978 } 979 ip = mtod(m, struct ip *); 980 } 981 ph = (struct pfsync_header *)((char *)ip + offset); 982 983 /* verify the version */ 984 if (ph->version != PFSYNC_VERSION) { 985 V_pfsyncstats.pfsyncs_badver++; 986 goto done; 987 } 988 989 len = ntohs(ph->len) + offset; 990 if (m->m_pkthdr.len < len) { 991 V_pfsyncstats.pfsyncs_badlen++; 992 goto done; 993 } 994 995 /* 996 * Trusting pf_chksum during packet processing, as well as seeking 997 * in interface name tree, require holding PF_RULES_RLOCK(). 998 */ 999 PF_RULES_RLOCK(); 1000 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 1001 flags = PFSYNC_SI_CKSUM; 1002 1003 offset += sizeof(*ph); 1004 while (offset <= len - sizeof(subh)) { 1005 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 1006 offset += sizeof(subh); 1007 1008 if (subh.action >= PFSYNC_ACT_MAX) { 1009 V_pfsyncstats.pfsyncs_badact++; 1010 PF_RULES_RUNLOCK(); 1011 goto done; 1012 } 1013 1014 count = ntohs(subh.count); 1015 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 1016 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 1017 if (rv == -1) { 1018 PF_RULES_RUNLOCK(); 1019 return (IPPROTO_DONE); 1020 } 1021 1022 offset += rv; 1023 } 1024 PF_RULES_RUNLOCK(); 1025 1026 done: 1027 m_freem(m); 1028 return (IPPROTO_DONE); 1029 } 1030 #endif 1031 1032 #ifdef INET6 1033 static int 1034 pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused) 1035 { 1036 struct pfsync_softc *sc = V_pfsyncif; 1037 struct mbuf *m = *mp; 1038 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1039 struct pfsync_header *ph; 1040 struct pfsync_subheader subh; 1041 1042 int offset, len, flags = 0; 1043 int rv; 1044 uint16_t count; 1045 1046 PF_RULES_RLOCK_TRACKER; 1047 1048 *mp = NULL; 1049 V_pfsyncstats.pfsyncs_ipackets++; 1050 1051 /* Verify that we have a sync interface configured. */ 1052 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 1053 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1054 goto done; 1055 1056 /* verify that the packet came in on the right interface */ 1057 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 1058 V_pfsyncstats.pfsyncs_badif++; 1059 goto done; 1060 } 1061 1062 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 1063 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 1064 /* verify that the IP TTL is 255. */ 1065 if (ip6->ip6_hlim != PFSYNC_DFLTTL) { 1066 V_pfsyncstats.pfsyncs_badttl++; 1067 goto done; 1068 } 1069 1070 1071 offset = sizeof(*ip6); 1072 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 1073 V_pfsyncstats.pfsyncs_hdrops++; 1074 goto done; 1075 } 1076 1077 if (offset + sizeof(*ph) > m->m_len) { 1078 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 1079 V_pfsyncstats.pfsyncs_hdrops++; 1080 return (IPPROTO_DONE); 1081 } 1082 ip6 = mtod(m, struct ip6_hdr *); 1083 } 1084 ph = (struct pfsync_header *)((char *)ip6 + offset); 1085 1086 /* verify the version */ 1087 if (ph->version != PFSYNC_VERSION) { 1088 V_pfsyncstats.pfsyncs_badver++; 1089 goto done; 1090 } 1091 1092 len = ntohs(ph->len) + offset; 1093 if (m->m_pkthdr.len < len) { 1094 V_pfsyncstats.pfsyncs_badlen++; 1095 goto done; 1096 } 1097 1098 /* 1099 * Trusting pf_chksum during packet processing, as well as seeking 1100 * in interface name tree, require holding PF_RULES_RLOCK(). 1101 */ 1102 PF_RULES_RLOCK(); 1103 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 1104 flags = PFSYNC_SI_CKSUM; 1105 1106 offset += sizeof(*ph); 1107 while (offset <= len - sizeof(subh)) { 1108 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 1109 offset += sizeof(subh); 1110 1111 if (subh.action >= PFSYNC_ACT_MAX) { 1112 V_pfsyncstats.pfsyncs_badact++; 1113 PF_RULES_RUNLOCK(); 1114 goto done; 1115 } 1116 1117 count = ntohs(subh.count); 1118 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 1119 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 1120 if (rv == -1) { 1121 PF_RULES_RUNLOCK(); 1122 return (IPPROTO_DONE); 1123 } 1124 1125 offset += rv; 1126 } 1127 PF_RULES_RUNLOCK(); 1128 1129 done: 1130 m_freem(m); 1131 return (IPPROTO_DONE); 1132 } 1133 #endif 1134 1135 static int 1136 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) 1137 { 1138 struct pfsync_clr *clr; 1139 struct mbuf *mp; 1140 int len = sizeof(*clr) * count; 1141 int i, offp; 1142 u_int32_t creatorid; 1143 1144 mp = m_pulldown(m, offset, len, &offp); 1145 if (mp == NULL) { 1146 V_pfsyncstats.pfsyncs_badlen++; 1147 return (-1); 1148 } 1149 clr = (struct pfsync_clr *)(mp->m_data + offp); 1150 1151 for (i = 0; i < count; i++) { 1152 creatorid = clr[i].creatorid; 1153 1154 if (clr[i].ifname[0] != '\0' && 1155 pfi_kkif_find(clr[i].ifname) == NULL) 1156 continue; 1157 1158 for (int i = 0; i <= V_pf_hashmask; i++) { 1159 struct pf_idhash *ih = &V_pf_idhash[i]; 1160 struct pf_kstate *s; 1161 relock: 1162 PF_HASHROW_LOCK(ih); 1163 LIST_FOREACH(s, &ih->states, entry) { 1164 if (s->creatorid == creatorid) { 1165 s->state_flags |= PFSTATE_NOSYNC; 1166 pf_remove_state(s); 1167 goto relock; 1168 } 1169 } 1170 PF_HASHROW_UNLOCK(ih); 1171 } 1172 } 1173 1174 return (len); 1175 } 1176 1177 static int 1178 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) 1179 { 1180 struct mbuf *mp; 1181 union pfsync_state_union *sa, *sp; 1182 int i, offp, total_len, msg_version, msg_len; 1183 u_int8_t timeout, direction; 1184 sa_family_t af; 1185 1186 switch (action) { 1187 case PFSYNC_ACT_INS_1301: 1188 msg_len = sizeof(struct pfsync_state_1301); 1189 msg_version = PFSYNC_MSG_VERSION_1301; 1190 break; 1191 case PFSYNC_ACT_INS_1400: 1192 msg_len = sizeof(struct pfsync_state_1400); 1193 msg_version = PFSYNC_MSG_VERSION_1400; 1194 break; 1195 case PFSYNC_ACT_INS_1500: 1196 msg_len = sizeof(struct pfsync_state_1500); 1197 msg_version = PFSYNC_MSG_VERSION_1500; 1198 break; 1199 default: 1200 V_pfsyncstats.pfsyncs_badver++; 1201 return (-1); 1202 } 1203 1204 total_len = msg_len * count; 1205 1206 mp = m_pulldown(m, offset, total_len, &offp); 1207 if (mp == NULL) { 1208 V_pfsyncstats.pfsyncs_badlen++; 1209 return (-1); 1210 } 1211 sa = (union pfsync_state_union *)(mp->m_data + offp); 1212 1213 for (i = 0; i < count; i++) { 1214 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1215 1216 switch (msg_version) { 1217 case PFSYNC_MSG_VERSION_1301: 1218 case PFSYNC_MSG_VERSION_1400: 1219 af = sp->pfs_1301.af; 1220 timeout = sp->pfs_1301.timeout; 1221 direction = sp->pfs_1301.direction; 1222 break; 1223 case PFSYNC_MSG_VERSION_1500: 1224 af = sp->pfs_1500.wire_af; 1225 timeout = sp->pfs_1500.timeout; 1226 direction = sp->pfs_1500.direction; 1227 break; 1228 } 1229 1230 /* Check for invalid values. */ 1231 if (timeout >= PFTM_MAX || 1232 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1233 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || 1234 direction > PF_OUT || 1235 (af != AF_INET && af != AF_INET6)) { 1236 if (V_pf_status.debug >= PF_DEBUG_MISC) 1237 printf("%s: invalid value\n", __func__); 1238 V_pfsyncstats.pfsyncs_badval++; 1239 continue; 1240 } 1241 1242 if (pfsync_state_import(sp, flags, msg_version) != 0) 1243 V_pfsyncstats.pfsyncs_badact++; 1244 } 1245 1246 return (total_len); 1247 } 1248 1249 static int 1250 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) 1251 { 1252 struct pfsync_ins_ack *ia, *iaa; 1253 struct pf_kstate *st; 1254 1255 struct mbuf *mp; 1256 int len = count * sizeof(*ia); 1257 int offp, i; 1258 1259 mp = m_pulldown(m, offset, len, &offp); 1260 if (mp == NULL) { 1261 V_pfsyncstats.pfsyncs_badlen++; 1262 return (-1); 1263 } 1264 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 1265 1266 for (i = 0; i < count; i++) { 1267 ia = &iaa[i]; 1268 1269 st = pf_find_state_byid(ia->id, ia->creatorid); 1270 if (st == NULL) 1271 continue; 1272 1273 if (st->state_flags & PFSTATE_ACK) { 1274 pfsync_undefer_state(st, 0); 1275 } 1276 PF_STATE_UNLOCK(st); 1277 } 1278 /* 1279 * XXX this is not yet implemented, but we know the size of the 1280 * message so we can skip it. 1281 */ 1282 1283 return (count * sizeof(struct pfsync_ins_ack)); 1284 } 1285 1286 static int 1287 pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src, 1288 struct pf_state_peer_export *dst) 1289 { 1290 int sync = 0; 1291 1292 PF_STATE_LOCK_ASSERT(st); 1293 1294 /* 1295 * The state should never go backwards except 1296 * for syn-proxy states. Neither should the 1297 * sequence window slide backwards. 1298 */ 1299 if ((st->src.state > src->state && 1300 (st->src.state < PF_TCPS_PROXY_SRC || 1301 src->state >= PF_TCPS_PROXY_SRC)) || 1302 1303 (st->src.state == src->state && 1304 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 1305 sync++; 1306 else 1307 pf_state_peer_ntoh(src, &st->src); 1308 1309 if ((st->dst.state > dst->state) || 1310 1311 (st->dst.state >= TCPS_SYN_SENT && 1312 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 1313 sync++; 1314 else 1315 pf_state_peer_ntoh(dst, &st->dst); 1316 1317 return (sync); 1318 } 1319 1320 static int 1321 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) 1322 { 1323 struct pfsync_softc *sc = V_pfsyncif; 1324 union pfsync_state_union *sa, *sp; 1325 struct pf_kstate *st; 1326 struct mbuf *mp; 1327 int sync, offp, i, total_len, msg_len, msg_version; 1328 u_int8_t timeout; 1329 1330 switch (action) { 1331 case PFSYNC_ACT_UPD_1301: 1332 msg_len = sizeof(struct pfsync_state_1301); 1333 msg_version = PFSYNC_MSG_VERSION_1301; 1334 break; 1335 case PFSYNC_ACT_UPD_1400: 1336 msg_len = sizeof(struct pfsync_state_1400); 1337 msg_version = PFSYNC_MSG_VERSION_1400; 1338 break; 1339 case PFSYNC_ACT_UPD_1500: 1340 msg_len = sizeof(struct pfsync_state_1500); 1341 msg_version = PFSYNC_MSG_VERSION_1500; 1342 break; 1343 default: 1344 V_pfsyncstats.pfsyncs_badact++; 1345 return (-1); 1346 } 1347 1348 total_len = msg_len * count; 1349 1350 mp = m_pulldown(m, offset, total_len, &offp); 1351 if (mp == NULL) { 1352 V_pfsyncstats.pfsyncs_badlen++; 1353 return (-1); 1354 } 1355 sa = (union pfsync_state_union *)(mp->m_data + offp); 1356 1357 for (i = 0; i < count; i++) { 1358 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1359 1360 switch (msg_version) { 1361 case PFSYNC_MSG_VERSION_1301: 1362 case PFSYNC_MSG_VERSION_1400: 1363 timeout = sp->pfs_1301.timeout; 1364 break; 1365 case PFSYNC_MSG_VERSION_1500: 1366 timeout = sp->pfs_1500.timeout; 1367 break; 1368 } 1369 1370 /* check for invalid values */ 1371 if (timeout >= PFTM_MAX || 1372 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1373 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { 1374 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1375 printf("pfsync_input: PFSYNC_ACT_UPD: " 1376 "invalid value\n"); 1377 } 1378 V_pfsyncstats.pfsyncs_badval++; 1379 continue; 1380 } 1381 1382 st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid); 1383 if (st == NULL) { 1384 /* insert the update */ 1385 if (pfsync_state_import(sp, flags, msg_version)) 1386 V_pfsyncstats.pfsyncs_badstate++; 1387 continue; 1388 } 1389 1390 if (st->state_flags & PFSTATE_ACK) { 1391 pfsync_undefer_state(st, 1); 1392 } 1393 1394 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1395 sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst); 1396 else { 1397 sync = 0; 1398 1399 /* 1400 * Non-TCP protocol state machine always go 1401 * forwards 1402 */ 1403 if (st->src.state > sp->pfs_1301.src.state) 1404 sync++; 1405 else 1406 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 1407 if (st->dst.state > sp->pfs_1301.dst.state) 1408 sync++; 1409 else 1410 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1411 } 1412 if (sync < 2) { 1413 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); 1414 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1415 st->expire = pf_get_uptime(); 1416 st->timeout = timeout; 1417 } 1418 st->pfsync_time = time_uptime; 1419 1420 if (sync) { 1421 V_pfsyncstats.pfsyncs_stale++; 1422 1423 pfsync_update_state(st); 1424 PF_STATE_UNLOCK(st); 1425 pfsync_push_all(sc); 1426 continue; 1427 } 1428 PF_STATE_UNLOCK(st); 1429 } 1430 1431 return (total_len); 1432 } 1433 1434 static int 1435 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action) 1436 { 1437 struct pfsync_softc *sc = V_pfsyncif; 1438 struct pfsync_upd_c *ua, *up; 1439 struct pf_kstate *st; 1440 int len = count * sizeof(*up); 1441 int sync; 1442 struct mbuf *mp; 1443 int offp, i; 1444 1445 mp = m_pulldown(m, offset, len, &offp); 1446 if (mp == NULL) { 1447 V_pfsyncstats.pfsyncs_badlen++; 1448 return (-1); 1449 } 1450 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 1451 1452 for (i = 0; i < count; i++) { 1453 up = &ua[i]; 1454 1455 /* check for invalid values */ 1456 if (up->timeout >= PFTM_MAX || 1457 up->src.state > PF_TCPS_PROXY_DST || 1458 up->dst.state > PF_TCPS_PROXY_DST) { 1459 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1460 printf("pfsync_input: " 1461 "PFSYNC_ACT_UPD_C: " 1462 "invalid value\n"); 1463 } 1464 V_pfsyncstats.pfsyncs_badval++; 1465 continue; 1466 } 1467 1468 st = pf_find_state_byid(up->id, up->creatorid); 1469 if (st == NULL) { 1470 /* We don't have this state. Ask for it. */ 1471 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1472 pfsync_request_update(up->creatorid, up->id); 1473 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1474 continue; 1475 } 1476 1477 if (st->state_flags & PFSTATE_ACK) { 1478 pfsync_undefer_state(st, 1); 1479 } 1480 1481 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1482 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1483 else { 1484 sync = 0; 1485 1486 /* 1487 * Non-TCP protocol state machine always go 1488 * forwards 1489 */ 1490 if (st->src.state > up->src.state) 1491 sync++; 1492 else 1493 pf_state_peer_ntoh(&up->src, &st->src); 1494 if (st->dst.state > up->dst.state) 1495 sync++; 1496 else 1497 pf_state_peer_ntoh(&up->dst, &st->dst); 1498 } 1499 if (sync < 2) { 1500 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1501 pf_state_peer_ntoh(&up->dst, &st->dst); 1502 st->expire = pf_get_uptime(); 1503 st->timeout = up->timeout; 1504 } 1505 st->pfsync_time = time_uptime; 1506 1507 if (sync) { 1508 V_pfsyncstats.pfsyncs_stale++; 1509 1510 pfsync_update_state(st); 1511 PF_STATE_UNLOCK(st); 1512 pfsync_push_all(sc); 1513 continue; 1514 } 1515 PF_STATE_UNLOCK(st); 1516 } 1517 1518 return (len); 1519 } 1520 1521 static int 1522 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action) 1523 { 1524 struct pfsync_upd_req *ur, *ura; 1525 struct mbuf *mp; 1526 int len = count * sizeof(*ur); 1527 int i, offp; 1528 1529 struct pf_kstate *st; 1530 1531 mp = m_pulldown(m, offset, len, &offp); 1532 if (mp == NULL) { 1533 V_pfsyncstats.pfsyncs_badlen++; 1534 return (-1); 1535 } 1536 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1537 1538 for (i = 0; i < count; i++) { 1539 ur = &ura[i]; 1540 1541 if (ur->id == 0 && ur->creatorid == 0) 1542 pfsync_bulk_start(); 1543 else { 1544 st = pf_find_state_byid(ur->id, ur->creatorid); 1545 if (st == NULL) { 1546 V_pfsyncstats.pfsyncs_badstate++; 1547 continue; 1548 } 1549 if (st->state_flags & PFSTATE_NOSYNC) { 1550 PF_STATE_UNLOCK(st); 1551 continue; 1552 } 1553 1554 pfsync_update_state_req(st); 1555 PF_STATE_UNLOCK(st); 1556 } 1557 } 1558 1559 return (len); 1560 } 1561 1562 static int 1563 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action) 1564 { 1565 struct mbuf *mp; 1566 struct pfsync_del_c *sa, *sp; 1567 struct pf_kstate *st; 1568 int len = count * sizeof(*sp); 1569 int offp, i; 1570 1571 mp = m_pulldown(m, offset, len, &offp); 1572 if (mp == NULL) { 1573 V_pfsyncstats.pfsyncs_badlen++; 1574 return (-1); 1575 } 1576 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1577 1578 for (i = 0; i < count; i++) { 1579 sp = &sa[i]; 1580 1581 st = pf_find_state_byid(sp->id, sp->creatorid); 1582 if (st == NULL) { 1583 V_pfsyncstats.pfsyncs_badstate++; 1584 continue; 1585 } 1586 1587 st->state_flags |= PFSTATE_NOSYNC; 1588 pf_remove_state(st); 1589 } 1590 1591 return (len); 1592 } 1593 1594 static int 1595 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action) 1596 { 1597 struct pfsync_softc *sc = V_pfsyncif; 1598 struct pfsync_bus *bus; 1599 struct mbuf *mp; 1600 int len = count * sizeof(*bus); 1601 int offp; 1602 1603 PFSYNC_BLOCK(sc); 1604 1605 /* If we're not waiting for a bulk update, who cares. */ 1606 if (sc->sc_ureq_sent == 0) { 1607 PFSYNC_BUNLOCK(sc); 1608 return (len); 1609 } 1610 1611 mp = m_pulldown(m, offset, len, &offp); 1612 if (mp == NULL) { 1613 PFSYNC_BUNLOCK(sc); 1614 V_pfsyncstats.pfsyncs_badlen++; 1615 return (-1); 1616 } 1617 bus = (struct pfsync_bus *)(mp->m_data + offp); 1618 1619 switch (bus->status) { 1620 case PFSYNC_BUS_START: 1621 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1622 V_pf_limits[PF_LIMIT_STATES].limit / 1623 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1624 sizeof(union pfsync_state_union)), 1625 pfsync_bulk_fail, sc); 1626 if (V_pf_status.debug >= PF_DEBUG_MISC) 1627 printf("pfsync: received bulk update start\n"); 1628 break; 1629 1630 case PFSYNC_BUS_END: 1631 if (time_uptime - ntohl(bus->endtime) >= 1632 sc->sc_ureq_sent) { 1633 /* that's it, we're happy */ 1634 sc->sc_ureq_sent = 0; 1635 sc->sc_bulk_tries = 0; 1636 callout_stop(&sc->sc_bulkfail_tmo); 1637 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1638 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1639 "pfsync bulk done"); 1640 sc->sc_flags |= PFSYNCF_OK; 1641 if (V_pf_status.debug >= PF_DEBUG_MISC) 1642 printf("pfsync: received valid " 1643 "bulk update end\n"); 1644 } else { 1645 if (V_pf_status.debug >= PF_DEBUG_MISC) 1646 printf("pfsync: received invalid " 1647 "bulk update end: bad timestamp\n"); 1648 } 1649 break; 1650 } 1651 PFSYNC_BUNLOCK(sc); 1652 1653 return (len); 1654 } 1655 1656 static int 1657 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action) 1658 { 1659 int len = count * sizeof(struct pfsync_tdb); 1660 1661 #if defined(IPSEC) 1662 struct pfsync_tdb *tp; 1663 struct mbuf *mp; 1664 int offp; 1665 int i; 1666 int s; 1667 1668 mp = m_pulldown(m, offset, len, &offp); 1669 if (mp == NULL) { 1670 V_pfsyncstats.pfsyncs_badlen++; 1671 return (-1); 1672 } 1673 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1674 1675 for (i = 0; i < count; i++) 1676 pfsync_update_net_tdb(&tp[i]); 1677 #endif 1678 1679 return (len); 1680 } 1681 1682 #if defined(IPSEC) 1683 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1684 static void 1685 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1686 { 1687 struct tdb *tdb; 1688 int s; 1689 1690 /* check for invalid values */ 1691 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1692 (pt->dst.sa.sa_family != AF_INET && 1693 pt->dst.sa.sa_family != AF_INET6)) 1694 goto bad; 1695 1696 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1697 if (tdb) { 1698 pt->rpl = ntohl(pt->rpl); 1699 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1700 1701 /* Neither replay nor byte counter should ever decrease. */ 1702 if (pt->rpl < tdb->tdb_rpl || 1703 pt->cur_bytes < tdb->tdb_cur_bytes) { 1704 goto bad; 1705 } 1706 1707 tdb->tdb_rpl = pt->rpl; 1708 tdb->tdb_cur_bytes = pt->cur_bytes; 1709 } 1710 return; 1711 1712 bad: 1713 if (V_pf_status.debug >= PF_DEBUG_MISC) 1714 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1715 "invalid value\n"); 1716 V_pfsyncstats.pfsyncs_badstate++; 1717 return; 1718 } 1719 #endif 1720 1721 static int 1722 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action) 1723 { 1724 /* check if we are at the right place in the packet */ 1725 if (offset != m->m_pkthdr.len) 1726 V_pfsyncstats.pfsyncs_badlen++; 1727 1728 /* we're done. free and let the caller return */ 1729 m_freem(m); 1730 return (-1); 1731 } 1732 1733 static int 1734 pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action) 1735 { 1736 V_pfsyncstats.pfsyncs_badact++; 1737 1738 m_freem(m); 1739 return (-1); 1740 } 1741 1742 static int 1743 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1744 struct route *rt) 1745 { 1746 m_freem(m); 1747 return (0); 1748 } 1749 1750 /* ARGSUSED */ 1751 static int 1752 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1753 { 1754 struct pfsync_softc *sc = ifp->if_softc; 1755 struct ifreq *ifr = (struct ifreq *)data; 1756 struct pfsyncreq pfsyncr; 1757 size_t nvbuflen; 1758 int error; 1759 int c; 1760 1761 switch (cmd) { 1762 case SIOCSIFFLAGS: 1763 PFSYNC_LOCK(sc); 1764 if (ifp->if_flags & IFF_UP) { 1765 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1766 PFSYNC_UNLOCK(sc); 1767 pfsync_pointers_init(); 1768 } else { 1769 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1770 PFSYNC_UNLOCK(sc); 1771 pfsync_pointers_uninit(); 1772 } 1773 break; 1774 case SIOCSIFMTU: 1775 if (!sc->sc_sync_if || 1776 ifr->ifr_mtu <= PFSYNC_MINPKT || 1777 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1778 return (EINVAL); 1779 if (ifr->ifr_mtu < ifp->if_mtu) { 1780 for (c = 0; c < pfsync_buckets; c++) { 1781 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1782 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1783 pfsync_sendout(1, c); 1784 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1785 } 1786 } 1787 ifp->if_mtu = ifr->ifr_mtu; 1788 break; 1789 case SIOCGETPFSYNC: 1790 bzero(&pfsyncr, sizeof(pfsyncr)); 1791 PFSYNC_LOCK(sc); 1792 if (sc->sc_sync_if) { 1793 strlcpy(pfsyncr.pfsyncr_syncdev, 1794 sc->sc_sync_if->if_xname, IFNAMSIZ); 1795 } 1796 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1797 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1798 pfsyncr.pfsyncr_defer = sc->sc_flags; 1799 PFSYNC_UNLOCK(sc); 1800 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1801 sizeof(pfsyncr))); 1802 1803 case SIOCGETPFSYNCNV: 1804 { 1805 nvlist_t *nvl_syncpeer; 1806 nvlist_t *nvl = nvlist_create(0); 1807 1808 if (nvl == NULL) 1809 return (ENOMEM); 1810 1811 if (sc->sc_sync_if) 1812 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1813 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1814 nvlist_add_number(nvl, "flags", sc->sc_flags); 1815 nvlist_add_number(nvl, "version", sc->sc_version); 1816 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1817 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1818 1819 void *packed = NULL; 1820 packed = nvlist_pack(nvl, &nvbuflen); 1821 if (packed == NULL) { 1822 free(packed, M_NVLIST); 1823 nvlist_destroy(nvl); 1824 return (ENOMEM); 1825 } 1826 1827 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1828 ifr->ifr_cap_nv.length = nvbuflen; 1829 ifr->ifr_cap_nv.buffer = NULL; 1830 free(packed, M_NVLIST); 1831 nvlist_destroy(nvl); 1832 return (EFBIG); 1833 } 1834 1835 ifr->ifr_cap_nv.length = nvbuflen; 1836 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1837 1838 nvlist_destroy(nvl); 1839 nvlist_destroy(nvl_syncpeer); 1840 free(packed, M_NVLIST); 1841 break; 1842 } 1843 1844 case SIOCSETPFSYNC: 1845 { 1846 struct pfsync_kstatus status; 1847 1848 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1849 return (error); 1850 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1851 sizeof(pfsyncr)))) 1852 return (error); 1853 1854 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1855 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1856 1857 error = pfsync_kstatus_to_softc(&status, sc); 1858 return (error); 1859 } 1860 case SIOCSETPFSYNCNV: 1861 { 1862 struct pfsync_kstatus status; 1863 void *data; 1864 nvlist_t *nvl; 1865 1866 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1867 return (error); 1868 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1869 return (EINVAL); 1870 1871 data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK); 1872 1873 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1874 ifr->ifr_cap_nv.length)) != 0) { 1875 free(data, M_PF); 1876 return (error); 1877 } 1878 1879 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1880 free(data, M_PF); 1881 return (EINVAL); 1882 } 1883 1884 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1885 pfsync_nvstatus_to_kstatus(nvl, &status); 1886 1887 nvlist_destroy(nvl); 1888 free(data, M_PF); 1889 1890 error = pfsync_kstatus_to_softc(&status, sc); 1891 return (error); 1892 } 1893 default: 1894 return (ENOTTY); 1895 } 1896 1897 return (0); 1898 } 1899 1900 static void 1901 pfsync_out_state_1301(struct pf_kstate *st, void *buf) 1902 { 1903 union pfsync_state_union *sp = buf; 1904 1905 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301); 1906 } 1907 1908 static void 1909 pfsync_out_state_1400(struct pf_kstate *st, void *buf) 1910 { 1911 union pfsync_state_union *sp = buf; 1912 1913 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400); 1914 } 1915 1916 static void 1917 pfsync_out_state_1500(struct pf_kstate *st, void *buf) 1918 { 1919 union pfsync_state_union *sp = buf; 1920 1921 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1500); 1922 } 1923 1924 static void 1925 pfsync_out_iack(struct pf_kstate *st, void *buf) 1926 { 1927 struct pfsync_ins_ack *iack = buf; 1928 1929 iack->id = st->id; 1930 iack->creatorid = st->creatorid; 1931 } 1932 1933 static void 1934 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1935 { 1936 struct pfsync_upd_c *up = buf; 1937 1938 bzero(up, sizeof(*up)); 1939 up->id = st->id; 1940 pf_state_peer_hton(&st->src, &up->src); 1941 pf_state_peer_hton(&st->dst, &up->dst); 1942 up->creatorid = st->creatorid; 1943 up->timeout = st->timeout; 1944 } 1945 1946 static void 1947 pfsync_out_del_c(struct pf_kstate *st, void *buf) 1948 { 1949 struct pfsync_del_c *dp = buf; 1950 1951 dp->id = st->id; 1952 dp->creatorid = st->creatorid; 1953 st->state_flags |= PFSTATE_NOSYNC; 1954 } 1955 1956 static void 1957 pfsync_drop_all(struct pfsync_softc *sc) 1958 { 1959 struct pfsync_bucket *b; 1960 int c; 1961 1962 for (c = 0; c < pfsync_buckets; c++) { 1963 b = &sc->sc_buckets[c]; 1964 1965 PFSYNC_BUCKET_LOCK(b); 1966 pfsync_drop(sc, c); 1967 PFSYNC_BUCKET_UNLOCK(b); 1968 } 1969 } 1970 1971 static void 1972 pfsync_drop(struct pfsync_softc *sc, int c) 1973 { 1974 struct pf_kstate *st, *next; 1975 struct pfsync_upd_req_item *ur; 1976 struct pfsync_bucket *b; 1977 enum pfsync_q_id q; 1978 1979 b = &sc->sc_buckets[c]; 1980 PFSYNC_BUCKET_LOCK_ASSERT(b); 1981 1982 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1983 if (TAILQ_EMPTY(&b->b_qs[q])) 1984 continue; 1985 1986 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1987 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1988 ("%s: st->sync_state %d == q %d", 1989 __func__, st->sync_state, q)); 1990 st->sync_state = PFSYNC_S_NONE; 1991 pf_release_state(st); 1992 } 1993 TAILQ_INIT(&b->b_qs[q]); 1994 } 1995 1996 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1997 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1998 free(ur, M_PFSYNC); 1999 } 2000 2001 b->b_len = PFSYNC_MINPKT; 2002 free(b->b_plus, M_PFSYNC); 2003 b->b_plus = NULL; 2004 b->b_pluslen = 0; 2005 } 2006 2007 static void 2008 pfsync_sendout(int schedswi, int c) 2009 { 2010 struct pfsync_softc *sc = V_pfsyncif; 2011 struct ifnet *ifp = sc->sc_ifp; 2012 struct mbuf *m; 2013 struct pfsync_header *ph; 2014 struct pfsync_subheader *subh; 2015 struct pf_kstate *st, *st_next; 2016 struct pfsync_upd_req_item *ur; 2017 struct pfsync_bucket *b = &sc->sc_buckets[c]; 2018 size_t len; 2019 int aflen, offset, count = 0; 2020 enum pfsync_q_id q; 2021 2022 KASSERT(sc != NULL, ("%s: null sc", __func__)); 2023 KASSERT(b->b_len > PFSYNC_MINPKT, 2024 ("%s: sc_len %zu", __func__, b->b_len)); 2025 PFSYNC_BUCKET_LOCK_ASSERT(b); 2026 2027 if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) { 2028 pfsync_drop(sc, c); 2029 return; 2030 } 2031 2032 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 2033 if (m == NULL) { 2034 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2035 V_pfsyncstats.pfsyncs_onomem++; 2036 return; 2037 } 2038 m->m_data += max_linkhdr; 2039 bzero(m->m_data, b->b_len); 2040 2041 len = b->b_len; 2042 2043 /* build the ip header */ 2044 switch (sc->sc_sync_peer.ss_family) { 2045 #ifdef INET 2046 case AF_INET: 2047 { 2048 struct ip *ip; 2049 2050 ip = mtod(m, struct ip *); 2051 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 2052 aflen = offset = sizeof(*ip); 2053 2054 len -= sizeof(union inet_template) - sizeof(struct ip); 2055 ip->ip_len = htons(len); 2056 ip_fillid(ip, V_ip_random_id); 2057 break; 2058 } 2059 #endif 2060 #ifdef INET6 2061 case AF_INET6: 2062 { 2063 struct ip6_hdr *ip6; 2064 2065 ip6 = mtod(m, struct ip6_hdr *); 2066 bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6)); 2067 aflen = offset = sizeof(*ip6); 2068 2069 len -= sizeof(union inet_template) - sizeof(struct ip6_hdr); 2070 ip6->ip6_plen = htons(len); 2071 break; 2072 } 2073 #endif 2074 default: 2075 m_freem(m); 2076 pfsync_drop(sc, c); 2077 return; 2078 } 2079 m->m_len = m->m_pkthdr.len = len; 2080 2081 /* build the pfsync header */ 2082 ph = (struct pfsync_header *)(m->m_data + offset); 2083 offset += sizeof(*ph); 2084 2085 ph->version = PFSYNC_VERSION; 2086 ph->len = htons(len - aflen); 2087 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 2088 2089 /* walk the queues */ 2090 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 2091 if (TAILQ_EMPTY(&b->b_qs[q])) 2092 continue; 2093 2094 subh = (struct pfsync_subheader *)(m->m_data + offset); 2095 offset += sizeof(*subh); 2096 2097 count = 0; 2098 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 2099 KASSERT(st->sync_state == pfsync_qid_sstate[q], 2100 ("%s: st->sync_state == q", 2101 __func__)); 2102 /* 2103 * XXXGL: some of write methods do unlocked reads 2104 * of state data :( 2105 */ 2106 pfsync_qs[q].write(st, m->m_data + offset); 2107 offset += pfsync_qs[q].len; 2108 st->sync_state = PFSYNC_S_NONE; 2109 pf_release_state(st); 2110 count++; 2111 } 2112 TAILQ_INIT(&b->b_qs[q]); 2113 2114 subh->action = pfsync_qs[q].action; 2115 subh->count = htons(count); 2116 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 2117 } 2118 2119 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 2120 subh = (struct pfsync_subheader *)(m->m_data + offset); 2121 offset += sizeof(*subh); 2122 2123 count = 0; 2124 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 2125 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 2126 2127 bcopy(&ur->ur_msg, m->m_data + offset, 2128 sizeof(ur->ur_msg)); 2129 offset += sizeof(ur->ur_msg); 2130 free(ur, M_PFSYNC); 2131 count++; 2132 } 2133 2134 subh->action = PFSYNC_ACT_UPD_REQ; 2135 subh->count = htons(count); 2136 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 2137 } 2138 2139 /* has someone built a custom region for us to add? */ 2140 if (b->b_plus != NULL) { 2141 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 2142 offset += b->b_pluslen; 2143 2144 free(b->b_plus, M_PFSYNC); 2145 b->b_plus = NULL; 2146 b->b_pluslen = 0; 2147 } 2148 2149 subh = (struct pfsync_subheader *)(m->m_data + offset); 2150 offset += sizeof(*subh); 2151 2152 subh->action = PFSYNC_ACT_EOF; 2153 subh->count = htons(1); 2154 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 2155 2156 /* we're done, let's put it on the wire */ 2157 if (bpf_peers_present(ifp->if_bpf)) { 2158 m->m_data += aflen; 2159 m->m_len = m->m_pkthdr.len = len - aflen; 2160 bpf_mtap(ifp->if_bpf, m); 2161 m->m_data -= aflen; 2162 m->m_len = m->m_pkthdr.len = len; 2163 } 2164 2165 if (sc->sc_sync_if == NULL) { 2166 b->b_len = PFSYNC_MINPKT; 2167 m_freem(m); 2168 return; 2169 } 2170 2171 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 2172 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 2173 b->b_len = PFSYNC_MINPKT; 2174 2175 if (!_IF_QFULL(&b->b_snd)) 2176 _IF_ENQUEUE(&b->b_snd, m); 2177 else { 2178 m_freem(m); 2179 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 2180 } 2181 if (schedswi) 2182 swi_sched(V_pfsync_swi_cookie, 0); 2183 } 2184 2185 static void 2186 pfsync_insert_state(struct pf_kstate *st) 2187 { 2188 struct pfsync_softc *sc = V_pfsyncif; 2189 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2190 2191 if (st->state_flags & PFSTATE_NOSYNC) 2192 return; 2193 2194 if ((st->rule->rule_flag & PFRULE_NOSYNC) || 2195 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 2196 st->state_flags |= PFSTATE_NOSYNC; 2197 return; 2198 } 2199 2200 KASSERT(st->sync_state == PFSYNC_S_NONE, 2201 ("%s: st->sync_state %u", __func__, st->sync_state)); 2202 2203 PFSYNC_BUCKET_LOCK(b); 2204 if (b->b_len == PFSYNC_MINPKT) 2205 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2206 2207 pfsync_q_ins(st, PFSYNC_S_INS, true); 2208 PFSYNC_BUCKET_UNLOCK(b); 2209 2210 st->sync_updates = 0; 2211 } 2212 2213 static int 2214 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 2215 { 2216 struct pfsync_softc *sc = V_pfsyncif; 2217 struct pfsync_deferral *pd; 2218 struct pfsync_bucket *b; 2219 2220 if (m->m_flags & (M_BCAST|M_MCAST)) 2221 return (0); 2222 2223 if (sc == NULL) 2224 return (0); 2225 2226 b = pfsync_get_bucket(sc, st); 2227 2228 PFSYNC_LOCK(sc); 2229 2230 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 2231 !(sc->sc_flags & PFSYNCF_DEFER)) { 2232 PFSYNC_UNLOCK(sc); 2233 return (0); 2234 } 2235 2236 PFSYNC_BUCKET_LOCK(b); 2237 PFSYNC_UNLOCK(sc); 2238 2239 if (b->b_deferred >= 128) 2240 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 2241 2242 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 2243 if (pd == NULL) { 2244 PFSYNC_BUCKET_UNLOCK(b); 2245 return (0); 2246 } 2247 b->b_deferred++; 2248 2249 m->m_flags |= M_SKIP_FIREWALL; 2250 st->state_flags |= PFSTATE_ACK; 2251 2252 pd->pd_sc = sc; 2253 pd->pd_st = st; 2254 pf_ref_state(st); 2255 pd->pd_m = m; 2256 2257 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 2258 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 2259 callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, 2260 pfsync_defer_tmo, pd); 2261 2262 pfsync_push(b); 2263 PFSYNC_BUCKET_UNLOCK(b); 2264 2265 return (1); 2266 } 2267 2268 static void 2269 pfsync_undefer(struct pfsync_deferral *pd, int drop) 2270 { 2271 struct pfsync_softc *sc = pd->pd_sc; 2272 struct mbuf *m = pd->pd_m; 2273 struct pf_kstate *st = pd->pd_st; 2274 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2275 2276 PFSYNC_BUCKET_LOCK_ASSERT(b); 2277 2278 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2279 b->b_deferred--; 2280 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2281 free(pd, M_PFSYNC); 2282 pf_release_state(st); 2283 2284 if (drop) 2285 m_freem(m); 2286 else { 2287 _IF_ENQUEUE(&b->b_snd, m); 2288 pfsync_push(b); 2289 } 2290 } 2291 2292 static void 2293 pfsync_defer_tmo(void *arg) 2294 { 2295 struct epoch_tracker et; 2296 struct pfsync_deferral *pd = arg; 2297 struct pfsync_softc *sc = pd->pd_sc; 2298 struct mbuf *m = pd->pd_m; 2299 struct pf_kstate *st = pd->pd_st; 2300 struct pfsync_bucket *b; 2301 2302 CURVNET_SET(sc->sc_ifp->if_vnet); 2303 2304 b = pfsync_get_bucket(sc, st); 2305 2306 PFSYNC_BUCKET_LOCK_ASSERT(b); 2307 2308 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2309 b->b_deferred--; 2310 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2311 PFSYNC_BUCKET_UNLOCK(b); 2312 free(pd, M_PFSYNC); 2313 2314 if (sc->sc_sync_if == NULL) { 2315 pf_release_state(st); 2316 m_freem(m); 2317 CURVNET_RESTORE(); 2318 return; 2319 } 2320 2321 NET_EPOCH_ENTER(et); 2322 2323 pfsync_tx(sc, m); 2324 2325 pf_release_state(st); 2326 2327 CURVNET_RESTORE(); 2328 NET_EPOCH_EXIT(et); 2329 } 2330 2331 static void 2332 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 2333 { 2334 struct pfsync_softc *sc = V_pfsyncif; 2335 struct pfsync_deferral *pd; 2336 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2337 2338 PFSYNC_BUCKET_LOCK_ASSERT(b); 2339 2340 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 2341 if (pd->pd_st == st) { 2342 if (callout_stop(&pd->pd_tmo) > 0) 2343 pfsync_undefer(pd, drop); 2344 2345 return; 2346 } 2347 } 2348 2349 panic("%s: unable to find deferred state", __func__); 2350 } 2351 2352 static void 2353 pfsync_undefer_state(struct pf_kstate *st, int drop) 2354 { 2355 struct pfsync_softc *sc = V_pfsyncif; 2356 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2357 2358 PFSYNC_BUCKET_LOCK(b); 2359 pfsync_undefer_state_locked(st, drop); 2360 PFSYNC_BUCKET_UNLOCK(b); 2361 } 2362 2363 static struct pfsync_bucket* 2364 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 2365 { 2366 int c = PF_IDHASH(st) % pfsync_buckets; 2367 return &sc->sc_buckets[c]; 2368 } 2369 2370 static void 2371 pfsync_update_state(struct pf_kstate *st) 2372 { 2373 struct pfsync_softc *sc = V_pfsyncif; 2374 bool sync = false, ref = true; 2375 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2376 2377 PF_STATE_LOCK_ASSERT(st); 2378 PFSYNC_BUCKET_LOCK(b); 2379 2380 if (st->state_flags & PFSTATE_ACK) 2381 pfsync_undefer_state_locked(st, 0); 2382 if (st->state_flags & PFSTATE_NOSYNC) { 2383 if (st->sync_state != PFSYNC_S_NONE) 2384 pfsync_q_del(st, true, b); 2385 PFSYNC_BUCKET_UNLOCK(b); 2386 return; 2387 } 2388 2389 if (b->b_len == PFSYNC_MINPKT) 2390 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2391 2392 switch (st->sync_state) { 2393 case PFSYNC_S_UPD_C: 2394 case PFSYNC_S_UPD: 2395 case PFSYNC_S_INS: 2396 /* we're already handling it */ 2397 2398 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2399 st->sync_updates++; 2400 if (st->sync_updates >= sc->sc_maxupdates) 2401 sync = true; 2402 } 2403 break; 2404 2405 case PFSYNC_S_IACK: 2406 pfsync_q_del(st, false, b); 2407 ref = false; 2408 /* FALLTHROUGH */ 2409 2410 case PFSYNC_S_NONE: 2411 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 2412 st->sync_updates = 0; 2413 break; 2414 2415 default: 2416 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2417 } 2418 2419 if (sync || (time_uptime - st->pfsync_time) < 2) 2420 pfsync_push(b); 2421 2422 PFSYNC_BUCKET_UNLOCK(b); 2423 } 2424 2425 static void 2426 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 2427 { 2428 struct pfsync_softc *sc = V_pfsyncif; 2429 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2430 struct pfsync_upd_req_item *item; 2431 size_t nlen = sizeof(struct pfsync_upd_req); 2432 2433 PFSYNC_BUCKET_LOCK_ASSERT(b); 2434 2435 /* 2436 * This code does a bit to prevent multiple update requests for the 2437 * same state being generated. It searches current subheader queue, 2438 * but it doesn't lookup into queue of already packed datagrams. 2439 */ 2440 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 2441 if (item->ur_msg.id == id && 2442 item->ur_msg.creatorid == creatorid) 2443 return; 2444 2445 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 2446 if (item == NULL) 2447 return; /* XXX stats */ 2448 2449 item->ur_msg.id = id; 2450 item->ur_msg.creatorid = creatorid; 2451 2452 if (TAILQ_EMPTY(&b->b_upd_req_list)) 2453 nlen += sizeof(struct pfsync_subheader); 2454 2455 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2456 pfsync_sendout(0, 0); 2457 2458 nlen = sizeof(struct pfsync_subheader) + 2459 sizeof(struct pfsync_upd_req); 2460 } 2461 2462 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 2463 b->b_len += nlen; 2464 2465 pfsync_push(b); 2466 } 2467 2468 static bool 2469 pfsync_update_state_req(struct pf_kstate *st) 2470 { 2471 struct pfsync_softc *sc = V_pfsyncif; 2472 bool ref = true, full = false; 2473 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2474 2475 PF_STATE_LOCK_ASSERT(st); 2476 PFSYNC_BUCKET_LOCK(b); 2477 2478 if (st->state_flags & PFSTATE_NOSYNC) { 2479 if (st->sync_state != PFSYNC_S_NONE) 2480 pfsync_q_del(st, true, b); 2481 PFSYNC_BUCKET_UNLOCK(b); 2482 return (full); 2483 } 2484 2485 switch (st->sync_state) { 2486 case PFSYNC_S_UPD_C: 2487 case PFSYNC_S_IACK: 2488 pfsync_q_del(st, false, b); 2489 ref = false; 2490 /* FALLTHROUGH */ 2491 2492 case PFSYNC_S_NONE: 2493 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2494 pfsync_push(b); 2495 break; 2496 2497 case PFSYNC_S_INS: 2498 case PFSYNC_S_UPD: 2499 case PFSYNC_S_DEL_C: 2500 /* we're already handling it */ 2501 break; 2502 2503 default: 2504 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2505 } 2506 2507 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union)) 2508 full = true; 2509 2510 PFSYNC_BUCKET_UNLOCK(b); 2511 2512 return (full); 2513 } 2514 2515 static void 2516 pfsync_delete_state(struct pf_kstate *st) 2517 { 2518 struct pfsync_softc *sc = V_pfsyncif; 2519 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2520 bool ref = true; 2521 2522 PFSYNC_BUCKET_LOCK(b); 2523 if (st->state_flags & PFSTATE_ACK) 2524 pfsync_undefer_state_locked(st, 1); 2525 if (st->state_flags & PFSTATE_NOSYNC) { 2526 if (st->sync_state != PFSYNC_S_NONE) 2527 pfsync_q_del(st, true, b); 2528 PFSYNC_BUCKET_UNLOCK(b); 2529 return; 2530 } 2531 2532 if (b->b_len == PFSYNC_MINPKT) 2533 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2534 2535 switch (st->sync_state) { 2536 case PFSYNC_S_INS: 2537 /* We never got to tell the world so just forget about it. */ 2538 pfsync_q_del(st, true, b); 2539 break; 2540 2541 case PFSYNC_S_UPD_C: 2542 case PFSYNC_S_UPD: 2543 case PFSYNC_S_IACK: 2544 pfsync_q_del(st, false, b); 2545 ref = false; 2546 /* FALLTHROUGH */ 2547 2548 case PFSYNC_S_NONE: 2549 pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); 2550 break; 2551 2552 default: 2553 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2554 } 2555 2556 PFSYNC_BUCKET_UNLOCK(b); 2557 } 2558 2559 static void 2560 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2561 { 2562 struct { 2563 struct pfsync_subheader subh; 2564 struct pfsync_clr clr; 2565 } __packed r; 2566 2567 bzero(&r, sizeof(r)); 2568 2569 r.subh.action = PFSYNC_ACT_CLR; 2570 r.subh.count = htons(1); 2571 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2572 2573 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2574 r.clr.creatorid = creatorid; 2575 2576 pfsync_send_plus(&r, sizeof(r)); 2577 } 2578 2579 static enum pfsync_q_id 2580 pfsync_sstate_to_qid(u_int8_t sync_state) 2581 { 2582 struct pfsync_softc *sc = V_pfsyncif; 2583 2584 switch (sync_state) { 2585 case PFSYNC_S_INS: 2586 switch (sc->sc_version) { 2587 case PFSYNC_MSG_VERSION_1301: 2588 return PFSYNC_Q_INS_1301; 2589 case PFSYNC_MSG_VERSION_1400: 2590 return PFSYNC_Q_INS_1400; 2591 case PFSYNC_MSG_VERSION_1500: 2592 return PFSYNC_Q_INS_1500; 2593 } 2594 break; 2595 case PFSYNC_S_IACK: 2596 return PFSYNC_Q_IACK; 2597 case PFSYNC_S_UPD: 2598 switch (sc->sc_version) { 2599 case PFSYNC_MSG_VERSION_1301: 2600 return PFSYNC_Q_UPD_1301; 2601 case PFSYNC_MSG_VERSION_1400: 2602 return PFSYNC_Q_UPD_1400; 2603 case PFSYNC_MSG_VERSION_1500: 2604 return PFSYNC_Q_UPD_1500; 2605 } 2606 break; 2607 case PFSYNC_S_UPD_C: 2608 return PFSYNC_Q_UPD_C; 2609 case PFSYNC_S_DEL_C: 2610 return PFSYNC_Q_DEL_C; 2611 default: 2612 panic("%s: Unsupported st->sync_state 0x%02x", 2613 __func__, sync_state); 2614 } 2615 2616 panic("%s: Unsupported pfsync_msg_version %d", 2617 __func__, sc->sc_version); 2618 } 2619 2620 static void 2621 pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref) 2622 { 2623 enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state); 2624 struct pfsync_softc *sc = V_pfsyncif; 2625 size_t nlen = pfsync_qs[q].len; 2626 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2627 2628 PFSYNC_BUCKET_LOCK_ASSERT(b); 2629 2630 KASSERT(st->sync_state == PFSYNC_S_NONE, 2631 ("%s: st->sync_state %u", __func__, st->sync_state)); 2632 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2633 b->b_len)); 2634 2635 if (TAILQ_EMPTY(&b->b_qs[q])) 2636 nlen += sizeof(struct pfsync_subheader); 2637 2638 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2639 pfsync_sendout(1, b->b_id); 2640 2641 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2642 } 2643 2644 b->b_len += nlen; 2645 st->sync_state = pfsync_qid_sstate[q]; 2646 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2647 if (ref) 2648 pf_ref_state(st); 2649 } 2650 2651 static void 2652 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2653 { 2654 enum pfsync_q_id q; 2655 2656 PFSYNC_BUCKET_LOCK_ASSERT(b); 2657 KASSERT(st->sync_state != PFSYNC_S_NONE, 2658 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2659 2660 q = pfsync_sstate_to_qid(st->sync_state); 2661 b->b_len -= pfsync_qs[q].len; 2662 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2663 st->sync_state = PFSYNC_S_NONE; 2664 if (unref) 2665 pf_release_state(st); 2666 2667 if (TAILQ_EMPTY(&b->b_qs[q])) 2668 b->b_len -= sizeof(struct pfsync_subheader); 2669 } 2670 2671 static void 2672 pfsync_bulk_start(void) 2673 { 2674 struct pfsync_softc *sc = V_pfsyncif; 2675 2676 if (V_pf_status.debug >= PF_DEBUG_MISC) 2677 printf("pfsync: received bulk update request\n"); 2678 2679 PFSYNC_BLOCK(sc); 2680 2681 sc->sc_ureq_received = time_uptime; 2682 sc->sc_bulk_hashid = 0; 2683 sc->sc_bulk_stateid = 0; 2684 pfsync_bulk_status(PFSYNC_BUS_START); 2685 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2686 PFSYNC_BUNLOCK(sc); 2687 } 2688 2689 static void 2690 pfsync_bulk_update(void *arg) 2691 { 2692 struct pfsync_softc *sc = arg; 2693 struct pf_kstate *s; 2694 int i; 2695 2696 PFSYNC_BLOCK_ASSERT(sc); 2697 CURVNET_SET(sc->sc_ifp->if_vnet); 2698 2699 /* 2700 * Start with last state from previous invocation. 2701 * It may had gone, in this case start from the 2702 * hash slot. 2703 */ 2704 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2705 2706 if (s != NULL) 2707 i = PF_IDHASH(s); 2708 else 2709 i = sc->sc_bulk_hashid; 2710 2711 for (; i <= V_pf_hashmask; i++) { 2712 struct pf_idhash *ih = &V_pf_idhash[i]; 2713 2714 if (s != NULL) 2715 PF_HASHROW_ASSERT(ih); 2716 else { 2717 PF_HASHROW_LOCK(ih); 2718 s = LIST_FIRST(&ih->states); 2719 } 2720 2721 for (; s; s = LIST_NEXT(s, entry)) { 2722 if (s->sync_state == PFSYNC_S_NONE && 2723 s->timeout < PFTM_MAX && 2724 s->pfsync_time <= sc->sc_ureq_received) { 2725 if (pfsync_update_state_req(s)) { 2726 /* We've filled a packet. */ 2727 sc->sc_bulk_hashid = i; 2728 sc->sc_bulk_stateid = s->id; 2729 sc->sc_bulk_creatorid = s->creatorid; 2730 PF_HASHROW_UNLOCK(ih); 2731 callout_reset(&sc->sc_bulk_tmo, 1, 2732 pfsync_bulk_update, sc); 2733 goto full; 2734 } 2735 } 2736 } 2737 PF_HASHROW_UNLOCK(ih); 2738 } 2739 2740 /* We're done. */ 2741 pfsync_bulk_status(PFSYNC_BUS_END); 2742 full: 2743 CURVNET_RESTORE(); 2744 } 2745 2746 static void 2747 pfsync_bulk_status(u_int8_t status) 2748 { 2749 struct { 2750 struct pfsync_subheader subh; 2751 struct pfsync_bus bus; 2752 } __packed r; 2753 2754 struct pfsync_softc *sc = V_pfsyncif; 2755 2756 bzero(&r, sizeof(r)); 2757 2758 r.subh.action = PFSYNC_ACT_BUS; 2759 r.subh.count = htons(1); 2760 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2761 2762 r.bus.creatorid = V_pf_status.hostid; 2763 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2764 r.bus.status = status; 2765 2766 pfsync_send_plus(&r, sizeof(r)); 2767 } 2768 2769 static void 2770 pfsync_bulk_fail(void *arg) 2771 { 2772 struct pfsync_softc *sc = arg; 2773 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2774 2775 CURVNET_SET(sc->sc_ifp->if_vnet); 2776 2777 PFSYNC_BLOCK_ASSERT(sc); 2778 2779 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2780 /* Try again */ 2781 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2782 pfsync_bulk_fail, V_pfsyncif); 2783 PFSYNC_BUCKET_LOCK(b); 2784 pfsync_request_update(0, 0); 2785 PFSYNC_BUCKET_UNLOCK(b); 2786 } else { 2787 /* Pretend like the transfer was ok. */ 2788 sc->sc_ureq_sent = 0; 2789 sc->sc_bulk_tries = 0; 2790 PFSYNC_LOCK(sc); 2791 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2792 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2793 "pfsync bulk fail"); 2794 sc->sc_flags |= PFSYNCF_OK; 2795 PFSYNC_UNLOCK(sc); 2796 if (V_pf_status.debug >= PF_DEBUG_MISC) 2797 printf("pfsync: failed to receive bulk update\n"); 2798 } 2799 2800 CURVNET_RESTORE(); 2801 } 2802 2803 static void 2804 pfsync_send_plus(void *plus, size_t pluslen) 2805 { 2806 struct pfsync_softc *sc = V_pfsyncif; 2807 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2808 uint8_t *newplus; 2809 2810 PFSYNC_BUCKET_LOCK(b); 2811 2812 if (b->b_len + pluslen > sc->sc_ifp->if_mtu) 2813 pfsync_sendout(1, b->b_id); 2814 2815 newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT); 2816 if (newplus == NULL) 2817 goto out; 2818 2819 if (b->b_plus != NULL) { 2820 memcpy(newplus, b->b_plus, b->b_pluslen); 2821 free(b->b_plus, M_PFSYNC); 2822 } else { 2823 MPASS(b->b_pluslen == 0); 2824 } 2825 memcpy(newplus + b->b_pluslen, plus, pluslen); 2826 2827 b->b_plus = newplus; 2828 b->b_pluslen += pluslen; 2829 b->b_len += pluslen; 2830 2831 pfsync_sendout(1, b->b_id); 2832 2833 out: 2834 PFSYNC_BUCKET_UNLOCK(b); 2835 } 2836 2837 static void 2838 pfsync_timeout(void *arg) 2839 { 2840 struct pfsync_bucket *b = arg; 2841 2842 CURVNET_SET(b->b_sc->sc_ifp->if_vnet); 2843 PFSYNC_BUCKET_LOCK(b); 2844 pfsync_push(b); 2845 PFSYNC_BUCKET_UNLOCK(b); 2846 CURVNET_RESTORE(); 2847 } 2848 2849 static void 2850 pfsync_push(struct pfsync_bucket *b) 2851 { 2852 2853 PFSYNC_BUCKET_LOCK_ASSERT(b); 2854 2855 b->b_flags |= PFSYNCF_BUCKET_PUSH; 2856 swi_sched(V_pfsync_swi_cookie, 0); 2857 } 2858 2859 static void 2860 pfsync_push_all(struct pfsync_softc *sc) 2861 { 2862 int c; 2863 struct pfsync_bucket *b; 2864 2865 for (c = 0; c < pfsync_buckets; c++) { 2866 b = &sc->sc_buckets[c]; 2867 2868 PFSYNC_BUCKET_LOCK(b); 2869 pfsync_push(b); 2870 PFSYNC_BUCKET_UNLOCK(b); 2871 } 2872 } 2873 2874 static void 2875 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) 2876 { 2877 struct ip *ip; 2878 int af, error = 0; 2879 2880 ip = mtod(m, struct ip *); 2881 MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); 2882 2883 af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; 2884 2885 /* 2886 * We distinguish between a deferral packet and our 2887 * own pfsync packet based on M_SKIP_FIREWALL 2888 * flag. This is XXX. 2889 */ 2890 switch (af) { 2891 #ifdef INET 2892 case AF_INET: 2893 if (m->m_flags & M_SKIP_FIREWALL) { 2894 error = ip_output(m, NULL, NULL, 0, 2895 NULL, NULL); 2896 } else { 2897 error = ip_output(m, NULL, NULL, 2898 IP_RAWOUTPUT, &sc->sc_imo, NULL); 2899 } 2900 break; 2901 #endif 2902 #ifdef INET6 2903 case AF_INET6: 2904 if (m->m_flags & M_SKIP_FIREWALL) { 2905 error = ip6_output(m, NULL, NULL, 0, 2906 NULL, NULL, NULL); 2907 } else { 2908 error = ip6_output(m, NULL, NULL, 0, 2909 &sc->sc_im6o, NULL, NULL); 2910 } 2911 break; 2912 #endif 2913 } 2914 2915 if (error == 0) 2916 V_pfsyncstats.pfsyncs_opackets++; 2917 else 2918 V_pfsyncstats.pfsyncs_oerrors++; 2919 2920 } 2921 2922 static void 2923 pfsyncintr(void *arg) 2924 { 2925 struct epoch_tracker et; 2926 struct pfsync_softc *sc = arg; 2927 struct pfsync_bucket *b; 2928 struct mbuf *m, *n; 2929 int c; 2930 2931 NET_EPOCH_ENTER(et); 2932 CURVNET_SET(sc->sc_ifp->if_vnet); 2933 2934 for (c = 0; c < pfsync_buckets; c++) { 2935 b = &sc->sc_buckets[c]; 2936 2937 PFSYNC_BUCKET_LOCK(b); 2938 if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { 2939 pfsync_sendout(0, b->b_id); 2940 b->b_flags &= ~PFSYNCF_BUCKET_PUSH; 2941 } 2942 _IF_DEQUEUE_ALL(&b->b_snd, m); 2943 PFSYNC_BUCKET_UNLOCK(b); 2944 2945 for (; m != NULL; m = n) { 2946 n = m->m_nextpkt; 2947 m->m_nextpkt = NULL; 2948 2949 pfsync_tx(sc, m); 2950 } 2951 } 2952 CURVNET_RESTORE(); 2953 NET_EPOCH_EXIT(et); 2954 } 2955 2956 static int 2957 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, 2958 struct in_mfilter* imf, struct in6_mfilter* im6f) 2959 { 2960 #ifdef INET 2961 struct ip_moptions *imo = &sc->sc_imo; 2962 #endif 2963 #ifdef INET6 2964 struct ip6_moptions *im6o = &sc->sc_im6o; 2965 struct sockaddr_in6 *syncpeer_sa6 = NULL; 2966 #endif 2967 2968 if (!(ifp->if_flags & IFF_MULTICAST)) 2969 return (EADDRNOTAVAIL); 2970 2971 switch (sc->sc_sync_peer.ss_family) { 2972 #ifdef INET 2973 case AF_INET: 2974 { 2975 int error; 2976 2977 ip_mfilter_init(&imo->imo_head); 2978 imo->imo_multicast_vif = -1; 2979 if ((error = in_joingroup(ifp, 2980 &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, 2981 &imf->imf_inm)) != 0) 2982 return (error); 2983 2984 ip_mfilter_insert(&imo->imo_head, imf); 2985 imo->imo_multicast_ifp = ifp; 2986 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2987 imo->imo_multicast_loop = 0; 2988 break; 2989 } 2990 #endif 2991 #ifdef INET6 2992 case AF_INET6: 2993 { 2994 int error; 2995 2996 syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer; 2997 if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL))) 2998 return (error); 2999 3000 ip6_mfilter_init(&im6o->im6o_head); 3001 if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL, 3002 &(im6f->im6f_in6m), 0)) != 0) 3003 return (error); 3004 3005 ip6_mfilter_insert(&im6o->im6o_head, im6f); 3006 im6o->im6o_multicast_ifp = ifp; 3007 im6o->im6o_multicast_hlim = PFSYNC_DFLTTL; 3008 im6o->im6o_multicast_loop = 0; 3009 break; 3010 } 3011 #endif 3012 } 3013 3014 return (0); 3015 } 3016 3017 static void 3018 pfsync_multicast_cleanup(struct pfsync_softc *sc) 3019 { 3020 #ifdef INET 3021 struct ip_moptions *imo = &sc->sc_imo; 3022 struct in_mfilter *imf; 3023 3024 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 3025 ip_mfilter_remove(&imo->imo_head, imf); 3026 in_leavegroup(imf->imf_inm, NULL); 3027 ip_mfilter_free(imf); 3028 } 3029 imo->imo_multicast_ifp = NULL; 3030 #endif 3031 3032 #ifdef INET6 3033 struct ip6_moptions *im6o = &sc->sc_im6o; 3034 struct in6_mfilter *im6f; 3035 3036 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 3037 ip6_mfilter_remove(&im6o->im6o_head, im6f); 3038 in6_leavegroup(im6f->im6f_in6m, NULL); 3039 ip6_mfilter_free(im6f); 3040 } 3041 im6o->im6o_multicast_ifp = NULL; 3042 #endif 3043 } 3044 3045 void 3046 pfsync_detach_ifnet(struct ifnet *ifp) 3047 { 3048 struct pfsync_softc *sc = V_pfsyncif; 3049 3050 if (sc == NULL) 3051 return; 3052 3053 PFSYNC_LOCK(sc); 3054 3055 if (sc->sc_sync_if == ifp) { 3056 /* We don't need mutlicast cleanup here, because the interface 3057 * is going away. We do need to ensure we don't try to do 3058 * cleanup later. 3059 */ 3060 ip_mfilter_init(&sc->sc_imo.imo_head); 3061 sc->sc_imo.imo_multicast_ifp = NULL; 3062 sc->sc_im6o.im6o_multicast_ifp = NULL; 3063 sc->sc_sync_if = NULL; 3064 } 3065 3066 PFSYNC_UNLOCK(sc); 3067 } 3068 3069 static int 3070 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 3071 { 3072 struct sockaddr_storage sa; 3073 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 3074 status->flags = pfsyncr->pfsyncr_defer; 3075 3076 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 3077 3078 memset(&sa, 0, sizeof(sa)); 3079 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 3080 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 3081 in->sin_family = AF_INET; 3082 in->sin_len = sizeof(*in); 3083 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 3084 } 3085 status->syncpeer = sa; 3086 3087 return 0; 3088 } 3089 3090 static int 3091 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 3092 { 3093 struct ifnet *sifp; 3094 struct in_mfilter *imf = NULL; 3095 struct in6_mfilter *im6f = NULL; 3096 int error; 3097 int c; 3098 3099 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 3100 return (EINVAL); 3101 3102 if (status->syncdev[0] == '\0') 3103 sifp = NULL; 3104 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 3105 return (EINVAL); 3106 3107 switch (status->syncpeer.ss_family) { 3108 #ifdef INET 3109 case AF_UNSPEC: 3110 case AF_INET: { 3111 struct sockaddr_in *status_sin; 3112 status_sin = (struct sockaddr_in *)&(status->syncpeer); 3113 if (sifp != NULL) { 3114 if (status_sin->sin_addr.s_addr == 0 || 3115 status_sin->sin_addr.s_addr == 3116 htonl(INADDR_PFSYNC_GROUP)) { 3117 status_sin->sin_family = AF_INET; 3118 status_sin->sin_len = sizeof(*status_sin); 3119 status_sin->sin_addr.s_addr = 3120 htonl(INADDR_PFSYNC_GROUP); 3121 } 3122 3123 if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) { 3124 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 3125 } 3126 } 3127 break; 3128 } 3129 #endif 3130 #ifdef INET6 3131 case AF_INET6: { 3132 struct sockaddr_in6 *status_sin6; 3133 status_sin6 = (struct sockaddr_in6*)&(status->syncpeer); 3134 if (sifp != NULL) { 3135 if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) || 3136 IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr, 3137 &in6addr_linklocal_pfsync_group)) { 3138 status_sin6->sin6_family = AF_INET6; 3139 status_sin6->sin6_len = sizeof(*status_sin6); 3140 status_sin6->sin6_addr = 3141 in6addr_linklocal_pfsync_group; 3142 } 3143 3144 if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) { 3145 im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0); 3146 } 3147 } 3148 break; 3149 } 3150 #endif 3151 } 3152 3153 PFSYNC_LOCK(sc); 3154 3155 switch (status->version) { 3156 case PFSYNC_MSG_VERSION_UNSPECIFIED: 3157 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 3158 break; 3159 case PFSYNC_MSG_VERSION_1301: 3160 case PFSYNC_MSG_VERSION_1400: 3161 case PFSYNC_MSG_VERSION_1500: 3162 sc->sc_version = status->version; 3163 break; 3164 default: 3165 PFSYNC_UNLOCK(sc); 3166 return (EINVAL); 3167 } 3168 3169 switch (status->syncpeer.ss_family) { 3170 case AF_INET: { 3171 struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer); 3172 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 3173 sc_sin->sin_family = AF_INET; 3174 sc_sin->sin_len = sizeof(*sc_sin); 3175 if (status_sin->sin_addr.s_addr == 0) { 3176 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 3177 } else { 3178 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 3179 } 3180 break; 3181 } 3182 case AF_INET6: { 3183 struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer); 3184 struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer; 3185 sc_sin->sin6_family = AF_INET6; 3186 sc_sin->sin6_len = sizeof(*sc_sin); 3187 if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) { 3188 sc_sin->sin6_addr = in6addr_linklocal_pfsync_group; 3189 } else { 3190 sc_sin->sin6_addr = status_sin->sin6_addr; 3191 } 3192 break; 3193 } 3194 } 3195 3196 sc->sc_maxupdates = status->maxupdates; 3197 if (status->flags & PFSYNCF_DEFER) { 3198 sc->sc_flags |= PFSYNCF_DEFER; 3199 V_pfsync_defer_ptr = pfsync_defer; 3200 } else { 3201 sc->sc_flags &= ~PFSYNCF_DEFER; 3202 V_pfsync_defer_ptr = NULL; 3203 } 3204 3205 if (sifp == NULL) { 3206 if (sc->sc_sync_if) 3207 if_rele(sc->sc_sync_if); 3208 sc->sc_sync_if = NULL; 3209 pfsync_multicast_cleanup(sc); 3210 PFSYNC_UNLOCK(sc); 3211 return (0); 3212 } 3213 3214 for (c = 0; c < pfsync_buckets; c++) { 3215 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 3216 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 3217 (sifp->if_mtu < sc->sc_ifp->if_mtu || 3218 (sc->sc_sync_if != NULL && 3219 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 3220 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 3221 pfsync_sendout(1, c); 3222 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 3223 } 3224 3225 pfsync_multicast_cleanup(sc); 3226 3227 if (((sc->sc_sync_peer.ss_family == AF_INET) && 3228 IN_MULTICAST(ntohl(((struct sockaddr_in *) 3229 &sc->sc_sync_peer)->sin_addr.s_addr))) || 3230 ((sc->sc_sync_peer.ss_family == AF_INET6) && 3231 IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*) 3232 &sc->sc_sync_peer)->sin6_addr))) { 3233 error = pfsync_multicast_setup(sc, sifp, imf, im6f); 3234 if (error) { 3235 if_rele(sifp); 3236 PFSYNC_UNLOCK(sc); 3237 #ifdef INET 3238 if (imf != NULL) 3239 ip_mfilter_free(imf); 3240 #endif 3241 #ifdef INET6 3242 if (im6f != NULL) 3243 ip6_mfilter_free(im6f); 3244 #endif 3245 return (error); 3246 } 3247 } 3248 if (sc->sc_sync_if) 3249 if_rele(sc->sc_sync_if); 3250 sc->sc_sync_if = sifp; 3251 3252 switch (sc->sc_sync_peer.ss_family) { 3253 #ifdef INET 3254 case AF_INET: { 3255 struct ip *ip; 3256 ip = &sc->sc_template.ipv4; 3257 bzero(ip, sizeof(*ip)); 3258 ip->ip_v = IPVERSION; 3259 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 3260 ip->ip_tos = IPTOS_LOWDELAY; 3261 /* len and id are set later. */ 3262 ip->ip_off = htons(IP_DF); 3263 ip->ip_ttl = PFSYNC_DFLTTL; 3264 ip->ip_p = IPPROTO_PFSYNC; 3265 ip->ip_src.s_addr = INADDR_ANY; 3266 ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 3267 break; 3268 } 3269 #endif 3270 #ifdef INET6 3271 case AF_INET6: { 3272 struct ip6_hdr *ip6; 3273 ip6 = &sc->sc_template.ipv6; 3274 bzero(ip6, sizeof(*ip6)); 3275 ip6->ip6_vfc = IPV6_VERSION; 3276 ip6->ip6_hlim = PFSYNC_DFLTTL; 3277 ip6->ip6_nxt = IPPROTO_PFSYNC; 3278 ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr; 3279 3280 struct epoch_tracker et; 3281 NET_EPOCH_ENTER(et); 3282 in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0, 3283 sc->sc_sync_if, &ip6->ip6_src, NULL); 3284 NET_EPOCH_EXIT(et); 3285 break; 3286 } 3287 #endif 3288 } 3289 3290 /* Request a full state table update. */ 3291 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 3292 (*carp_demote_adj_p)(V_pfsync_carp_adj, 3293 "pfsync bulk start"); 3294 sc->sc_flags &= ~PFSYNCF_OK; 3295 if (V_pf_status.debug >= PF_DEBUG_MISC) 3296 printf("pfsync: requesting bulk update\n"); 3297 PFSYNC_UNLOCK(sc); 3298 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 3299 pfsync_request_update(0, 0); 3300 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 3301 PFSYNC_BLOCK(sc); 3302 sc->sc_ureq_sent = time_uptime; 3303 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); 3304 PFSYNC_BUNLOCK(sc); 3305 return (0); 3306 } 3307 3308 static void 3309 pfsync_pointers_init(void) 3310 { 3311 3312 PF_RULES_WLOCK(); 3313 V_pfsync_state_import_ptr = pfsync_state_import; 3314 V_pfsync_insert_state_ptr = pfsync_insert_state; 3315 V_pfsync_update_state_ptr = pfsync_update_state; 3316 V_pfsync_delete_state_ptr = pfsync_delete_state; 3317 V_pfsync_clear_states_ptr = pfsync_clear_states; 3318 V_pfsync_defer_ptr = pfsync_defer; 3319 PF_RULES_WUNLOCK(); 3320 } 3321 3322 static void 3323 pfsync_pointers_uninit(void) 3324 { 3325 3326 PF_RULES_WLOCK(); 3327 V_pfsync_state_import_ptr = NULL; 3328 V_pfsync_insert_state_ptr = NULL; 3329 V_pfsync_update_state_ptr = NULL; 3330 V_pfsync_delete_state_ptr = NULL; 3331 V_pfsync_clear_states_ptr = NULL; 3332 V_pfsync_defer_ptr = NULL; 3333 PF_RULES_WUNLOCK(); 3334 } 3335 3336 static void 3337 vnet_pfsync_init(const void *unused __unused) 3338 { 3339 int error; 3340 3341 V_pfsync_cloner = if_clone_simple(pfsyncname, 3342 pfsync_clone_create, pfsync_clone_destroy, 1); 3343 error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, 3344 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 3345 if (error) { 3346 if_clone_detach(V_pfsync_cloner); 3347 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 3348 } 3349 3350 pfsync_pointers_init(); 3351 } 3352 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 3353 vnet_pfsync_init, NULL); 3354 3355 static void 3356 vnet_pfsync_uninit(const void *unused __unused) 3357 { 3358 int ret __diagused; 3359 3360 pfsync_pointers_uninit(); 3361 3362 if_clone_detach(V_pfsync_cloner); 3363 ret = swi_remove(V_pfsync_swi_cookie); 3364 MPASS(ret == 0); 3365 ret = intr_event_destroy(V_pfsync_swi_ie); 3366 MPASS(ret == 0); 3367 } 3368 3369 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 3370 vnet_pfsync_uninit, NULL); 3371 3372 static int 3373 pfsync_init(void) 3374 { 3375 int error; 3376 3377 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 3378 3379 #ifdef INET 3380 error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); 3381 if (error) 3382 return (error); 3383 #endif 3384 #ifdef INET6 3385 error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL); 3386 if (error) { 3387 ipproto_unregister(IPPROTO_PFSYNC); 3388 return (error); 3389 } 3390 #endif 3391 3392 return (0); 3393 } 3394 3395 static void 3396 pfsync_uninit(void) 3397 { 3398 pfsync_detach_ifnet_ptr = NULL; 3399 3400 #ifdef INET 3401 ipproto_unregister(IPPROTO_PFSYNC); 3402 #endif 3403 #ifdef INET6 3404 ip6proto_unregister(IPPROTO_PFSYNC); 3405 #endif 3406 } 3407 3408 static int 3409 pfsync_modevent(module_t mod, int type, void *data) 3410 { 3411 int error = 0; 3412 3413 switch (type) { 3414 case MOD_LOAD: 3415 error = pfsync_init(); 3416 break; 3417 case MOD_UNLOAD: 3418 pfsync_uninit(); 3419 break; 3420 default: 3421 error = EINVAL; 3422 break; 3423 } 3424 3425 return (error); 3426 } 3427 3428 static moduledata_t pfsync_mod = { 3429 pfsyncname, 3430 pfsync_modevent, 3431 0 3432 }; 3433 3434 #define PFSYNC_MODVER 1 3435 3436 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 3437 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 3438 MODULE_VERSION(pfsync, PFSYNC_MODVER); 3439 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 3440