1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 #include "opt_inet.h" 64 #include "opt_inet6.h" 65 #include "opt_pf.h" 66 67 #include <sys/param.h> 68 #include <sys/bus.h> 69 #include <sys/endian.h> 70 #include <sys/interrupt.h> 71 #include <sys/kernel.h> 72 #include <sys/lock.h> 73 #include <sys/mbuf.h> 74 #include <sys/module.h> 75 #include <sys/mutex.h> 76 #include <sys/nv.h> 77 #include <sys/priv.h> 78 #include <sys/smp.h> 79 #include <sys/socket.h> 80 #include <sys/sockio.h> 81 #include <sys/sysctl.h> 82 #include <sys/syslog.h> 83 84 #include <net/bpf.h> 85 #include <net/if.h> 86 #include <net/if_var.h> 87 #include <net/if_clone.h> 88 #include <net/if_private.h> 89 #include <net/if_types.h> 90 #include <net/vnet.h> 91 #include <net/pfvar.h> 92 #include <net/route.h> 93 #include <net/if_pfsync.h> 94 95 #include <netinet/if_ether.h> 96 #include <netinet/in.h> 97 #include <netinet/in_var.h> 98 #include <netinet6/in6_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip6.h> 101 #include <netinet/ip_carp.h> 102 #include <netinet/ip_var.h> 103 #include <netinet/tcp.h> 104 #include <netinet/tcp_fsm.h> 105 #include <netinet/tcp_seq.h> 106 107 #include <netinet/ip6.h> 108 #include <netinet6/ip6_var.h> 109 #include <netinet6/scope6_var.h> 110 111 #include <netpfil/pf/pfsync_nv.h> 112 113 struct pfsync_bucket; 114 struct pfsync_softc; 115 116 union inet_template { 117 struct ip ipv4; 118 struct ip6_hdr ipv6; 119 }; 120 121 #define PFSYNC_MINPKT ( \ 122 sizeof(union inet_template) + \ 123 sizeof(struct pfsync_header) + \ 124 sizeof(struct pfsync_subheader) ) 125 126 static int pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *, 127 struct pf_state_peer_export *); 128 static int pfsync_in_clr(struct mbuf *, int, int, int, int); 129 static int pfsync_in_ins(struct mbuf *, int, int, int, int); 130 static int pfsync_in_iack(struct mbuf *, int, int, int, int); 131 static int pfsync_in_upd(struct mbuf *, int, int, int, int); 132 static int pfsync_in_upd_c(struct mbuf *, int, int, int, int); 133 static int pfsync_in_ureq(struct mbuf *, int, int, int, int); 134 static int pfsync_in_del_c(struct mbuf *, int, int, int, int); 135 static int pfsync_in_bus(struct mbuf *, int, int, int, int); 136 static int pfsync_in_tdb(struct mbuf *, int, int, int, int); 137 static int pfsync_in_eof(struct mbuf *, int, int, int, int); 138 static int pfsync_in_error(struct mbuf *, int, int, int, int); 139 140 static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { 141 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 142 pfsync_in_ins, /* PFSYNC_ACT_INS_1301 */ 143 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 144 pfsync_in_upd, /* PFSYNC_ACT_UPD_1301 */ 145 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 146 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 147 pfsync_in_error, /* PFSYNC_ACT_DEL */ 148 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 149 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 150 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 151 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 152 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 153 pfsync_in_eof, /* PFSYNC_ACT_EOF */ 154 pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ 155 pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ 156 pfsync_in_ins, /* PFSYNC_ACT_INS_1500 */ 157 pfsync_in_upd, /* PFSYNC_ACT_UPD_1500 */ 158 }; 159 160 struct pfsync_q { 161 void (*write)(struct pf_kstate *, void *); 162 size_t len; 163 u_int8_t action; 164 }; 165 166 /* We have the following sync queues */ 167 enum pfsync_q_id { 168 PFSYNC_Q_INS_1301, 169 PFSYNC_Q_INS_1400, 170 PFSYNC_Q_INS_1500, 171 PFSYNC_Q_IACK, 172 PFSYNC_Q_UPD_1301, 173 PFSYNC_Q_UPD_1400, 174 PFSYNC_Q_UPD_1500, 175 PFSYNC_Q_UPD_C, 176 PFSYNC_Q_DEL_C, 177 PFSYNC_Q_COUNT, 178 }; 179 180 /* Functions for building messages for given queue */ 181 static void pfsync_out_state_1301(struct pf_kstate *, void *); 182 static void pfsync_out_state_1400(struct pf_kstate *, void *); 183 static void pfsync_out_state_1500(struct pf_kstate *, void *); 184 static void pfsync_out_iack(struct pf_kstate *, void *); 185 static void pfsync_out_upd_c(struct pf_kstate *, void *); 186 static void pfsync_out_del_c(struct pf_kstate *, void *); 187 188 /* Attach those functions to queue */ 189 static struct pfsync_q pfsync_qs[] = { 190 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, 191 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, 192 { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_INS_1500 }, 193 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 194 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, 195 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, 196 { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_UPD_1500 }, 197 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 198 { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 199 }; 200 201 /* Map queue to pf_kstate->sync_state */ 202 static u_int8_t pfsync_qid_sstate[] = { 203 PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ 204 PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ 205 PFSYNC_S_INS, /* PFSYNC_Q_INS_1500 */ 206 PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ 207 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ 208 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ 209 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1500 */ 210 PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ 211 PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ 212 }; 213 214 /* Map pf_kstate->sync_state to queue */ 215 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t); 216 217 static void pfsync_q_ins(struct pf_kstate *, int sync_state, bool); 218 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 219 220 static void pfsync_update_state(struct pf_kstate *); 221 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 222 223 struct pfsync_upd_req_item { 224 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 225 struct pfsync_upd_req ur_msg; 226 }; 227 228 struct pfsync_deferral { 229 struct pfsync_softc *pd_sc; 230 TAILQ_ENTRY(pfsync_deferral) pd_entry; 231 struct callout pd_tmo; 232 233 struct pf_kstate *pd_st; 234 struct mbuf *pd_m; 235 }; 236 237 struct pfsync_bucket 238 { 239 int b_id; 240 struct pfsync_softc *b_sc; 241 struct mtx b_mtx; 242 struct callout b_tmo; 243 int b_flags; 244 #define PFSYNCF_BUCKET_PUSH 0x00000001 245 246 size_t b_len; 247 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_Q_COUNT]; 248 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 249 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 250 u_int b_deferred; 251 uint8_t *b_plus; 252 size_t b_pluslen; 253 254 struct ifaltq b_snd; 255 }; 256 257 struct pfsync_softc { 258 /* Configuration */ 259 struct ifnet *sc_ifp; 260 struct ifnet *sc_sync_if; 261 struct ip_moptions sc_imo; 262 struct ip6_moptions sc_im6o; 263 struct sockaddr_storage sc_sync_peer; 264 uint32_t sc_flags; 265 uint8_t sc_maxupdates; 266 union inet_template sc_template; 267 struct mtx sc_mtx; 268 uint32_t sc_version; 269 270 /* Queued data */ 271 struct pfsync_bucket *sc_buckets; 272 273 /* Bulk update info */ 274 struct mtx sc_bulk_mtx; 275 uint32_t sc_ureq_sent; 276 int sc_bulk_tries; 277 uint32_t sc_ureq_received; 278 int sc_bulk_hashid; 279 uint64_t sc_bulk_stateid; 280 uint32_t sc_bulk_creatorid; 281 struct callout sc_bulk_tmo; 282 struct callout sc_bulkfail_tmo; 283 }; 284 285 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 286 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 287 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 288 289 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 290 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 291 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 292 293 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 294 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 295 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 296 297 #define PFSYNC_DEFER_TIMEOUT 20 298 299 static const char pfsyncname[] = "pfsync"; 300 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 301 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 302 #define V_pfsyncif VNET(pfsyncif) 303 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 304 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 305 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 306 #define V_pfsync_swi_ie VNET(pfsync_swi_ie) 307 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 308 #define V_pfsyncstats VNET(pfsyncstats) 309 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 310 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 311 VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; 312 #define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) 313 314 static void pfsync_timeout(void *); 315 static void pfsync_push(struct pfsync_bucket *); 316 static void pfsync_push_all(struct pfsync_softc *); 317 static void pfsyncintr(void *); 318 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 319 struct in_mfilter *, struct in6_mfilter *); 320 static void pfsync_multicast_cleanup(struct pfsync_softc *); 321 static void pfsync_pointers_init(void); 322 static void pfsync_pointers_uninit(void); 323 static int pfsync_init(void); 324 static void pfsync_uninit(void); 325 326 static unsigned long pfsync_buckets; 327 328 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 329 "PFSYNC"); 330 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 331 &VNET_NAME(pfsyncstats), pfsyncstats, 332 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 333 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 334 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 335 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 336 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 337 SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, 338 &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); 339 340 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 341 static void pfsync_clone_destroy(struct ifnet *); 342 static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *, 343 struct pf_state_peer *); 344 static int pfsyncoutput(struct ifnet *, struct mbuf *, 345 const struct sockaddr *, struct route *); 346 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 347 348 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 349 static void pfsync_undefer(struct pfsync_deferral *, int); 350 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 351 static void pfsync_undefer_state(struct pf_kstate *, int); 352 static void pfsync_defer_tmo(void *); 353 354 static void pfsync_request_update(u_int32_t, u_int64_t); 355 static bool pfsync_update_state_req(struct pf_kstate *); 356 357 static void pfsync_drop_all(struct pfsync_softc *); 358 static void pfsync_drop(struct pfsync_softc *, int); 359 static void pfsync_sendout(int, int); 360 static void pfsync_send_plus(void *, size_t); 361 362 static void pfsync_bulk_start(void); 363 static void pfsync_bulk_status(u_int8_t); 364 static void pfsync_bulk_update(void *); 365 static void pfsync_bulk_fail(void *); 366 367 static void pfsync_detach_ifnet(struct ifnet *); 368 369 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 370 struct pfsync_kstatus *); 371 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 372 struct pfsync_softc *); 373 374 #ifdef IPSEC 375 static void pfsync_update_net_tdb(struct pfsync_tdb *); 376 #endif 377 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 378 struct pf_kstate *); 379 380 #define PFSYNC_MAX_BULKTRIES 12 381 382 VNET_DEFINE(struct if_clone *, pfsync_cloner); 383 #define V_pfsync_cloner VNET(pfsync_cloner) 384 385 const struct in6_addr in6addr_linklocal_pfsync_group = 386 {{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}}; 388 static int 389 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 390 { 391 struct pfsync_softc *sc; 392 struct ifnet *ifp; 393 struct pfsync_bucket *b; 394 int c; 395 enum pfsync_q_id q; 396 397 if (unit != 0) 398 return (EINVAL); 399 400 if (! pfsync_buckets) 401 pfsync_buckets = mp_ncpus * 2; 402 403 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 404 sc->sc_flags |= PFSYNCF_OK; 405 sc->sc_maxupdates = 128; 406 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 407 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 408 M_PFSYNC, M_ZERO | M_WAITOK); 409 for (c = 0; c < pfsync_buckets; c++) { 410 b = &sc->sc_buckets[c]; 411 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 412 413 b->b_id = c; 414 b->b_sc = sc; 415 b->b_len = PFSYNC_MINPKT; 416 417 for (q = 0; q < PFSYNC_Q_COUNT; q++) 418 TAILQ_INIT(&b->b_qs[q]); 419 420 TAILQ_INIT(&b->b_upd_req_list); 421 TAILQ_INIT(&b->b_deferrals); 422 423 callout_init(&b->b_tmo, 1); 424 425 b->b_snd.ifq_maxlen = ifqmaxlen; 426 } 427 428 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 429 if_initname(ifp, pfsyncname, unit); 430 ifp->if_softc = sc; 431 ifp->if_ioctl = pfsyncioctl; 432 ifp->if_output = pfsyncoutput; 433 ifp->if_hdrlen = sizeof(struct pfsync_header); 434 ifp->if_mtu = ETHERMTU; 435 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 436 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 437 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 438 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 439 440 if_attach(ifp); 441 442 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 443 444 V_pfsyncif = sc; 445 446 return (0); 447 } 448 449 static void 450 pfsync_clone_destroy(struct ifnet *ifp) 451 { 452 struct pfsync_softc *sc = ifp->if_softc; 453 struct pfsync_bucket *b; 454 int c, ret; 455 456 for (c = 0; c < pfsync_buckets; c++) { 457 b = &sc->sc_buckets[c]; 458 /* 459 * At this stage, everything should have already been 460 * cleared by pfsync_uninit(), and we have only to 461 * drain callouts. 462 */ 463 PFSYNC_BUCKET_LOCK(b); 464 while (b->b_deferred > 0) { 465 struct pfsync_deferral *pd = 466 TAILQ_FIRST(&b->b_deferrals); 467 468 ret = callout_stop(&pd->pd_tmo); 469 if (ret > 0) { 470 pfsync_undefer(pd, 1); 471 } else { 472 PFSYNC_BUCKET_UNLOCK(b); 473 callout_drain(&pd->pd_tmo); 474 PFSYNC_BUCKET_LOCK(b); 475 } 476 } 477 MPASS(b->b_deferred == 0); 478 MPASS(TAILQ_EMPTY(&b->b_deferrals)); 479 PFSYNC_BUCKET_UNLOCK(b); 480 481 free(b->b_plus, M_PFSYNC); 482 b->b_plus = NULL; 483 b->b_pluslen = 0; 484 485 callout_drain(&b->b_tmo); 486 } 487 488 callout_drain(&sc->sc_bulkfail_tmo); 489 callout_drain(&sc->sc_bulk_tmo); 490 491 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 492 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 493 bpfdetach(ifp); 494 if_detach(ifp); 495 496 pfsync_drop_all(sc); 497 498 if_free(ifp); 499 pfsync_multicast_cleanup(sc); 500 mtx_destroy(&sc->sc_mtx); 501 mtx_destroy(&sc->sc_bulk_mtx); 502 503 for (c = 0; c < pfsync_buckets; c++) { 504 b = &sc->sc_buckets[c]; 505 mtx_destroy(&b->b_mtx); 506 } 507 free(sc->sc_buckets, M_PFSYNC); 508 free(sc, M_PFSYNC); 509 510 V_pfsyncif = NULL; 511 } 512 513 static int 514 pfsync_alloc_scrub_memory(struct pf_state_peer_export *s, 515 struct pf_state_peer *d) 516 { 517 if (s->scrub.scrub_flag && d->scrub == NULL) { 518 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 519 if (d->scrub == NULL) 520 return (ENOMEM); 521 } 522 523 return (0); 524 } 525 526 static int 527 pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) 528 { 529 struct pfsync_softc *sc = V_pfsyncif; 530 #ifndef __NO_STRICT_ALIGNMENT 531 struct pfsync_state_key key[2]; 532 #endif 533 struct pfsync_state_key *kw, *ks; 534 struct pf_kstate *st = NULL; 535 struct pf_state_key *skw = NULL, *sks = NULL; 536 struct pf_krule *r = NULL; 537 struct pfi_kkif *kif, *orig_kif; 538 struct pfi_kkif *rt_kif = NULL; 539 struct pf_kpooladdr *rpool_first; 540 int error; 541 int n = 0; 542 sa_family_t rt_af = 0; 543 uint8_t rt = 0; 544 sa_family_t wire_af, stack_af; 545 u_int8_t wire_proto, stack_proto; 546 547 PF_RULES_RASSERT(); 548 549 if (strnlen(sp->pfs_1301.ifname, IFNAMSIZ) == IFNAMSIZ) 550 return (EINVAL); 551 552 if (sp->pfs_1301.creatorid == 0) { 553 if (V_pf_status.debug >= PF_DEBUG_MISC) 554 printf("%s: invalid creator id: %08x\n", __func__, 555 ntohl(sp->pfs_1301.creatorid)); 556 return (EINVAL); 557 } 558 559 /* 560 * Check interfaces early on. Do it before allocating memory etc. 561 * Because there is a high chance there will be a lot more such states. 562 */ 563 if ((kif = orig_kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { 564 if (V_pf_status.debug >= PF_DEBUG_MISC) 565 printf("%s: unknown interface: %s\n", __func__, 566 sp->pfs_1301.ifname); 567 if (flags & PFSYNC_SI_IOCTL) 568 return (EINVAL); 569 return (0); /* skip this state */ 570 } 571 572 /* 573 * States created with floating interface policy can be synchronized to 574 * hosts with different interfaces, because they are bound to V_pfi_all. 575 * But s->orig_kif still points to a real interface. Don't abort 576 * importing the state if orig_kif does not exists on the importing host 577 * but the state is not interface-bound. 578 */ 579 if (msg_version == PFSYNC_MSG_VERSION_1500) { 580 orig_kif = pfi_kkif_find(sp->pfs_1500.orig_ifname); 581 if (orig_kif == NULL) { 582 if (kif == V_pfi_all) { 583 orig_kif = kif; 584 } else { 585 if (V_pf_status.debug >= PF_DEBUG_MISC) 586 printf("%s: unknown original interface:" 587 " %s\n", __func__, 588 sp->pfs_1500.orig_ifname); 589 if (flags & PFSYNC_SI_IOCTL) 590 return (EINVAL); 591 return (0); /* skip this state */ 592 } 593 } 594 } 595 596 /* 597 * If the ruleset checksums match or the state is coming from the ioctl, 598 * it's safe to associate the state with the rule of that number. 599 */ 600 if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && 601 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < 602 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { 603 TAILQ_FOREACH(r, pf_main_ruleset.rules[ 604 PF_RULESET_FILTER].active.ptr, entries) 605 if (ntohl(sp->pfs_1301.rule) == n++) 606 break; 607 } else 608 r = &V_pf_default_rule; 609 610 switch (msg_version) { 611 case PFSYNC_MSG_VERSION_1301: 612 /* 613 * On FreeBSD <= 13 the routing interface and routing operation 614 * are not sent over pfsync. If the ruleset is identical, 615 * though, we might be able to recover the routing information 616 * from the local ruleset. 617 */ 618 if (r != &V_pf_default_rule) { 619 struct pf_kpool *pool = &r->route; 620 621 /* Backwards compatibility. */ 622 if (TAILQ_EMPTY(&pool->list)) 623 pool = &r->rdr; 624 625 /* 626 * The ruleset is identical, try to recover. If the rule 627 * has a redirection pool with a single interface, there 628 * is a chance that this interface is identical as on 629 * the pfsync peer. If there's more than one interface, 630 * give up, as we can't be sure that we will pick the 631 * same one as the pfsync peer did. 632 */ 633 rpool_first = TAILQ_FIRST(&(pool->list)); 634 if ((rpool_first == NULL) || 635 (TAILQ_NEXT(rpool_first, entries) != NULL)) { 636 DPFPRINTF(PF_DEBUG_MISC, 637 "%s: can't recover routing information " 638 "because of empty or bad redirection pool", 639 __func__); 640 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 641 } 642 rt = r->rt; 643 rt_kif = rpool_first->kif; 644 /* 645 * Guess the AF of the route address, FreeBSD 13 does 646 * not support af-to nor prefer-ipv6-nexthop 647 * so it should be safe. 648 */ 649 rt_af = r->af; 650 } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { 651 /* 652 * Ruleset different, routing *supposedly* requested, 653 * give up on recovering. 654 */ 655 DPFPRINTF(PF_DEBUG_MISC, 656 "%s: can't recover routing information " 657 "because of different ruleset", __func__); 658 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 659 } 660 wire_af = stack_af = sp->pfs_1301.af; 661 wire_proto = stack_proto = sp->pfs_1301.proto; 662 break; 663 case PFSYNC_MSG_VERSION_1400: 664 /* 665 * On FreeBSD 14 we're not taking any chances. 666 * We use the information synced to us. 667 */ 668 if (sp->pfs_1400.rt) { 669 rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname); 670 if (rt_kif == NULL) { 671 DPFPRINTF(PF_DEBUG_MISC, 672 "%s: unknown route interface: %s", 673 __func__, sp->pfs_1400.rt_ifname); 674 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 675 } 676 rt = sp->pfs_1400.rt; 677 /* 678 * Guess the AF of the route address, FreeBSD 14 does 679 * not support af-to nor prefer-ipv6-nexthop 680 * so it should be safe. 681 */ 682 rt_af = sp->pfs_1400.af; 683 } 684 wire_af = stack_af = sp->pfs_1400.af; 685 wire_proto = stack_proto = sp->pfs_1400.proto; 686 break; 687 case PFSYNC_MSG_VERSION_1500: 688 /* 689 * On FreeBSD 15 and above we're not taking any chances. 690 * We use the information synced to us. 691 */ 692 if (sp->pfs_1500.rt) { 693 rt_kif = pfi_kkif_find(sp->pfs_1500.rt_ifname); 694 if (rt_kif == NULL) { 695 DPFPRINTF(PF_DEBUG_MISC, 696 "%s: unknown route interface: %s", 697 __func__, sp->pfs_1500.rt_ifname); 698 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 699 } 700 rt = sp->pfs_1500.rt; 701 rt_af = sp->pfs_1500.rt_af; 702 } 703 wire_af = sp->pfs_1500.wire_af; 704 stack_af = sp->pfs_1500.stack_af; 705 wire_proto = sp->pfs_1500.wire_proto; 706 stack_proto = sp->pfs_1500.stack_proto; 707 break; 708 } 709 710 if ((r->max_states && 711 counter_u64_fetch(r->states_cur) >= r->max_states)) 712 goto cleanup; 713 714 /* 715 * XXXGL: consider M_WAITOK in ioctl path after. 716 */ 717 st = pf_alloc_state(M_NOWAIT); 718 if (__predict_false(st == NULL)) 719 goto cleanup; 720 721 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 722 goto cleanup; 723 724 #ifndef __NO_STRICT_ALIGNMENT 725 bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2); 726 kw = &key[PF_SK_WIRE]; 727 ks = &key[PF_SK_STACK]; 728 #else 729 kw = &sp->pfs_1301.key[PF_SK_WIRE]; 730 ks = &sp->pfs_1301.key[PF_SK_STACK]; 731 #endif 732 733 if (wire_af != stack_af || 734 PF_ANEQ(&kw->addr[0], &ks->addr[0], wire_af) || 735 PF_ANEQ(&kw->addr[1], &ks->addr[1], wire_af) || 736 kw->port[0] != ks->port[0] || 737 kw->port[1] != ks->port[1]) { 738 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 739 if (sks == NULL) 740 goto cleanup; 741 } else 742 sks = skw; 743 744 /* allocate memory for scrub info */ 745 if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) || 746 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst)) 747 goto cleanup; 748 749 /* Copy to state key(s). */ 750 skw->addr[0] = kw->addr[0]; 751 skw->addr[1] = kw->addr[1]; 752 skw->port[0] = kw->port[0]; 753 skw->port[1] = kw->port[1]; 754 skw->proto = wire_proto; 755 skw->af = wire_af; 756 if (sks != skw) { 757 sks->addr[0] = ks->addr[0]; 758 sks->addr[1] = ks->addr[1]; 759 sks->port[0] = ks->port[0]; 760 sks->port[1] = ks->port[1]; 761 sks->proto = stack_proto; 762 sks->af = stack_af; 763 } 764 765 /* copy to state */ 766 st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000; 767 st->act.rt = rt; 768 st->act.rt_kif = rt_kif; 769 st->act.rt_af = rt_af; 770 771 switch (msg_version) { 772 case PFSYNC_MSG_VERSION_1301: 773 st->state_flags = sp->pfs_1301.state_flags; 774 st->direction = sp->pfs_1301.direction; 775 st->act.log = sp->pfs_1301.log; 776 st->timeout = sp->pfs_1301.timeout; 777 if (rt) 778 bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, 779 sizeof(st->act.rt_addr)); 780 /* 781 * In FreeBSD 13 pfsync lacks many attributes. Copy them 782 * from the rule if possible. If rule can't be matched 783 * clear any set options as we can't recover their 784 * parameters. 785 */ 786 if (r == &V_pf_default_rule) { 787 st->state_flags &= ~PFSTATE_SETMASK; 788 } else { 789 /* 790 * Similar to pf_rule_to_actions(). This code 791 * won't set the actions properly if they come 792 * from multiple "match" rules as only rule 793 * creating the state is send over pfsync. 794 */ 795 st->act.qid = r->qid; 796 st->act.pqid = r->pqid; 797 st->act.rtableid = r->rtableid; 798 if (r->scrub_flags & PFSTATE_SETTOS) 799 st->act.set_tos = r->set_tos; 800 st->act.min_ttl = r->min_ttl; 801 st->act.max_mss = r->max_mss; 802 st->state_flags |= (r->scrub_flags & 803 (PFSTATE_NODF|PFSTATE_RANDOMID| 804 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP| 805 PFSTATE_SETPRIO)); 806 if (r->dnpipe || r->dnrpipe) { 807 if (r->free_flags & PFRULE_DN_IS_PIPE) 808 st->state_flags |= PFSTATE_DN_IS_PIPE; 809 else 810 st->state_flags &= ~PFSTATE_DN_IS_PIPE; 811 } 812 st->act.dnpipe = r->dnpipe; 813 st->act.dnrpipe = r->dnrpipe; 814 } 815 break; 816 case PFSYNC_MSG_VERSION_1400: 817 st->state_flags = ntohs(sp->pfs_1400.state_flags); 818 st->direction = sp->pfs_1400.direction; 819 st->act.log = sp->pfs_1400.log; 820 st->timeout = sp->pfs_1400.timeout; 821 st->act.qid = ntohs(sp->pfs_1400.qid); 822 st->act.pqid = ntohs(sp->pfs_1400.pqid); 823 st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); 824 st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe); 825 st->act.rtableid = ntohl(sp->pfs_1400.rtableid); 826 st->act.min_ttl = sp->pfs_1400.min_ttl; 827 st->act.set_tos = sp->pfs_1400.set_tos; 828 st->act.max_mss = ntohs(sp->pfs_1400.max_mss); 829 st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; 830 st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; 831 if (rt) 832 bcopy(&sp->pfs_1400.rt_addr, &st->act.rt_addr, 833 sizeof(st->act.rt_addr)); 834 break; 835 case PFSYNC_MSG_VERSION_1500: 836 st->state_flags = ntohs(sp->pfs_1500.state_flags); 837 st->direction = sp->pfs_1500.direction; 838 st->act.log = sp->pfs_1500.log; 839 st->timeout = sp->pfs_1500.timeout; 840 st->act.qid = ntohs(sp->pfs_1500.qid); 841 st->act.pqid = ntohs(sp->pfs_1500.pqid); 842 st->act.dnpipe = ntohs(sp->pfs_1500.dnpipe); 843 st->act.dnrpipe = ntohs(sp->pfs_1500.dnrpipe); 844 st->act.rtableid = ntohl(sp->pfs_1500.rtableid); 845 st->act.min_ttl = sp->pfs_1500.min_ttl; 846 st->act.set_tos = sp->pfs_1500.set_tos; 847 st->act.max_mss = ntohs(sp->pfs_1500.max_mss); 848 st->act.set_prio[0] = sp->pfs_1500.set_prio[0]; 849 st->act.set_prio[1] = sp->pfs_1500.set_prio[1]; 850 if (rt) 851 bcopy(&sp->pfs_1500.rt_addr, &st->act.rt_addr, 852 sizeof(st->act.rt_addr)); 853 if (sp->pfs_1500.tagname[0] != 0) 854 st->tag = pf_tagname2tag(sp->pfs_1500.tagname); 855 break; 856 default: 857 panic("%s: Unsupported pfsync_msg_version %d", 858 __func__, msg_version); 859 } 860 861 st->expire = pf_get_uptime(); 862 if (sp->pfs_1301.expire) { 863 uint32_t timeout; 864 timeout = r->timeout[st->timeout]; 865 if (!timeout) 866 timeout = V_pf_default_rule.timeout[st->timeout]; 867 868 /* sp->expire may have been adaptively scaled by export. */ 869 st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; 870 } 871 872 if (! (st->act.rtableid == -1 || 873 (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) 874 goto cleanup; 875 876 st->id = sp->pfs_1301.id; 877 st->creatorid = sp->pfs_1301.creatorid; 878 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 879 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 880 881 st->rule = r; 882 st->nat_rule = NULL; 883 st->anchor = NULL; 884 885 st->pfsync_time = time_uptime; 886 st->sync_state = PFSYNC_S_NONE; 887 888 if (!(flags & PFSYNC_SI_IOCTL)) 889 st->state_flags |= PFSTATE_NOSYNC; 890 891 if ((error = pf_state_insert(kif, orig_kif, skw, sks, st)) != 0) 892 goto cleanup_state; 893 894 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 895 counter_u64_add(r->states_cur, 1); 896 counter_u64_add(r->states_tot, 1); 897 898 if (!(flags & PFSYNC_SI_IOCTL)) { 899 st->state_flags &= ~PFSTATE_NOSYNC; 900 if (st->state_flags & PFSTATE_ACK) { 901 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 902 PFSYNC_BUCKET_LOCK(b); 903 pfsync_q_ins(st, PFSYNC_S_IACK, true); 904 PFSYNC_BUCKET_UNLOCK(b); 905 906 pfsync_push_all(sc); 907 } 908 } 909 st->state_flags &= ~PFSTATE_ACK; 910 PF_STATE_UNLOCK(st); 911 912 return (0); 913 914 cleanup: 915 error = ENOMEM; 916 917 if (skw == sks) 918 sks = NULL; 919 uma_zfree(V_pf_state_key_z, skw); 920 uma_zfree(V_pf_state_key_z, sks); 921 922 cleanup_state: /* pf_state_insert() frees the state keys. */ 923 if (st) { 924 st->timeout = PFTM_UNLINKED; /* appease an assert */ 925 pf_free_state(st); 926 } 927 return (error); 928 } 929 930 #ifdef INET 931 static int 932 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 933 { 934 struct pfsync_softc *sc = V_pfsyncif; 935 struct mbuf *m = *mp; 936 struct ip *ip = mtod(m, struct ip *); 937 struct pfsync_header *ph; 938 struct pfsync_subheader subh; 939 940 int offset, len, flags = 0; 941 int rv; 942 uint16_t count; 943 944 PF_RULES_RLOCK_TRACKER; 945 946 *mp = NULL; 947 V_pfsyncstats.pfsyncs_ipackets++; 948 949 /* Verify that we have a sync interface configured. */ 950 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 951 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 952 goto done; 953 954 /* verify that the packet came in on the right interface */ 955 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 956 V_pfsyncstats.pfsyncs_badif++; 957 goto done; 958 } 959 960 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 961 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 962 /* verify that the IP TTL is 255. */ 963 if (ip->ip_ttl != PFSYNC_DFLTTL) { 964 V_pfsyncstats.pfsyncs_badttl++; 965 goto done; 966 } 967 968 offset = ip->ip_hl << 2; 969 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 970 V_pfsyncstats.pfsyncs_hdrops++; 971 goto done; 972 } 973 974 if (offset + sizeof(*ph) > m->m_len) { 975 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 976 V_pfsyncstats.pfsyncs_hdrops++; 977 return (IPPROTO_DONE); 978 } 979 ip = mtod(m, struct ip *); 980 } 981 ph = (struct pfsync_header *)((char *)ip + offset); 982 983 /* verify the version */ 984 if (ph->version != PFSYNC_VERSION) { 985 V_pfsyncstats.pfsyncs_badver++; 986 goto done; 987 } 988 989 len = ntohs(ph->len) + offset; 990 if (m->m_pkthdr.len < len) { 991 V_pfsyncstats.pfsyncs_badlen++; 992 goto done; 993 } 994 995 /* 996 * Trusting pf_chksum during packet processing, as well as seeking 997 * in interface name tree, require holding PF_RULES_RLOCK(). 998 */ 999 PF_RULES_RLOCK(); 1000 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 1001 flags = PFSYNC_SI_CKSUM; 1002 1003 offset += sizeof(*ph); 1004 while (offset <= len - sizeof(subh)) { 1005 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 1006 offset += sizeof(subh); 1007 1008 if (subh.action >= PFSYNC_ACT_MAX) { 1009 V_pfsyncstats.pfsyncs_badact++; 1010 PF_RULES_RUNLOCK(); 1011 goto done; 1012 } 1013 1014 count = ntohs(subh.count); 1015 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 1016 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 1017 if (rv == -1) { 1018 PF_RULES_RUNLOCK(); 1019 return (IPPROTO_DONE); 1020 } 1021 1022 offset += rv; 1023 } 1024 PF_RULES_RUNLOCK(); 1025 1026 done: 1027 m_freem(m); 1028 return (IPPROTO_DONE); 1029 } 1030 #endif 1031 1032 #ifdef INET6 1033 static int 1034 pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused) 1035 { 1036 struct pfsync_softc *sc = V_pfsyncif; 1037 struct mbuf *m = *mp; 1038 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1039 struct pfsync_header *ph; 1040 struct pfsync_subheader subh; 1041 1042 int offset, len, flags = 0; 1043 int rv; 1044 uint16_t count; 1045 1046 PF_RULES_RLOCK_TRACKER; 1047 1048 *mp = NULL; 1049 V_pfsyncstats.pfsyncs_ipackets++; 1050 1051 /* Verify that we have a sync interface configured. */ 1052 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 1053 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1054 goto done; 1055 1056 /* verify that the packet came in on the right interface */ 1057 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 1058 V_pfsyncstats.pfsyncs_badif++; 1059 goto done; 1060 } 1061 1062 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 1063 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 1064 /* verify that the IP TTL is 255. */ 1065 if (ip6->ip6_hlim != PFSYNC_DFLTTL) { 1066 V_pfsyncstats.pfsyncs_badttl++; 1067 goto done; 1068 } 1069 1070 1071 offset = sizeof(*ip6); 1072 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 1073 V_pfsyncstats.pfsyncs_hdrops++; 1074 goto done; 1075 } 1076 1077 if (offset + sizeof(*ph) > m->m_len) { 1078 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 1079 V_pfsyncstats.pfsyncs_hdrops++; 1080 return (IPPROTO_DONE); 1081 } 1082 ip6 = mtod(m, struct ip6_hdr *); 1083 } 1084 ph = (struct pfsync_header *)((char *)ip6 + offset); 1085 1086 /* verify the version */ 1087 if (ph->version != PFSYNC_VERSION) { 1088 V_pfsyncstats.pfsyncs_badver++; 1089 goto done; 1090 } 1091 1092 len = ntohs(ph->len) + offset; 1093 if (m->m_pkthdr.len < len) { 1094 V_pfsyncstats.pfsyncs_badlen++; 1095 goto done; 1096 } 1097 1098 /* 1099 * Trusting pf_chksum during packet processing, as well as seeking 1100 * in interface name tree, require holding PF_RULES_RLOCK(). 1101 */ 1102 PF_RULES_RLOCK(); 1103 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 1104 flags = PFSYNC_SI_CKSUM; 1105 1106 offset += sizeof(*ph); 1107 while (offset <= len - sizeof(subh)) { 1108 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 1109 offset += sizeof(subh); 1110 1111 if (subh.action >= PFSYNC_ACT_MAX) { 1112 V_pfsyncstats.pfsyncs_badact++; 1113 PF_RULES_RUNLOCK(); 1114 goto done; 1115 } 1116 1117 count = ntohs(subh.count); 1118 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 1119 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 1120 if (rv == -1) { 1121 PF_RULES_RUNLOCK(); 1122 return (IPPROTO_DONE); 1123 } 1124 1125 offset += rv; 1126 } 1127 PF_RULES_RUNLOCK(); 1128 1129 done: 1130 m_freem(m); 1131 return (IPPROTO_DONE); 1132 } 1133 #endif 1134 1135 static int 1136 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) 1137 { 1138 struct pfsync_clr *clr; 1139 struct mbuf *mp; 1140 int len = sizeof(*clr) * count; 1141 int i, offp; 1142 u_int32_t creatorid; 1143 1144 mp = m_pulldown(m, offset, len, &offp); 1145 if (mp == NULL) { 1146 V_pfsyncstats.pfsyncs_badlen++; 1147 return (-1); 1148 } 1149 clr = (struct pfsync_clr *)(mp->m_data + offp); 1150 1151 for (i = 0; i < count; i++) { 1152 creatorid = clr[i].creatorid; 1153 1154 if (clr[i].ifname[0] != '\0' && 1155 pfi_kkif_find(clr[i].ifname) == NULL) 1156 continue; 1157 1158 for (int i = 0; i <= V_pf_hashmask; i++) { 1159 struct pf_idhash *ih = &V_pf_idhash[i]; 1160 struct pf_kstate *s; 1161 relock: 1162 PF_HASHROW_LOCK(ih); 1163 LIST_FOREACH(s, &ih->states, entry) { 1164 if (s->creatorid == creatorid) { 1165 s->state_flags |= PFSTATE_NOSYNC; 1166 pf_remove_state(s); 1167 goto relock; 1168 } 1169 } 1170 PF_HASHROW_UNLOCK(ih); 1171 } 1172 } 1173 1174 return (len); 1175 } 1176 1177 static int 1178 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) 1179 { 1180 struct mbuf *mp; 1181 union pfsync_state_union *sa, *sp; 1182 int i, offp, total_len, msg_version, msg_len; 1183 u_int8_t timeout, direction; 1184 sa_family_t af; 1185 1186 switch (action) { 1187 case PFSYNC_ACT_INS_1301: 1188 msg_len = sizeof(struct pfsync_state_1301); 1189 msg_version = PFSYNC_MSG_VERSION_1301; 1190 break; 1191 case PFSYNC_ACT_INS_1400: 1192 msg_len = sizeof(struct pfsync_state_1400); 1193 msg_version = PFSYNC_MSG_VERSION_1400; 1194 break; 1195 case PFSYNC_ACT_INS_1500: 1196 msg_len = sizeof(struct pfsync_state_1500); 1197 msg_version = PFSYNC_MSG_VERSION_1500; 1198 break; 1199 default: 1200 V_pfsyncstats.pfsyncs_badver++; 1201 return (-1); 1202 } 1203 1204 total_len = msg_len * count; 1205 1206 mp = m_pulldown(m, offset, total_len, &offp); 1207 if (mp == NULL) { 1208 V_pfsyncstats.pfsyncs_badlen++; 1209 return (-1); 1210 } 1211 sa = (union pfsync_state_union *)(mp->m_data + offp); 1212 1213 for (i = 0; i < count; i++) { 1214 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1215 1216 switch (msg_version) { 1217 case PFSYNC_MSG_VERSION_1301: 1218 case PFSYNC_MSG_VERSION_1400: 1219 af = sp->pfs_1301.af; 1220 timeout = sp->pfs_1301.timeout; 1221 direction = sp->pfs_1301.direction; 1222 break; 1223 case PFSYNC_MSG_VERSION_1500: 1224 af = sp->pfs_1500.wire_af; 1225 timeout = sp->pfs_1500.timeout; 1226 direction = sp->pfs_1500.direction; 1227 break; 1228 } 1229 1230 /* Check for invalid values. */ 1231 if (timeout >= PFTM_MAX || 1232 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1233 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || 1234 direction > PF_OUT || 1235 (af != AF_INET && af != AF_INET6)) { 1236 if (V_pf_status.debug >= PF_DEBUG_MISC) 1237 printf("%s: invalid value\n", __func__); 1238 V_pfsyncstats.pfsyncs_badval++; 1239 continue; 1240 } 1241 1242 if (pfsync_state_import(sp, flags, msg_version) != 0) 1243 V_pfsyncstats.pfsyncs_badact++; 1244 } 1245 1246 return (total_len); 1247 } 1248 1249 static int 1250 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) 1251 { 1252 struct pfsync_ins_ack *ia, *iaa; 1253 struct pf_kstate *st; 1254 1255 struct mbuf *mp; 1256 int len = count * sizeof(*ia); 1257 int offp, i; 1258 1259 mp = m_pulldown(m, offset, len, &offp); 1260 if (mp == NULL) { 1261 V_pfsyncstats.pfsyncs_badlen++; 1262 return (-1); 1263 } 1264 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 1265 1266 for (i = 0; i < count; i++) { 1267 ia = &iaa[i]; 1268 1269 st = pf_find_state_byid(ia->id, ia->creatorid); 1270 if (st == NULL) 1271 continue; 1272 1273 if (st->state_flags & PFSTATE_ACK) { 1274 pfsync_undefer_state(st, 0); 1275 } 1276 PF_STATE_UNLOCK(st); 1277 } 1278 /* 1279 * XXX this is not yet implemented, but we know the size of the 1280 * message so we can skip it. 1281 */ 1282 1283 return (count * sizeof(struct pfsync_ins_ack)); 1284 } 1285 1286 static int 1287 pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src, 1288 struct pf_state_peer_export *dst) 1289 { 1290 int sync = 0; 1291 1292 PF_STATE_LOCK_ASSERT(st); 1293 1294 /* 1295 * The state should never go backwards except 1296 * for syn-proxy states. Neither should the 1297 * sequence window slide backwards. 1298 */ 1299 if ((st->src.state > src->state && 1300 (st->src.state < PF_TCPS_PROXY_SRC || 1301 src->state >= PF_TCPS_PROXY_SRC)) || 1302 1303 (st->src.state == src->state && 1304 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 1305 sync++; 1306 else 1307 pf_state_peer_ntoh(src, &st->src); 1308 1309 if ((st->dst.state > dst->state) || 1310 1311 (st->dst.state >= TCPS_SYN_SENT && 1312 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 1313 sync++; 1314 else 1315 pf_state_peer_ntoh(dst, &st->dst); 1316 1317 return (sync); 1318 } 1319 1320 static int 1321 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) 1322 { 1323 struct pfsync_softc *sc = V_pfsyncif; 1324 union pfsync_state_union *sa, *sp; 1325 struct pf_kstate *st; 1326 struct mbuf *mp; 1327 int sync, offp, i, total_len, msg_len, msg_version; 1328 u_int8_t timeout; 1329 1330 switch (action) { 1331 case PFSYNC_ACT_UPD_1301: 1332 msg_len = sizeof(struct pfsync_state_1301); 1333 msg_version = PFSYNC_MSG_VERSION_1301; 1334 break; 1335 case PFSYNC_ACT_UPD_1400: 1336 msg_len = sizeof(struct pfsync_state_1400); 1337 msg_version = PFSYNC_MSG_VERSION_1400; 1338 break; 1339 case PFSYNC_ACT_UPD_1500: 1340 msg_len = sizeof(struct pfsync_state_1500); 1341 msg_version = PFSYNC_MSG_VERSION_1500; 1342 break; 1343 default: 1344 V_pfsyncstats.pfsyncs_badact++; 1345 return (-1); 1346 } 1347 1348 total_len = msg_len * count; 1349 1350 mp = m_pulldown(m, offset, total_len, &offp); 1351 if (mp == NULL) { 1352 V_pfsyncstats.pfsyncs_badlen++; 1353 return (-1); 1354 } 1355 sa = (union pfsync_state_union *)(mp->m_data + offp); 1356 1357 for (i = 0; i < count; i++) { 1358 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1359 1360 switch (msg_version) { 1361 case PFSYNC_MSG_VERSION_1301: 1362 case PFSYNC_MSG_VERSION_1400: 1363 timeout = sp->pfs_1301.timeout; 1364 break; 1365 case PFSYNC_MSG_VERSION_1500: 1366 timeout = sp->pfs_1500.timeout; 1367 break; 1368 } 1369 1370 /* check for invalid values */ 1371 if (timeout >= PFTM_MAX || 1372 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1373 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { 1374 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1375 printf("pfsync_input: PFSYNC_ACT_UPD: " 1376 "invalid value\n"); 1377 } 1378 V_pfsyncstats.pfsyncs_badval++; 1379 continue; 1380 } 1381 1382 st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid); 1383 if (st == NULL) { 1384 /* insert the update */ 1385 if (pfsync_state_import(sp, flags, msg_version)) 1386 V_pfsyncstats.pfsyncs_badstate++; 1387 continue; 1388 } 1389 1390 if (st->state_flags & PFSTATE_ACK) { 1391 pfsync_undefer_state(st, 1); 1392 } 1393 1394 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1395 sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst); 1396 else { 1397 sync = 0; 1398 1399 /* 1400 * Non-TCP protocol state machine always go 1401 * forwards 1402 */ 1403 if (st->src.state > sp->pfs_1301.src.state) 1404 sync++; 1405 else 1406 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 1407 if (st->dst.state > sp->pfs_1301.dst.state) 1408 sync++; 1409 else 1410 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1411 } 1412 if (sync < 2) { 1413 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); 1414 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1415 st->expire = pf_get_uptime(); 1416 st->timeout = timeout; 1417 } 1418 st->pfsync_time = time_uptime; 1419 1420 if (sync) { 1421 V_pfsyncstats.pfsyncs_stale++; 1422 1423 pfsync_update_state(st); 1424 PF_STATE_UNLOCK(st); 1425 pfsync_push_all(sc); 1426 continue; 1427 } 1428 PF_STATE_UNLOCK(st); 1429 } 1430 1431 return (total_len); 1432 } 1433 1434 static int 1435 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action) 1436 { 1437 struct pfsync_softc *sc = V_pfsyncif; 1438 struct pfsync_upd_c *ua, *up; 1439 struct pf_kstate *st; 1440 int len = count * sizeof(*up); 1441 int sync; 1442 struct mbuf *mp; 1443 int offp, i; 1444 1445 mp = m_pulldown(m, offset, len, &offp); 1446 if (mp == NULL) { 1447 V_pfsyncstats.pfsyncs_badlen++; 1448 return (-1); 1449 } 1450 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 1451 1452 for (i = 0; i < count; i++) { 1453 up = &ua[i]; 1454 1455 /* check for invalid values */ 1456 if (up->timeout >= PFTM_MAX || 1457 up->src.state > PF_TCPS_PROXY_DST || 1458 up->dst.state > PF_TCPS_PROXY_DST) { 1459 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1460 printf("pfsync_input: " 1461 "PFSYNC_ACT_UPD_C: " 1462 "invalid value\n"); 1463 } 1464 V_pfsyncstats.pfsyncs_badval++; 1465 continue; 1466 } 1467 1468 st = pf_find_state_byid(up->id, up->creatorid); 1469 if (st == NULL) { 1470 /* We don't have this state. Ask for it. */ 1471 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1472 pfsync_request_update(up->creatorid, up->id); 1473 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1474 continue; 1475 } 1476 1477 if (st->state_flags & PFSTATE_ACK) { 1478 pfsync_undefer_state(st, 1); 1479 } 1480 1481 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1482 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1483 else { 1484 sync = 0; 1485 1486 /* 1487 * Non-TCP protocol state machine always go 1488 * forwards 1489 */ 1490 if (st->src.state > up->src.state) 1491 sync++; 1492 else 1493 pf_state_peer_ntoh(&up->src, &st->src); 1494 if (st->dst.state > up->dst.state) 1495 sync++; 1496 else 1497 pf_state_peer_ntoh(&up->dst, &st->dst); 1498 } 1499 if (sync < 2) { 1500 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1501 pf_state_peer_ntoh(&up->dst, &st->dst); 1502 st->expire = pf_get_uptime(); 1503 st->timeout = up->timeout; 1504 } 1505 st->pfsync_time = time_uptime; 1506 1507 if (sync) { 1508 V_pfsyncstats.pfsyncs_stale++; 1509 1510 pfsync_update_state(st); 1511 PF_STATE_UNLOCK(st); 1512 pfsync_push_all(sc); 1513 continue; 1514 } 1515 PF_STATE_UNLOCK(st); 1516 } 1517 1518 return (len); 1519 } 1520 1521 static int 1522 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action) 1523 { 1524 struct pfsync_upd_req *ur, *ura; 1525 struct mbuf *mp; 1526 int len = count * sizeof(*ur); 1527 int i, offp; 1528 1529 struct pf_kstate *st; 1530 1531 mp = m_pulldown(m, offset, len, &offp); 1532 if (mp == NULL) { 1533 V_pfsyncstats.pfsyncs_badlen++; 1534 return (-1); 1535 } 1536 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1537 1538 for (i = 0; i < count; i++) { 1539 ur = &ura[i]; 1540 1541 if (ur->id == 0 && ur->creatorid == 0) 1542 pfsync_bulk_start(); 1543 else { 1544 st = pf_find_state_byid(ur->id, ur->creatorid); 1545 if (st == NULL) { 1546 V_pfsyncstats.pfsyncs_badstate++; 1547 continue; 1548 } 1549 if (st->state_flags & PFSTATE_NOSYNC) { 1550 PF_STATE_UNLOCK(st); 1551 continue; 1552 } 1553 1554 pfsync_update_state_req(st); 1555 PF_STATE_UNLOCK(st); 1556 } 1557 } 1558 1559 return (len); 1560 } 1561 1562 static int 1563 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action) 1564 { 1565 struct mbuf *mp; 1566 struct pfsync_del_c *sa, *sp; 1567 struct pf_kstate *st; 1568 int len = count * sizeof(*sp); 1569 int offp, i; 1570 1571 mp = m_pulldown(m, offset, len, &offp); 1572 if (mp == NULL) { 1573 V_pfsyncstats.pfsyncs_badlen++; 1574 return (-1); 1575 } 1576 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1577 1578 for (i = 0; i < count; i++) { 1579 sp = &sa[i]; 1580 1581 st = pf_find_state_byid(sp->id, sp->creatorid); 1582 if (st == NULL) { 1583 V_pfsyncstats.pfsyncs_badstate++; 1584 continue; 1585 } 1586 1587 st->state_flags |= PFSTATE_NOSYNC; 1588 pf_remove_state(st); 1589 } 1590 1591 return (len); 1592 } 1593 1594 static int 1595 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action) 1596 { 1597 struct pfsync_softc *sc = V_pfsyncif; 1598 struct pfsync_bus *bus; 1599 struct mbuf *mp; 1600 int len = count * sizeof(*bus); 1601 int offp; 1602 1603 PFSYNC_BLOCK(sc); 1604 1605 /* If we're not waiting for a bulk update, who cares. */ 1606 if (sc->sc_ureq_sent == 0) { 1607 PFSYNC_BUNLOCK(sc); 1608 return (len); 1609 } 1610 1611 mp = m_pulldown(m, offset, len, &offp); 1612 if (mp == NULL) { 1613 PFSYNC_BUNLOCK(sc); 1614 V_pfsyncstats.pfsyncs_badlen++; 1615 return (-1); 1616 } 1617 bus = (struct pfsync_bus *)(mp->m_data + offp); 1618 1619 switch (bus->status) { 1620 case PFSYNC_BUS_START: 1621 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1622 V_pf_limits[PF_LIMIT_STATES].limit / 1623 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1624 sizeof(union pfsync_state_union)), 1625 pfsync_bulk_fail, sc); 1626 if (V_pf_status.debug >= PF_DEBUG_MISC) 1627 printf("pfsync: received bulk update start\n"); 1628 break; 1629 1630 case PFSYNC_BUS_END: 1631 if (time_uptime - ntohl(bus->endtime) >= 1632 sc->sc_ureq_sent) { 1633 /* that's it, we're happy */ 1634 sc->sc_ureq_sent = 0; 1635 sc->sc_bulk_tries = 0; 1636 callout_stop(&sc->sc_bulkfail_tmo); 1637 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1638 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1639 "pfsync bulk done"); 1640 sc->sc_flags |= PFSYNCF_OK; 1641 if (V_pf_status.debug >= PF_DEBUG_MISC) 1642 printf("pfsync: received valid " 1643 "bulk update end\n"); 1644 } else { 1645 if (V_pf_status.debug >= PF_DEBUG_MISC) 1646 printf("pfsync: received invalid " 1647 "bulk update end: bad timestamp\n"); 1648 } 1649 break; 1650 } 1651 PFSYNC_BUNLOCK(sc); 1652 1653 return (len); 1654 } 1655 1656 static int 1657 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action) 1658 { 1659 int len = count * sizeof(struct pfsync_tdb); 1660 1661 #if defined(IPSEC) 1662 struct pfsync_tdb *tp; 1663 struct mbuf *mp; 1664 int offp; 1665 int i; 1666 int s; 1667 1668 mp = m_pulldown(m, offset, len, &offp); 1669 if (mp == NULL) { 1670 V_pfsyncstats.pfsyncs_badlen++; 1671 return (-1); 1672 } 1673 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1674 1675 for (i = 0; i < count; i++) 1676 pfsync_update_net_tdb(&tp[i]); 1677 #endif 1678 1679 return (len); 1680 } 1681 1682 #if defined(IPSEC) 1683 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1684 static void 1685 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1686 { 1687 struct tdb *tdb; 1688 int s; 1689 1690 /* check for invalid values */ 1691 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1692 (pt->dst.sa.sa_family != AF_INET && 1693 pt->dst.sa.sa_family != AF_INET6)) 1694 goto bad; 1695 1696 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1697 if (tdb) { 1698 pt->rpl = ntohl(pt->rpl); 1699 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1700 1701 /* Neither replay nor byte counter should ever decrease. */ 1702 if (pt->rpl < tdb->tdb_rpl || 1703 pt->cur_bytes < tdb->tdb_cur_bytes) { 1704 goto bad; 1705 } 1706 1707 tdb->tdb_rpl = pt->rpl; 1708 tdb->tdb_cur_bytes = pt->cur_bytes; 1709 } 1710 return; 1711 1712 bad: 1713 if (V_pf_status.debug >= PF_DEBUG_MISC) 1714 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1715 "invalid value\n"); 1716 V_pfsyncstats.pfsyncs_badstate++; 1717 return; 1718 } 1719 #endif 1720 1721 static int 1722 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action) 1723 { 1724 /* check if we are at the right place in the packet */ 1725 if (offset != m->m_pkthdr.len) 1726 V_pfsyncstats.pfsyncs_badlen++; 1727 1728 /* we're done. free and let the caller return */ 1729 m_freem(m); 1730 return (-1); 1731 } 1732 1733 static int 1734 pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action) 1735 { 1736 V_pfsyncstats.pfsyncs_badact++; 1737 1738 m_freem(m); 1739 return (-1); 1740 } 1741 1742 static int 1743 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1744 struct route *rt) 1745 { 1746 m_freem(m); 1747 return (0); 1748 } 1749 1750 /* ARGSUSED */ 1751 static int 1752 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1753 { 1754 struct pfsync_softc *sc = ifp->if_softc; 1755 struct ifreq *ifr = (struct ifreq *)data; 1756 struct pfsyncreq pfsyncr; 1757 size_t nvbuflen; 1758 int error; 1759 int c; 1760 1761 switch (cmd) { 1762 case SIOCSIFFLAGS: 1763 PFSYNC_LOCK(sc); 1764 if (ifp->if_flags & IFF_UP) { 1765 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1766 PFSYNC_UNLOCK(sc); 1767 pfsync_pointers_init(); 1768 } else { 1769 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1770 PFSYNC_UNLOCK(sc); 1771 pfsync_pointers_uninit(); 1772 } 1773 break; 1774 case SIOCSIFMTU: 1775 if (!sc->sc_sync_if || 1776 ifr->ifr_mtu <= PFSYNC_MINPKT || 1777 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1778 return (EINVAL); 1779 if (ifr->ifr_mtu < ifp->if_mtu) { 1780 for (c = 0; c < pfsync_buckets; c++) { 1781 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1782 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1783 pfsync_sendout(1, c); 1784 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1785 } 1786 } 1787 ifp->if_mtu = ifr->ifr_mtu; 1788 break; 1789 case SIOCGETPFSYNC: 1790 bzero(&pfsyncr, sizeof(pfsyncr)); 1791 PFSYNC_LOCK(sc); 1792 if (sc->sc_sync_if) { 1793 strlcpy(pfsyncr.pfsyncr_syncdev, 1794 sc->sc_sync_if->if_xname, IFNAMSIZ); 1795 } 1796 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1797 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1798 pfsyncr.pfsyncr_defer = sc->sc_flags; 1799 PFSYNC_UNLOCK(sc); 1800 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1801 sizeof(pfsyncr))); 1802 1803 case SIOCGETPFSYNCNV: 1804 { 1805 nvlist_t *nvl_syncpeer; 1806 nvlist_t *nvl = nvlist_create(0); 1807 1808 if (nvl == NULL) 1809 return (ENOMEM); 1810 1811 if (sc->sc_sync_if) 1812 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1813 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1814 nvlist_add_number(nvl, "flags", sc->sc_flags); 1815 nvlist_add_number(nvl, "version", sc->sc_version); 1816 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1817 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1818 1819 void *packed = NULL; 1820 packed = nvlist_pack(nvl, &nvbuflen); 1821 if (packed == NULL) { 1822 free(packed, M_NVLIST); 1823 nvlist_destroy(nvl); 1824 return (ENOMEM); 1825 } 1826 1827 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1828 ifr->ifr_cap_nv.length = nvbuflen; 1829 ifr->ifr_cap_nv.buffer = NULL; 1830 free(packed, M_NVLIST); 1831 nvlist_destroy(nvl); 1832 return (EFBIG); 1833 } 1834 1835 ifr->ifr_cap_nv.length = nvbuflen; 1836 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1837 1838 nvlist_destroy(nvl); 1839 nvlist_destroy(nvl_syncpeer); 1840 free(packed, M_NVLIST); 1841 break; 1842 } 1843 1844 case SIOCSETPFSYNC: 1845 { 1846 struct pfsync_kstatus status; 1847 1848 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1849 return (error); 1850 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1851 sizeof(pfsyncr)))) 1852 return (error); 1853 1854 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1855 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1856 1857 error = pfsync_kstatus_to_softc(&status, sc); 1858 return (error); 1859 } 1860 case SIOCSETPFSYNCNV: 1861 { 1862 struct pfsync_kstatus status; 1863 void *data; 1864 nvlist_t *nvl; 1865 1866 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1867 return (error); 1868 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1869 return (EINVAL); 1870 1871 data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK); 1872 1873 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1874 ifr->ifr_cap_nv.length)) != 0) { 1875 free(data, M_PF); 1876 return (error); 1877 } 1878 1879 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1880 free(data, M_PF); 1881 return (EINVAL); 1882 } 1883 1884 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1885 pfsync_nvstatus_to_kstatus(nvl, &status); 1886 1887 nvlist_destroy(nvl); 1888 free(data, M_PF); 1889 1890 error = pfsync_kstatus_to_softc(&status, sc); 1891 return (error); 1892 } 1893 default: 1894 return (ENOTTY); 1895 } 1896 1897 return (0); 1898 } 1899 1900 static void 1901 pfsync_out_state_1301(struct pf_kstate *st, void *buf) 1902 { 1903 struct pfsync_state_1301 *sp; 1904 1905 sp = buf; 1906 pfsync_state_export_1301(sp, st); 1907 } 1908 1909 static void 1910 pfsync_out_state_1400(struct pf_kstate *st, void *buf) 1911 { 1912 struct pfsync_state_1400 *sp; 1913 1914 sp = buf; 1915 pfsync_state_export_1400(sp, st); 1916 } 1917 1918 static void 1919 pfsync_out_state_1500(struct pf_kstate *st, void *buf) 1920 { 1921 struct pfsync_state_1500 *sp; 1922 1923 sp = buf; 1924 pfsync_state_export_1500(sp, st); 1925 } 1926 1927 static void 1928 pfsync_out_iack(struct pf_kstate *st, void *buf) 1929 { 1930 struct pfsync_ins_ack *iack = buf; 1931 1932 iack->id = st->id; 1933 iack->creatorid = st->creatorid; 1934 } 1935 1936 static void 1937 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1938 { 1939 struct pfsync_upd_c *up = buf; 1940 1941 bzero(up, sizeof(*up)); 1942 up->id = st->id; 1943 pf_state_peer_hton(&st->src, &up->src); 1944 pf_state_peer_hton(&st->dst, &up->dst); 1945 up->creatorid = st->creatorid; 1946 up->timeout = st->timeout; 1947 } 1948 1949 static void 1950 pfsync_out_del_c(struct pf_kstate *st, void *buf) 1951 { 1952 struct pfsync_del_c *dp = buf; 1953 1954 dp->id = st->id; 1955 dp->creatorid = st->creatorid; 1956 st->state_flags |= PFSTATE_NOSYNC; 1957 } 1958 1959 static void 1960 pfsync_drop_all(struct pfsync_softc *sc) 1961 { 1962 struct pfsync_bucket *b; 1963 int c; 1964 1965 for (c = 0; c < pfsync_buckets; c++) { 1966 b = &sc->sc_buckets[c]; 1967 1968 PFSYNC_BUCKET_LOCK(b); 1969 pfsync_drop(sc, c); 1970 PFSYNC_BUCKET_UNLOCK(b); 1971 } 1972 } 1973 1974 static void 1975 pfsync_drop(struct pfsync_softc *sc, int c) 1976 { 1977 struct pf_kstate *st, *next; 1978 struct pfsync_upd_req_item *ur; 1979 struct pfsync_bucket *b; 1980 enum pfsync_q_id q; 1981 1982 b = &sc->sc_buckets[c]; 1983 PFSYNC_BUCKET_LOCK_ASSERT(b); 1984 1985 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1986 if (TAILQ_EMPTY(&b->b_qs[q])) 1987 continue; 1988 1989 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1990 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1991 ("%s: st->sync_state %d == q %d", 1992 __func__, st->sync_state, q)); 1993 st->sync_state = PFSYNC_S_NONE; 1994 pf_release_state(st); 1995 } 1996 TAILQ_INIT(&b->b_qs[q]); 1997 } 1998 1999 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 2000 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 2001 free(ur, M_PFSYNC); 2002 } 2003 2004 b->b_len = PFSYNC_MINPKT; 2005 free(b->b_plus, M_PFSYNC); 2006 b->b_plus = NULL; 2007 b->b_pluslen = 0; 2008 } 2009 2010 static void 2011 pfsync_sendout(int schedswi, int c) 2012 { 2013 struct pfsync_softc *sc = V_pfsyncif; 2014 struct ifnet *ifp = sc->sc_ifp; 2015 struct mbuf *m; 2016 struct pfsync_header *ph; 2017 struct pfsync_subheader *subh; 2018 struct pf_kstate *st, *st_next; 2019 struct pfsync_upd_req_item *ur; 2020 struct pfsync_bucket *b = &sc->sc_buckets[c]; 2021 size_t len; 2022 int aflen, offset, count = 0; 2023 enum pfsync_q_id q; 2024 2025 KASSERT(sc != NULL, ("%s: null sc", __func__)); 2026 KASSERT(b->b_len > PFSYNC_MINPKT, 2027 ("%s: sc_len %zu", __func__, b->b_len)); 2028 PFSYNC_BUCKET_LOCK_ASSERT(b); 2029 2030 if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) { 2031 pfsync_drop(sc, c); 2032 return; 2033 } 2034 2035 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 2036 if (m == NULL) { 2037 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2038 V_pfsyncstats.pfsyncs_onomem++; 2039 return; 2040 } 2041 m->m_data += max_linkhdr; 2042 bzero(m->m_data, b->b_len); 2043 2044 len = b->b_len; 2045 2046 /* build the ip header */ 2047 switch (sc->sc_sync_peer.ss_family) { 2048 #ifdef INET 2049 case AF_INET: 2050 { 2051 struct ip *ip; 2052 2053 ip = mtod(m, struct ip *); 2054 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 2055 aflen = offset = sizeof(*ip); 2056 2057 len -= sizeof(union inet_template) - sizeof(struct ip); 2058 ip->ip_len = htons(len); 2059 ip_fillid(ip, V_ip_random_id); 2060 break; 2061 } 2062 #endif 2063 #ifdef INET6 2064 case AF_INET6: 2065 { 2066 struct ip6_hdr *ip6; 2067 2068 ip6 = mtod(m, struct ip6_hdr *); 2069 bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6)); 2070 aflen = offset = sizeof(*ip6); 2071 2072 len -= sizeof(union inet_template) - sizeof(struct ip6_hdr); 2073 ip6->ip6_plen = htons(len); 2074 break; 2075 } 2076 #endif 2077 default: 2078 m_freem(m); 2079 pfsync_drop(sc, c); 2080 return; 2081 } 2082 m->m_len = m->m_pkthdr.len = len; 2083 2084 /* build the pfsync header */ 2085 ph = (struct pfsync_header *)(m->m_data + offset); 2086 offset += sizeof(*ph); 2087 2088 ph->version = PFSYNC_VERSION; 2089 ph->len = htons(len - aflen); 2090 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 2091 2092 /* walk the queues */ 2093 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 2094 if (TAILQ_EMPTY(&b->b_qs[q])) 2095 continue; 2096 2097 subh = (struct pfsync_subheader *)(m->m_data + offset); 2098 offset += sizeof(*subh); 2099 2100 count = 0; 2101 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 2102 KASSERT(st->sync_state == pfsync_qid_sstate[q], 2103 ("%s: st->sync_state == q", 2104 __func__)); 2105 /* 2106 * XXXGL: some of write methods do unlocked reads 2107 * of state data :( 2108 */ 2109 pfsync_qs[q].write(st, m->m_data + offset); 2110 offset += pfsync_qs[q].len; 2111 st->sync_state = PFSYNC_S_NONE; 2112 pf_release_state(st); 2113 count++; 2114 } 2115 TAILQ_INIT(&b->b_qs[q]); 2116 2117 subh->action = pfsync_qs[q].action; 2118 subh->count = htons(count); 2119 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 2120 } 2121 2122 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 2123 subh = (struct pfsync_subheader *)(m->m_data + offset); 2124 offset += sizeof(*subh); 2125 2126 count = 0; 2127 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 2128 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 2129 2130 bcopy(&ur->ur_msg, m->m_data + offset, 2131 sizeof(ur->ur_msg)); 2132 offset += sizeof(ur->ur_msg); 2133 free(ur, M_PFSYNC); 2134 count++; 2135 } 2136 2137 subh->action = PFSYNC_ACT_UPD_REQ; 2138 subh->count = htons(count); 2139 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 2140 } 2141 2142 /* has someone built a custom region for us to add? */ 2143 if (b->b_plus != NULL) { 2144 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 2145 offset += b->b_pluslen; 2146 2147 free(b->b_plus, M_PFSYNC); 2148 b->b_plus = NULL; 2149 b->b_pluslen = 0; 2150 } 2151 2152 subh = (struct pfsync_subheader *)(m->m_data + offset); 2153 offset += sizeof(*subh); 2154 2155 subh->action = PFSYNC_ACT_EOF; 2156 subh->count = htons(1); 2157 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 2158 2159 /* we're done, let's put it on the wire */ 2160 if (bpf_peers_present(ifp->if_bpf)) { 2161 m->m_data += aflen; 2162 m->m_len = m->m_pkthdr.len = len - aflen; 2163 bpf_mtap(ifp->if_bpf, m); 2164 m->m_data -= aflen; 2165 m->m_len = m->m_pkthdr.len = len; 2166 } 2167 2168 if (sc->sc_sync_if == NULL) { 2169 b->b_len = PFSYNC_MINPKT; 2170 m_freem(m); 2171 return; 2172 } 2173 2174 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 2175 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 2176 b->b_len = PFSYNC_MINPKT; 2177 2178 if (!_IF_QFULL(&b->b_snd)) 2179 _IF_ENQUEUE(&b->b_snd, m); 2180 else { 2181 m_freem(m); 2182 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 2183 } 2184 if (schedswi) 2185 swi_sched(V_pfsync_swi_cookie, 0); 2186 } 2187 2188 static void 2189 pfsync_insert_state(struct pf_kstate *st) 2190 { 2191 struct pfsync_softc *sc = V_pfsyncif; 2192 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2193 2194 if (st->state_flags & PFSTATE_NOSYNC) 2195 return; 2196 2197 if ((st->rule->rule_flag & PFRULE_NOSYNC) || 2198 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 2199 st->state_flags |= PFSTATE_NOSYNC; 2200 return; 2201 } 2202 2203 KASSERT(st->sync_state == PFSYNC_S_NONE, 2204 ("%s: st->sync_state %u", __func__, st->sync_state)); 2205 2206 PFSYNC_BUCKET_LOCK(b); 2207 if (b->b_len == PFSYNC_MINPKT) 2208 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2209 2210 pfsync_q_ins(st, PFSYNC_S_INS, true); 2211 PFSYNC_BUCKET_UNLOCK(b); 2212 2213 st->sync_updates = 0; 2214 } 2215 2216 static int 2217 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 2218 { 2219 struct pfsync_softc *sc = V_pfsyncif; 2220 struct pfsync_deferral *pd; 2221 struct pfsync_bucket *b; 2222 2223 if (m->m_flags & (M_BCAST|M_MCAST)) 2224 return (0); 2225 2226 if (sc == NULL) 2227 return (0); 2228 2229 b = pfsync_get_bucket(sc, st); 2230 2231 PFSYNC_LOCK(sc); 2232 2233 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 2234 !(sc->sc_flags & PFSYNCF_DEFER)) { 2235 PFSYNC_UNLOCK(sc); 2236 return (0); 2237 } 2238 2239 PFSYNC_BUCKET_LOCK(b); 2240 PFSYNC_UNLOCK(sc); 2241 2242 if (b->b_deferred >= 128) 2243 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 2244 2245 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 2246 if (pd == NULL) { 2247 PFSYNC_BUCKET_UNLOCK(b); 2248 return (0); 2249 } 2250 b->b_deferred++; 2251 2252 m->m_flags |= M_SKIP_FIREWALL; 2253 st->state_flags |= PFSTATE_ACK; 2254 2255 pd->pd_sc = sc; 2256 pd->pd_st = st; 2257 pf_ref_state(st); 2258 pd->pd_m = m; 2259 2260 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 2261 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 2262 callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, 2263 pfsync_defer_tmo, pd); 2264 2265 pfsync_push(b); 2266 PFSYNC_BUCKET_UNLOCK(b); 2267 2268 return (1); 2269 } 2270 2271 static void 2272 pfsync_undefer(struct pfsync_deferral *pd, int drop) 2273 { 2274 struct pfsync_softc *sc = pd->pd_sc; 2275 struct mbuf *m = pd->pd_m; 2276 struct pf_kstate *st = pd->pd_st; 2277 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2278 2279 PFSYNC_BUCKET_LOCK_ASSERT(b); 2280 2281 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2282 b->b_deferred--; 2283 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2284 free(pd, M_PFSYNC); 2285 pf_release_state(st); 2286 2287 if (drop) 2288 m_freem(m); 2289 else { 2290 _IF_ENQUEUE(&b->b_snd, m); 2291 pfsync_push(b); 2292 } 2293 } 2294 2295 static void 2296 pfsync_defer_tmo(void *arg) 2297 { 2298 struct epoch_tracker et; 2299 struct pfsync_deferral *pd = arg; 2300 struct pfsync_softc *sc = pd->pd_sc; 2301 struct mbuf *m = pd->pd_m; 2302 struct pf_kstate *st = pd->pd_st; 2303 struct pfsync_bucket *b; 2304 2305 CURVNET_SET(sc->sc_ifp->if_vnet); 2306 2307 b = pfsync_get_bucket(sc, st); 2308 2309 PFSYNC_BUCKET_LOCK_ASSERT(b); 2310 2311 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2312 b->b_deferred--; 2313 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2314 PFSYNC_BUCKET_UNLOCK(b); 2315 free(pd, M_PFSYNC); 2316 2317 if (sc->sc_sync_if == NULL) { 2318 pf_release_state(st); 2319 m_freem(m); 2320 CURVNET_RESTORE(); 2321 return; 2322 } 2323 2324 NET_EPOCH_ENTER(et); 2325 2326 pfsync_tx(sc, m); 2327 2328 pf_release_state(st); 2329 2330 CURVNET_RESTORE(); 2331 NET_EPOCH_EXIT(et); 2332 } 2333 2334 static void 2335 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 2336 { 2337 struct pfsync_softc *sc = V_pfsyncif; 2338 struct pfsync_deferral *pd; 2339 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2340 2341 PFSYNC_BUCKET_LOCK_ASSERT(b); 2342 2343 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 2344 if (pd->pd_st == st) { 2345 if (callout_stop(&pd->pd_tmo) > 0) 2346 pfsync_undefer(pd, drop); 2347 2348 return; 2349 } 2350 } 2351 2352 panic("%s: unable to find deferred state", __func__); 2353 } 2354 2355 static void 2356 pfsync_undefer_state(struct pf_kstate *st, int drop) 2357 { 2358 struct pfsync_softc *sc = V_pfsyncif; 2359 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2360 2361 PFSYNC_BUCKET_LOCK(b); 2362 pfsync_undefer_state_locked(st, drop); 2363 PFSYNC_BUCKET_UNLOCK(b); 2364 } 2365 2366 static struct pfsync_bucket* 2367 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 2368 { 2369 int c = PF_IDHASH(st) % pfsync_buckets; 2370 return &sc->sc_buckets[c]; 2371 } 2372 2373 static void 2374 pfsync_update_state(struct pf_kstate *st) 2375 { 2376 struct pfsync_softc *sc = V_pfsyncif; 2377 bool sync = false, ref = true; 2378 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2379 2380 PF_STATE_LOCK_ASSERT(st); 2381 PFSYNC_BUCKET_LOCK(b); 2382 2383 if (st->state_flags & PFSTATE_ACK) 2384 pfsync_undefer_state_locked(st, 0); 2385 if (st->state_flags & PFSTATE_NOSYNC) { 2386 if (st->sync_state != PFSYNC_S_NONE) 2387 pfsync_q_del(st, true, b); 2388 PFSYNC_BUCKET_UNLOCK(b); 2389 return; 2390 } 2391 2392 if (b->b_len == PFSYNC_MINPKT) 2393 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2394 2395 switch (st->sync_state) { 2396 case PFSYNC_S_UPD_C: 2397 case PFSYNC_S_UPD: 2398 case PFSYNC_S_INS: 2399 /* we're already handling it */ 2400 2401 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2402 st->sync_updates++; 2403 if (st->sync_updates >= sc->sc_maxupdates) 2404 sync = true; 2405 } 2406 break; 2407 2408 case PFSYNC_S_IACK: 2409 pfsync_q_del(st, false, b); 2410 ref = false; 2411 /* FALLTHROUGH */ 2412 2413 case PFSYNC_S_NONE: 2414 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 2415 st->sync_updates = 0; 2416 break; 2417 2418 default: 2419 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2420 } 2421 2422 if (sync || (time_uptime - st->pfsync_time) < 2) 2423 pfsync_push(b); 2424 2425 PFSYNC_BUCKET_UNLOCK(b); 2426 } 2427 2428 static void 2429 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 2430 { 2431 struct pfsync_softc *sc = V_pfsyncif; 2432 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2433 struct pfsync_upd_req_item *item; 2434 size_t nlen = sizeof(struct pfsync_upd_req); 2435 2436 PFSYNC_BUCKET_LOCK_ASSERT(b); 2437 2438 /* 2439 * This code does a bit to prevent multiple update requests for the 2440 * same state being generated. It searches current subheader queue, 2441 * but it doesn't lookup into queue of already packed datagrams. 2442 */ 2443 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 2444 if (item->ur_msg.id == id && 2445 item->ur_msg.creatorid == creatorid) 2446 return; 2447 2448 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 2449 if (item == NULL) 2450 return; /* XXX stats */ 2451 2452 item->ur_msg.id = id; 2453 item->ur_msg.creatorid = creatorid; 2454 2455 if (TAILQ_EMPTY(&b->b_upd_req_list)) 2456 nlen += sizeof(struct pfsync_subheader); 2457 2458 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2459 pfsync_sendout(0, 0); 2460 2461 nlen = sizeof(struct pfsync_subheader) + 2462 sizeof(struct pfsync_upd_req); 2463 } 2464 2465 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 2466 b->b_len += nlen; 2467 2468 pfsync_push(b); 2469 } 2470 2471 static bool 2472 pfsync_update_state_req(struct pf_kstate *st) 2473 { 2474 struct pfsync_softc *sc = V_pfsyncif; 2475 bool ref = true, full = false; 2476 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2477 2478 PF_STATE_LOCK_ASSERT(st); 2479 PFSYNC_BUCKET_LOCK(b); 2480 2481 if (st->state_flags & PFSTATE_NOSYNC) { 2482 if (st->sync_state != PFSYNC_S_NONE) 2483 pfsync_q_del(st, true, b); 2484 PFSYNC_BUCKET_UNLOCK(b); 2485 return (full); 2486 } 2487 2488 switch (st->sync_state) { 2489 case PFSYNC_S_UPD_C: 2490 case PFSYNC_S_IACK: 2491 pfsync_q_del(st, false, b); 2492 ref = false; 2493 /* FALLTHROUGH */ 2494 2495 case PFSYNC_S_NONE: 2496 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2497 pfsync_push(b); 2498 break; 2499 2500 case PFSYNC_S_INS: 2501 case PFSYNC_S_UPD: 2502 case PFSYNC_S_DEL_C: 2503 /* we're already handling it */ 2504 break; 2505 2506 default: 2507 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2508 } 2509 2510 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union)) 2511 full = true; 2512 2513 PFSYNC_BUCKET_UNLOCK(b); 2514 2515 return (full); 2516 } 2517 2518 static void 2519 pfsync_delete_state(struct pf_kstate *st) 2520 { 2521 struct pfsync_softc *sc = V_pfsyncif; 2522 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2523 bool ref = true; 2524 2525 PFSYNC_BUCKET_LOCK(b); 2526 if (st->state_flags & PFSTATE_ACK) 2527 pfsync_undefer_state_locked(st, 1); 2528 if (st->state_flags & PFSTATE_NOSYNC) { 2529 if (st->sync_state != PFSYNC_S_NONE) 2530 pfsync_q_del(st, true, b); 2531 PFSYNC_BUCKET_UNLOCK(b); 2532 return; 2533 } 2534 2535 if (b->b_len == PFSYNC_MINPKT) 2536 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2537 2538 switch (st->sync_state) { 2539 case PFSYNC_S_INS: 2540 /* We never got to tell the world so just forget about it. */ 2541 pfsync_q_del(st, true, b); 2542 break; 2543 2544 case PFSYNC_S_UPD_C: 2545 case PFSYNC_S_UPD: 2546 case PFSYNC_S_IACK: 2547 pfsync_q_del(st, false, b); 2548 ref = false; 2549 /* FALLTHROUGH */ 2550 2551 case PFSYNC_S_NONE: 2552 pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); 2553 break; 2554 2555 default: 2556 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2557 } 2558 2559 PFSYNC_BUCKET_UNLOCK(b); 2560 } 2561 2562 static void 2563 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2564 { 2565 struct { 2566 struct pfsync_subheader subh; 2567 struct pfsync_clr clr; 2568 } __packed r; 2569 2570 bzero(&r, sizeof(r)); 2571 2572 r.subh.action = PFSYNC_ACT_CLR; 2573 r.subh.count = htons(1); 2574 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2575 2576 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2577 r.clr.creatorid = creatorid; 2578 2579 pfsync_send_plus(&r, sizeof(r)); 2580 } 2581 2582 static enum pfsync_q_id 2583 pfsync_sstate_to_qid(u_int8_t sync_state) 2584 { 2585 struct pfsync_softc *sc = V_pfsyncif; 2586 2587 switch (sync_state) { 2588 case PFSYNC_S_INS: 2589 switch (sc->sc_version) { 2590 case PFSYNC_MSG_VERSION_1301: 2591 return PFSYNC_Q_INS_1301; 2592 case PFSYNC_MSG_VERSION_1400: 2593 return PFSYNC_Q_INS_1400; 2594 case PFSYNC_MSG_VERSION_1500: 2595 return PFSYNC_Q_INS_1500; 2596 } 2597 break; 2598 case PFSYNC_S_IACK: 2599 return PFSYNC_Q_IACK; 2600 case PFSYNC_S_UPD: 2601 switch (sc->sc_version) { 2602 case PFSYNC_MSG_VERSION_1301: 2603 return PFSYNC_Q_UPD_1301; 2604 case PFSYNC_MSG_VERSION_1400: 2605 return PFSYNC_Q_UPD_1400; 2606 case PFSYNC_MSG_VERSION_1500: 2607 return PFSYNC_Q_UPD_1500; 2608 } 2609 break; 2610 case PFSYNC_S_UPD_C: 2611 return PFSYNC_Q_UPD_C; 2612 case PFSYNC_S_DEL_C: 2613 return PFSYNC_Q_DEL_C; 2614 default: 2615 panic("%s: Unsupported st->sync_state 0x%02x", 2616 __func__, sync_state); 2617 } 2618 2619 panic("%s: Unsupported pfsync_msg_version %d", 2620 __func__, sc->sc_version); 2621 } 2622 2623 static void 2624 pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref) 2625 { 2626 enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state); 2627 struct pfsync_softc *sc = V_pfsyncif; 2628 size_t nlen = pfsync_qs[q].len; 2629 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2630 2631 PFSYNC_BUCKET_LOCK_ASSERT(b); 2632 2633 KASSERT(st->sync_state == PFSYNC_S_NONE, 2634 ("%s: st->sync_state %u", __func__, st->sync_state)); 2635 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2636 b->b_len)); 2637 2638 if (TAILQ_EMPTY(&b->b_qs[q])) 2639 nlen += sizeof(struct pfsync_subheader); 2640 2641 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2642 pfsync_sendout(1, b->b_id); 2643 2644 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2645 } 2646 2647 b->b_len += nlen; 2648 st->sync_state = pfsync_qid_sstate[q]; 2649 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2650 if (ref) 2651 pf_ref_state(st); 2652 } 2653 2654 static void 2655 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2656 { 2657 enum pfsync_q_id q; 2658 2659 PFSYNC_BUCKET_LOCK_ASSERT(b); 2660 KASSERT(st->sync_state != PFSYNC_S_NONE, 2661 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2662 2663 q = pfsync_sstate_to_qid(st->sync_state); 2664 b->b_len -= pfsync_qs[q].len; 2665 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2666 st->sync_state = PFSYNC_S_NONE; 2667 if (unref) 2668 pf_release_state(st); 2669 2670 if (TAILQ_EMPTY(&b->b_qs[q])) 2671 b->b_len -= sizeof(struct pfsync_subheader); 2672 } 2673 2674 static void 2675 pfsync_bulk_start(void) 2676 { 2677 struct pfsync_softc *sc = V_pfsyncif; 2678 2679 if (V_pf_status.debug >= PF_DEBUG_MISC) 2680 printf("pfsync: received bulk update request\n"); 2681 2682 PFSYNC_BLOCK(sc); 2683 2684 sc->sc_ureq_received = time_uptime; 2685 sc->sc_bulk_hashid = 0; 2686 sc->sc_bulk_stateid = 0; 2687 pfsync_bulk_status(PFSYNC_BUS_START); 2688 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2689 PFSYNC_BUNLOCK(sc); 2690 } 2691 2692 static void 2693 pfsync_bulk_update(void *arg) 2694 { 2695 struct pfsync_softc *sc = arg; 2696 struct pf_kstate *s; 2697 int i; 2698 2699 PFSYNC_BLOCK_ASSERT(sc); 2700 CURVNET_SET(sc->sc_ifp->if_vnet); 2701 2702 /* 2703 * Start with last state from previous invocation. 2704 * It may had gone, in this case start from the 2705 * hash slot. 2706 */ 2707 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2708 2709 if (s != NULL) 2710 i = PF_IDHASH(s); 2711 else 2712 i = sc->sc_bulk_hashid; 2713 2714 for (; i <= V_pf_hashmask; i++) { 2715 struct pf_idhash *ih = &V_pf_idhash[i]; 2716 2717 if (s != NULL) 2718 PF_HASHROW_ASSERT(ih); 2719 else { 2720 PF_HASHROW_LOCK(ih); 2721 s = LIST_FIRST(&ih->states); 2722 } 2723 2724 for (; s; s = LIST_NEXT(s, entry)) { 2725 if (s->sync_state == PFSYNC_S_NONE && 2726 s->timeout < PFTM_MAX && 2727 s->pfsync_time <= sc->sc_ureq_received) { 2728 if (pfsync_update_state_req(s)) { 2729 /* We've filled a packet. */ 2730 sc->sc_bulk_hashid = i; 2731 sc->sc_bulk_stateid = s->id; 2732 sc->sc_bulk_creatorid = s->creatorid; 2733 PF_HASHROW_UNLOCK(ih); 2734 callout_reset(&sc->sc_bulk_tmo, 1, 2735 pfsync_bulk_update, sc); 2736 goto full; 2737 } 2738 } 2739 } 2740 PF_HASHROW_UNLOCK(ih); 2741 } 2742 2743 /* We're done. */ 2744 pfsync_bulk_status(PFSYNC_BUS_END); 2745 full: 2746 CURVNET_RESTORE(); 2747 } 2748 2749 static void 2750 pfsync_bulk_status(u_int8_t status) 2751 { 2752 struct { 2753 struct pfsync_subheader subh; 2754 struct pfsync_bus bus; 2755 } __packed r; 2756 2757 struct pfsync_softc *sc = V_pfsyncif; 2758 2759 bzero(&r, sizeof(r)); 2760 2761 r.subh.action = PFSYNC_ACT_BUS; 2762 r.subh.count = htons(1); 2763 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2764 2765 r.bus.creatorid = V_pf_status.hostid; 2766 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2767 r.bus.status = status; 2768 2769 pfsync_send_plus(&r, sizeof(r)); 2770 } 2771 2772 static void 2773 pfsync_bulk_fail(void *arg) 2774 { 2775 struct pfsync_softc *sc = arg; 2776 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2777 2778 CURVNET_SET(sc->sc_ifp->if_vnet); 2779 2780 PFSYNC_BLOCK_ASSERT(sc); 2781 2782 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2783 /* Try again */ 2784 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2785 pfsync_bulk_fail, V_pfsyncif); 2786 PFSYNC_BUCKET_LOCK(b); 2787 pfsync_request_update(0, 0); 2788 PFSYNC_BUCKET_UNLOCK(b); 2789 } else { 2790 /* Pretend like the transfer was ok. */ 2791 sc->sc_ureq_sent = 0; 2792 sc->sc_bulk_tries = 0; 2793 PFSYNC_LOCK(sc); 2794 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2795 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2796 "pfsync bulk fail"); 2797 sc->sc_flags |= PFSYNCF_OK; 2798 PFSYNC_UNLOCK(sc); 2799 if (V_pf_status.debug >= PF_DEBUG_MISC) 2800 printf("pfsync: failed to receive bulk update\n"); 2801 } 2802 2803 CURVNET_RESTORE(); 2804 } 2805 2806 static void 2807 pfsync_send_plus(void *plus, size_t pluslen) 2808 { 2809 struct pfsync_softc *sc = V_pfsyncif; 2810 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2811 uint8_t *newplus; 2812 2813 PFSYNC_BUCKET_LOCK(b); 2814 2815 if (b->b_len + pluslen > sc->sc_ifp->if_mtu) 2816 pfsync_sendout(1, b->b_id); 2817 2818 newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT); 2819 if (newplus == NULL) 2820 goto out; 2821 2822 if (b->b_plus != NULL) { 2823 memcpy(newplus, b->b_plus, b->b_pluslen); 2824 free(b->b_plus, M_PFSYNC); 2825 } else { 2826 MPASS(b->b_pluslen == 0); 2827 } 2828 memcpy(newplus + b->b_pluslen, plus, pluslen); 2829 2830 b->b_plus = newplus; 2831 b->b_pluslen += pluslen; 2832 b->b_len += pluslen; 2833 2834 pfsync_sendout(1, b->b_id); 2835 2836 out: 2837 PFSYNC_BUCKET_UNLOCK(b); 2838 } 2839 2840 static void 2841 pfsync_timeout(void *arg) 2842 { 2843 struct pfsync_bucket *b = arg; 2844 2845 CURVNET_SET(b->b_sc->sc_ifp->if_vnet); 2846 PFSYNC_BUCKET_LOCK(b); 2847 pfsync_push(b); 2848 PFSYNC_BUCKET_UNLOCK(b); 2849 CURVNET_RESTORE(); 2850 } 2851 2852 static void 2853 pfsync_push(struct pfsync_bucket *b) 2854 { 2855 2856 PFSYNC_BUCKET_LOCK_ASSERT(b); 2857 2858 b->b_flags |= PFSYNCF_BUCKET_PUSH; 2859 swi_sched(V_pfsync_swi_cookie, 0); 2860 } 2861 2862 static void 2863 pfsync_push_all(struct pfsync_softc *sc) 2864 { 2865 int c; 2866 struct pfsync_bucket *b; 2867 2868 for (c = 0; c < pfsync_buckets; c++) { 2869 b = &sc->sc_buckets[c]; 2870 2871 PFSYNC_BUCKET_LOCK(b); 2872 pfsync_push(b); 2873 PFSYNC_BUCKET_UNLOCK(b); 2874 } 2875 } 2876 2877 static void 2878 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) 2879 { 2880 struct ip *ip; 2881 int af, error = 0; 2882 2883 ip = mtod(m, struct ip *); 2884 MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); 2885 2886 af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; 2887 2888 /* 2889 * We distinguish between a deferral packet and our 2890 * own pfsync packet based on M_SKIP_FIREWALL 2891 * flag. This is XXX. 2892 */ 2893 switch (af) { 2894 #ifdef INET 2895 case AF_INET: 2896 if (m->m_flags & M_SKIP_FIREWALL) { 2897 error = ip_output(m, NULL, NULL, 0, 2898 NULL, NULL); 2899 } else { 2900 error = ip_output(m, NULL, NULL, 2901 IP_RAWOUTPUT, &sc->sc_imo, NULL); 2902 } 2903 break; 2904 #endif 2905 #ifdef INET6 2906 case AF_INET6: 2907 if (m->m_flags & M_SKIP_FIREWALL) { 2908 error = ip6_output(m, NULL, NULL, 0, 2909 NULL, NULL, NULL); 2910 } else { 2911 error = ip6_output(m, NULL, NULL, 0, 2912 &sc->sc_im6o, NULL, NULL); 2913 } 2914 break; 2915 #endif 2916 } 2917 2918 if (error == 0) 2919 V_pfsyncstats.pfsyncs_opackets++; 2920 else 2921 V_pfsyncstats.pfsyncs_oerrors++; 2922 2923 } 2924 2925 static void 2926 pfsyncintr(void *arg) 2927 { 2928 struct epoch_tracker et; 2929 struct pfsync_softc *sc = arg; 2930 struct pfsync_bucket *b; 2931 struct mbuf *m, *n; 2932 int c; 2933 2934 NET_EPOCH_ENTER(et); 2935 CURVNET_SET(sc->sc_ifp->if_vnet); 2936 2937 for (c = 0; c < pfsync_buckets; c++) { 2938 b = &sc->sc_buckets[c]; 2939 2940 PFSYNC_BUCKET_LOCK(b); 2941 if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { 2942 pfsync_sendout(0, b->b_id); 2943 b->b_flags &= ~PFSYNCF_BUCKET_PUSH; 2944 } 2945 _IF_DEQUEUE_ALL(&b->b_snd, m); 2946 PFSYNC_BUCKET_UNLOCK(b); 2947 2948 for (; m != NULL; m = n) { 2949 n = m->m_nextpkt; 2950 m->m_nextpkt = NULL; 2951 2952 pfsync_tx(sc, m); 2953 } 2954 } 2955 CURVNET_RESTORE(); 2956 NET_EPOCH_EXIT(et); 2957 } 2958 2959 static int 2960 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, 2961 struct in_mfilter* imf, struct in6_mfilter* im6f) 2962 { 2963 #ifdef INET 2964 struct ip_moptions *imo = &sc->sc_imo; 2965 #endif 2966 #ifdef INET6 2967 struct ip6_moptions *im6o = &sc->sc_im6o; 2968 struct sockaddr_in6 *syncpeer_sa6 = NULL; 2969 #endif 2970 2971 if (!(ifp->if_flags & IFF_MULTICAST)) 2972 return (EADDRNOTAVAIL); 2973 2974 switch (sc->sc_sync_peer.ss_family) { 2975 #ifdef INET 2976 case AF_INET: 2977 { 2978 int error; 2979 2980 ip_mfilter_init(&imo->imo_head); 2981 imo->imo_multicast_vif = -1; 2982 if ((error = in_joingroup(ifp, 2983 &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, 2984 &imf->imf_inm)) != 0) 2985 return (error); 2986 2987 ip_mfilter_insert(&imo->imo_head, imf); 2988 imo->imo_multicast_ifp = ifp; 2989 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2990 imo->imo_multicast_loop = 0; 2991 break; 2992 } 2993 #endif 2994 #ifdef INET6 2995 case AF_INET6: 2996 { 2997 int error; 2998 2999 syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer; 3000 if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL))) 3001 return (error); 3002 3003 ip6_mfilter_init(&im6o->im6o_head); 3004 if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL, 3005 &(im6f->im6f_in6m), 0)) != 0) 3006 return (error); 3007 3008 ip6_mfilter_insert(&im6o->im6o_head, im6f); 3009 im6o->im6o_multicast_ifp = ifp; 3010 im6o->im6o_multicast_hlim = PFSYNC_DFLTTL; 3011 im6o->im6o_multicast_loop = 0; 3012 break; 3013 } 3014 #endif 3015 } 3016 3017 return (0); 3018 } 3019 3020 static void 3021 pfsync_multicast_cleanup(struct pfsync_softc *sc) 3022 { 3023 #ifdef INET 3024 struct ip_moptions *imo = &sc->sc_imo; 3025 struct in_mfilter *imf; 3026 3027 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 3028 ip_mfilter_remove(&imo->imo_head, imf); 3029 in_leavegroup(imf->imf_inm, NULL); 3030 ip_mfilter_free(imf); 3031 } 3032 imo->imo_multicast_ifp = NULL; 3033 #endif 3034 3035 #ifdef INET6 3036 struct ip6_moptions *im6o = &sc->sc_im6o; 3037 struct in6_mfilter *im6f; 3038 3039 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 3040 ip6_mfilter_remove(&im6o->im6o_head, im6f); 3041 in6_leavegroup(im6f->im6f_in6m, NULL); 3042 ip6_mfilter_free(im6f); 3043 } 3044 im6o->im6o_multicast_ifp = NULL; 3045 #endif 3046 } 3047 3048 void 3049 pfsync_detach_ifnet(struct ifnet *ifp) 3050 { 3051 struct pfsync_softc *sc = V_pfsyncif; 3052 3053 if (sc == NULL) 3054 return; 3055 3056 PFSYNC_LOCK(sc); 3057 3058 if (sc->sc_sync_if == ifp) { 3059 /* We don't need mutlicast cleanup here, because the interface 3060 * is going away. We do need to ensure we don't try to do 3061 * cleanup later. 3062 */ 3063 ip_mfilter_init(&sc->sc_imo.imo_head); 3064 sc->sc_imo.imo_multicast_ifp = NULL; 3065 sc->sc_im6o.im6o_multicast_ifp = NULL; 3066 sc->sc_sync_if = NULL; 3067 } 3068 3069 PFSYNC_UNLOCK(sc); 3070 } 3071 3072 static int 3073 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 3074 { 3075 struct sockaddr_storage sa; 3076 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 3077 status->flags = pfsyncr->pfsyncr_defer; 3078 3079 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 3080 3081 memset(&sa, 0, sizeof(sa)); 3082 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 3083 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 3084 in->sin_family = AF_INET; 3085 in->sin_len = sizeof(*in); 3086 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 3087 } 3088 status->syncpeer = sa; 3089 3090 return 0; 3091 } 3092 3093 static int 3094 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 3095 { 3096 struct ifnet *sifp; 3097 struct in_mfilter *imf = NULL; 3098 struct in6_mfilter *im6f = NULL; 3099 int error; 3100 int c; 3101 3102 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 3103 return (EINVAL); 3104 3105 if (status->syncdev[0] == '\0') 3106 sifp = NULL; 3107 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 3108 return (EINVAL); 3109 3110 switch (status->syncpeer.ss_family) { 3111 #ifdef INET 3112 case AF_UNSPEC: 3113 case AF_INET: { 3114 struct sockaddr_in *status_sin; 3115 status_sin = (struct sockaddr_in *)&(status->syncpeer); 3116 if (sifp != NULL) { 3117 if (status_sin->sin_addr.s_addr == 0 || 3118 status_sin->sin_addr.s_addr == 3119 htonl(INADDR_PFSYNC_GROUP)) { 3120 status_sin->sin_family = AF_INET; 3121 status_sin->sin_len = sizeof(*status_sin); 3122 status_sin->sin_addr.s_addr = 3123 htonl(INADDR_PFSYNC_GROUP); 3124 } 3125 3126 if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) { 3127 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 3128 } 3129 } 3130 break; 3131 } 3132 #endif 3133 #ifdef INET6 3134 case AF_INET6: { 3135 struct sockaddr_in6 *status_sin6; 3136 status_sin6 = (struct sockaddr_in6*)&(status->syncpeer); 3137 if (sifp != NULL) { 3138 if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) || 3139 IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr, 3140 &in6addr_linklocal_pfsync_group)) { 3141 status_sin6->sin6_family = AF_INET6; 3142 status_sin6->sin6_len = sizeof(*status_sin6); 3143 status_sin6->sin6_addr = 3144 in6addr_linklocal_pfsync_group; 3145 } 3146 3147 if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) { 3148 im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0); 3149 } 3150 } 3151 break; 3152 } 3153 #endif 3154 } 3155 3156 PFSYNC_LOCK(sc); 3157 3158 switch (status->version) { 3159 case PFSYNC_MSG_VERSION_UNSPECIFIED: 3160 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 3161 break; 3162 case PFSYNC_MSG_VERSION_1301: 3163 case PFSYNC_MSG_VERSION_1400: 3164 case PFSYNC_MSG_VERSION_1500: 3165 sc->sc_version = status->version; 3166 break; 3167 default: 3168 PFSYNC_UNLOCK(sc); 3169 return (EINVAL); 3170 } 3171 3172 switch (status->syncpeer.ss_family) { 3173 case AF_INET: { 3174 struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer); 3175 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 3176 sc_sin->sin_family = AF_INET; 3177 sc_sin->sin_len = sizeof(*sc_sin); 3178 if (status_sin->sin_addr.s_addr == 0) { 3179 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 3180 } else { 3181 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 3182 } 3183 break; 3184 } 3185 case AF_INET6: { 3186 struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer); 3187 struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer; 3188 sc_sin->sin6_family = AF_INET6; 3189 sc_sin->sin6_len = sizeof(*sc_sin); 3190 if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) { 3191 sc_sin->sin6_addr = in6addr_linklocal_pfsync_group; 3192 } else { 3193 sc_sin->sin6_addr = status_sin->sin6_addr; 3194 } 3195 break; 3196 } 3197 } 3198 3199 sc->sc_maxupdates = status->maxupdates; 3200 if (status->flags & PFSYNCF_DEFER) { 3201 sc->sc_flags |= PFSYNCF_DEFER; 3202 V_pfsync_defer_ptr = pfsync_defer; 3203 } else { 3204 sc->sc_flags &= ~PFSYNCF_DEFER; 3205 V_pfsync_defer_ptr = NULL; 3206 } 3207 3208 if (sifp == NULL) { 3209 if (sc->sc_sync_if) 3210 if_rele(sc->sc_sync_if); 3211 sc->sc_sync_if = NULL; 3212 pfsync_multicast_cleanup(sc); 3213 PFSYNC_UNLOCK(sc); 3214 return (0); 3215 } 3216 3217 for (c = 0; c < pfsync_buckets; c++) { 3218 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 3219 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 3220 (sifp->if_mtu < sc->sc_ifp->if_mtu || 3221 (sc->sc_sync_if != NULL && 3222 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 3223 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 3224 pfsync_sendout(1, c); 3225 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 3226 } 3227 3228 pfsync_multicast_cleanup(sc); 3229 3230 if (((sc->sc_sync_peer.ss_family == AF_INET) && 3231 IN_MULTICAST(ntohl(((struct sockaddr_in *) 3232 &sc->sc_sync_peer)->sin_addr.s_addr))) || 3233 ((sc->sc_sync_peer.ss_family == AF_INET6) && 3234 IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*) 3235 &sc->sc_sync_peer)->sin6_addr))) { 3236 error = pfsync_multicast_setup(sc, sifp, imf, im6f); 3237 if (error) { 3238 if_rele(sifp); 3239 PFSYNC_UNLOCK(sc); 3240 #ifdef INET 3241 if (imf != NULL) 3242 ip_mfilter_free(imf); 3243 #endif 3244 #ifdef INET6 3245 if (im6f != NULL) 3246 ip6_mfilter_free(im6f); 3247 #endif 3248 return (error); 3249 } 3250 } 3251 if (sc->sc_sync_if) 3252 if_rele(sc->sc_sync_if); 3253 sc->sc_sync_if = sifp; 3254 3255 switch (sc->sc_sync_peer.ss_family) { 3256 #ifdef INET 3257 case AF_INET: { 3258 struct ip *ip; 3259 ip = &sc->sc_template.ipv4; 3260 bzero(ip, sizeof(*ip)); 3261 ip->ip_v = IPVERSION; 3262 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 3263 ip->ip_tos = IPTOS_LOWDELAY; 3264 /* len and id are set later. */ 3265 ip->ip_off = htons(IP_DF); 3266 ip->ip_ttl = PFSYNC_DFLTTL; 3267 ip->ip_p = IPPROTO_PFSYNC; 3268 ip->ip_src.s_addr = INADDR_ANY; 3269 ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 3270 break; 3271 } 3272 #endif 3273 #ifdef INET6 3274 case AF_INET6: { 3275 struct ip6_hdr *ip6; 3276 ip6 = &sc->sc_template.ipv6; 3277 bzero(ip6, sizeof(*ip6)); 3278 ip6->ip6_vfc = IPV6_VERSION; 3279 ip6->ip6_hlim = PFSYNC_DFLTTL; 3280 ip6->ip6_nxt = IPPROTO_PFSYNC; 3281 ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr; 3282 3283 struct epoch_tracker et; 3284 NET_EPOCH_ENTER(et); 3285 in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0, 3286 sc->sc_sync_if, &ip6->ip6_src, NULL); 3287 NET_EPOCH_EXIT(et); 3288 break; 3289 } 3290 #endif 3291 } 3292 3293 /* Request a full state table update. */ 3294 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 3295 (*carp_demote_adj_p)(V_pfsync_carp_adj, 3296 "pfsync bulk start"); 3297 sc->sc_flags &= ~PFSYNCF_OK; 3298 if (V_pf_status.debug >= PF_DEBUG_MISC) 3299 printf("pfsync: requesting bulk update\n"); 3300 PFSYNC_UNLOCK(sc); 3301 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 3302 pfsync_request_update(0, 0); 3303 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 3304 PFSYNC_BLOCK(sc); 3305 sc->sc_ureq_sent = time_uptime; 3306 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); 3307 PFSYNC_BUNLOCK(sc); 3308 return (0); 3309 } 3310 3311 static void 3312 pfsync_pointers_init(void) 3313 { 3314 3315 PF_RULES_WLOCK(); 3316 V_pfsync_state_import_ptr = pfsync_state_import; 3317 V_pfsync_insert_state_ptr = pfsync_insert_state; 3318 V_pfsync_update_state_ptr = pfsync_update_state; 3319 V_pfsync_delete_state_ptr = pfsync_delete_state; 3320 V_pfsync_clear_states_ptr = pfsync_clear_states; 3321 V_pfsync_defer_ptr = pfsync_defer; 3322 PF_RULES_WUNLOCK(); 3323 } 3324 3325 static void 3326 pfsync_pointers_uninit(void) 3327 { 3328 3329 PF_RULES_WLOCK(); 3330 V_pfsync_state_import_ptr = NULL; 3331 V_pfsync_insert_state_ptr = NULL; 3332 V_pfsync_update_state_ptr = NULL; 3333 V_pfsync_delete_state_ptr = NULL; 3334 V_pfsync_clear_states_ptr = NULL; 3335 V_pfsync_defer_ptr = NULL; 3336 PF_RULES_WUNLOCK(); 3337 } 3338 3339 static void 3340 vnet_pfsync_init(const void *unused __unused) 3341 { 3342 int error; 3343 3344 V_pfsync_cloner = if_clone_simple(pfsyncname, 3345 pfsync_clone_create, pfsync_clone_destroy, 1); 3346 error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, 3347 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 3348 if (error) { 3349 if_clone_detach(V_pfsync_cloner); 3350 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 3351 } 3352 3353 pfsync_pointers_init(); 3354 } 3355 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 3356 vnet_pfsync_init, NULL); 3357 3358 static void 3359 vnet_pfsync_uninit(const void *unused __unused) 3360 { 3361 int ret __diagused; 3362 3363 pfsync_pointers_uninit(); 3364 3365 if_clone_detach(V_pfsync_cloner); 3366 ret = swi_remove(V_pfsync_swi_cookie); 3367 MPASS(ret == 0); 3368 ret = intr_event_destroy(V_pfsync_swi_ie); 3369 MPASS(ret == 0); 3370 } 3371 3372 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 3373 vnet_pfsync_uninit, NULL); 3374 3375 static int 3376 pfsync_init(void) 3377 { 3378 int error; 3379 3380 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 3381 3382 #ifdef INET 3383 error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); 3384 if (error) 3385 return (error); 3386 #endif 3387 #ifdef INET6 3388 error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL); 3389 if (error) { 3390 ipproto_unregister(IPPROTO_PFSYNC); 3391 return (error); 3392 } 3393 #endif 3394 3395 return (0); 3396 } 3397 3398 static void 3399 pfsync_uninit(void) 3400 { 3401 pfsync_detach_ifnet_ptr = NULL; 3402 3403 #ifdef INET 3404 ipproto_unregister(IPPROTO_PFSYNC); 3405 #endif 3406 #ifdef INET6 3407 ip6proto_unregister(IPPROTO_PFSYNC); 3408 #endif 3409 } 3410 3411 static int 3412 pfsync_modevent(module_t mod, int type, void *data) 3413 { 3414 int error = 0; 3415 3416 switch (type) { 3417 case MOD_LOAD: 3418 error = pfsync_init(); 3419 break; 3420 case MOD_UNLOAD: 3421 pfsync_uninit(); 3422 break; 3423 default: 3424 error = EINVAL; 3425 break; 3426 } 3427 3428 return (error); 3429 } 3430 3431 static moduledata_t pfsync_mod = { 3432 pfsyncname, 3433 pfsync_modevent, 3434 0 3435 }; 3436 3437 #define PFSYNC_MODVER 1 3438 3439 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 3440 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 3441 MODULE_VERSION(pfsync, PFSYNC_MODVER); 3442 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 3443