1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 #include "opt_inet.h" 64 #include "opt_inet6.h" 65 #include "opt_pf.h" 66 67 #include <sys/param.h> 68 #include <sys/bus.h> 69 #include <sys/endian.h> 70 #include <sys/interrupt.h> 71 #include <sys/kernel.h> 72 #include <sys/lock.h> 73 #include <sys/mbuf.h> 74 #include <sys/module.h> 75 #include <sys/mutex.h> 76 #include <sys/nv.h> 77 #include <sys/priv.h> 78 #include <sys/smp.h> 79 #include <sys/socket.h> 80 #include <sys/sockio.h> 81 #include <sys/sysctl.h> 82 #include <sys/syslog.h> 83 84 #include <net/bpf.h> 85 #include <net/if.h> 86 #include <net/if_var.h> 87 #include <net/if_clone.h> 88 #include <net/if_private.h> 89 #include <net/if_types.h> 90 #include <net/vnet.h> 91 #include <net/pfvar.h> 92 #include <net/route.h> 93 #include <net/if_pfsync.h> 94 95 #include <netinet/if_ether.h> 96 #include <netinet/in.h> 97 #include <netinet/in_var.h> 98 #include <netinet6/in6_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip6.h> 101 #include <netinet/ip_carp.h> 102 #include <netinet/ip_var.h> 103 #include <netinet/tcp.h> 104 #include <netinet/tcp_fsm.h> 105 #include <netinet/tcp_seq.h> 106 107 #include <netinet/ip6.h> 108 #include <netinet6/ip6_var.h> 109 #include <netinet6/scope6_var.h> 110 111 #include <netpfil/pf/pfsync_nv.h> 112 113 struct pfsync_bucket; 114 struct pfsync_softc; 115 116 union inet_template { 117 struct ip ipv4; 118 struct ip6_hdr ipv6; 119 }; 120 121 #define PFSYNC_MINPKT ( \ 122 sizeof(union inet_template) + \ 123 sizeof(struct pfsync_header) + \ 124 sizeof(struct pfsync_subheader) ) 125 126 static int pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *, 127 struct pf_state_peer_export *); 128 static int pfsync_in_clr(struct mbuf *, int, int, int, int); 129 static int pfsync_in_ins(struct mbuf *, int, int, int, int); 130 static int pfsync_in_iack(struct mbuf *, int, int, int, int); 131 static int pfsync_in_upd(struct mbuf *, int, int, int, int); 132 static int pfsync_in_upd_c(struct mbuf *, int, int, int, int); 133 static int pfsync_in_ureq(struct mbuf *, int, int, int, int); 134 static int pfsync_in_del_c(struct mbuf *, int, int, int, int); 135 static int pfsync_in_bus(struct mbuf *, int, int, int, int); 136 static int pfsync_in_tdb(struct mbuf *, int, int, int, int); 137 static int pfsync_in_eof(struct mbuf *, int, int, int, int); 138 static int pfsync_in_error(struct mbuf *, int, int, int, int); 139 140 static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { 141 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 142 pfsync_in_ins, /* PFSYNC_ACT_INS_1301 */ 143 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 144 pfsync_in_upd, /* PFSYNC_ACT_UPD_1301 */ 145 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 146 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 147 pfsync_in_error, /* PFSYNC_ACT_DEL */ 148 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 149 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 150 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 151 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 152 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 153 pfsync_in_eof, /* PFSYNC_ACT_EOF */ 154 pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ 155 pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ 156 pfsync_in_ins, /* PFSYNC_ACT_INS_1500 */ 157 pfsync_in_upd, /* PFSYNC_ACT_UPD_1500 */ 158 }; 159 160 struct pfsync_q { 161 void (*write)(struct pf_kstate *, void *); 162 size_t len; 163 u_int8_t action; 164 }; 165 166 /* We have the following sync queues */ 167 enum pfsync_q_id { 168 PFSYNC_Q_INS_1301, 169 PFSYNC_Q_INS_1400, 170 PFSYNC_Q_INS_1500, 171 PFSYNC_Q_IACK, 172 PFSYNC_Q_UPD_1301, 173 PFSYNC_Q_UPD_1400, 174 PFSYNC_Q_UPD_1500, 175 PFSYNC_Q_UPD_C, 176 PFSYNC_Q_DEL_C, 177 PFSYNC_Q_COUNT, 178 }; 179 180 /* Functions for building messages for given queue */ 181 static void pfsync_out_state_1301(struct pf_kstate *, void *); 182 static void pfsync_out_state_1400(struct pf_kstate *, void *); 183 static void pfsync_out_state_1500(struct pf_kstate *, void *); 184 static void pfsync_out_iack(struct pf_kstate *, void *); 185 static void pfsync_out_upd_c(struct pf_kstate *, void *); 186 static void pfsync_out_del_c(struct pf_kstate *, void *); 187 188 /* Attach those functions to queue */ 189 static struct pfsync_q pfsync_qs[] = { 190 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, 191 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, 192 { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_INS_1500 }, 193 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 194 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, 195 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, 196 { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_UPD_1500 }, 197 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 198 { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 199 }; 200 201 /* Map queue to pf_kstate->sync_state */ 202 static u_int8_t pfsync_qid_sstate[] = { 203 PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ 204 PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ 205 PFSYNC_S_INS, /* PFSYNC_Q_INS_1500 */ 206 PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ 207 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ 208 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ 209 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1500 */ 210 PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ 211 PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ 212 }; 213 214 /* Map pf_kstate->sync_state to queue */ 215 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t); 216 217 static void pfsync_q_ins(struct pf_kstate *, int sync_state, bool); 218 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 219 220 static void pfsync_update_state(struct pf_kstate *); 221 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 222 223 struct pfsync_upd_req_item { 224 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 225 struct pfsync_upd_req ur_msg; 226 }; 227 228 struct pfsync_deferral { 229 struct pfsync_softc *pd_sc; 230 TAILQ_ENTRY(pfsync_deferral) pd_entry; 231 struct callout pd_tmo; 232 233 struct pf_kstate *pd_st; 234 struct mbuf *pd_m; 235 }; 236 237 struct pfsync_bucket 238 { 239 int b_id; 240 struct pfsync_softc *b_sc; 241 struct mtx b_mtx; 242 struct callout b_tmo; 243 int b_flags; 244 #define PFSYNCF_BUCKET_PUSH 0x00000001 245 246 size_t b_len; 247 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_Q_COUNT]; 248 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 249 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 250 u_int b_deferred; 251 uint8_t *b_plus; 252 size_t b_pluslen; 253 254 struct ifaltq b_snd; 255 }; 256 257 struct pfsync_softc { 258 /* Configuration */ 259 struct ifnet *sc_ifp; 260 struct ifnet *sc_sync_if; 261 struct ip_moptions sc_imo; 262 struct ip6_moptions sc_im6o; 263 struct sockaddr_storage sc_sync_peer; 264 uint32_t sc_flags; 265 uint8_t sc_maxupdates; 266 union inet_template sc_template; 267 struct mtx sc_mtx; 268 uint32_t sc_version; 269 270 /* Queued data */ 271 struct pfsync_bucket *sc_buckets; 272 273 /* Bulk update info */ 274 struct mtx sc_bulk_mtx; 275 uint32_t sc_ureq_sent; 276 int sc_bulk_tries; 277 uint32_t sc_ureq_received; 278 int sc_bulk_hashid; 279 uint64_t sc_bulk_stateid; 280 uint32_t sc_bulk_creatorid; 281 struct callout sc_bulk_tmo; 282 struct callout sc_bulkfail_tmo; 283 }; 284 285 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 286 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 287 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 288 289 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 290 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 291 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 292 293 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 294 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 295 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 296 297 #define PFSYNC_DEFER_TIMEOUT 20 298 299 static const char pfsyncname[] = "pfsync"; 300 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 301 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 302 #define V_pfsyncif VNET(pfsyncif) 303 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 304 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 305 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 306 #define V_pfsync_swi_ie VNET(pfsync_swi_ie) 307 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 308 #define V_pfsyncstats VNET(pfsyncstats) 309 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 310 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 311 VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; 312 #define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) 313 314 static void pfsync_timeout(void *); 315 static void pfsync_push(struct pfsync_bucket *); 316 static void pfsync_push_all(struct pfsync_softc *); 317 static void pfsyncintr(void *); 318 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 319 struct in_mfilter *, struct in6_mfilter *); 320 static void pfsync_multicast_cleanup(struct pfsync_softc *); 321 static void pfsync_pointers_init(void); 322 static void pfsync_pointers_uninit(void); 323 static int pfsync_init(void); 324 static void pfsync_uninit(void); 325 326 static unsigned long pfsync_buckets; 327 328 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 329 "PFSYNC"); 330 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 331 &VNET_NAME(pfsyncstats), pfsyncstats, 332 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 333 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 334 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 335 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 336 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 337 SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, 338 &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); 339 340 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 341 static void pfsync_clone_destroy(struct ifnet *); 342 static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *, 343 struct pf_state_peer *); 344 static int pfsyncoutput(struct ifnet *, struct mbuf *, 345 const struct sockaddr *, struct route *); 346 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 347 348 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 349 static void pfsync_undefer(struct pfsync_deferral *, int); 350 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 351 static void pfsync_undefer_state(struct pf_kstate *, int); 352 static void pfsync_defer_tmo(void *); 353 354 static void pfsync_request_update(u_int32_t, u_int64_t); 355 static bool pfsync_update_state_req(struct pf_kstate *); 356 357 static void pfsync_drop_all(struct pfsync_softc *); 358 static void pfsync_drop(struct pfsync_softc *, int); 359 static void pfsync_sendout(int, int); 360 static void pfsync_send_plus(void *, size_t); 361 362 static void pfsync_bulk_start(void); 363 static void pfsync_bulk_status(u_int8_t); 364 static void pfsync_bulk_update(void *); 365 static void pfsync_bulk_fail(void *); 366 367 static void pfsync_detach_ifnet(struct ifnet *); 368 369 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 370 struct pfsync_kstatus *); 371 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 372 struct pfsync_softc *); 373 374 #ifdef IPSEC 375 static void pfsync_update_net_tdb(struct pfsync_tdb *); 376 #endif 377 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 378 struct pf_kstate *); 379 380 #define PFSYNC_MAX_BULKTRIES 12 381 382 VNET_DEFINE(struct if_clone *, pfsync_cloner); 383 #define V_pfsync_cloner VNET(pfsync_cloner) 384 385 const struct in6_addr in6addr_linklocal_pfsync_group = 386 {{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}}; 388 static int 389 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 390 { 391 struct pfsync_softc *sc; 392 struct ifnet *ifp; 393 struct pfsync_bucket *b; 394 int c; 395 enum pfsync_q_id q; 396 397 if (unit != 0) 398 return (EINVAL); 399 400 if (! pfsync_buckets) 401 pfsync_buckets = mp_ncpus * 2; 402 403 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 404 sc->sc_flags |= PFSYNCF_OK; 405 sc->sc_maxupdates = 128; 406 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 407 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 408 M_PFSYNC, M_ZERO | M_WAITOK); 409 for (c = 0; c < pfsync_buckets; c++) { 410 b = &sc->sc_buckets[c]; 411 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 412 413 b->b_id = c; 414 b->b_sc = sc; 415 b->b_len = PFSYNC_MINPKT; 416 417 for (q = 0; q < PFSYNC_Q_COUNT; q++) 418 TAILQ_INIT(&b->b_qs[q]); 419 420 TAILQ_INIT(&b->b_upd_req_list); 421 TAILQ_INIT(&b->b_deferrals); 422 423 callout_init(&b->b_tmo, 1); 424 425 b->b_snd.ifq_maxlen = ifqmaxlen; 426 } 427 428 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 429 if_initname(ifp, pfsyncname, unit); 430 ifp->if_softc = sc; 431 ifp->if_ioctl = pfsyncioctl; 432 ifp->if_output = pfsyncoutput; 433 ifp->if_hdrlen = sizeof(struct pfsync_header); 434 ifp->if_mtu = ETHERMTU; 435 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 436 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 437 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 438 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 439 440 if_attach(ifp); 441 442 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 443 444 V_pfsyncif = sc; 445 446 return (0); 447 } 448 449 static void 450 pfsync_clone_destroy(struct ifnet *ifp) 451 { 452 struct pfsync_softc *sc = ifp->if_softc; 453 struct pfsync_bucket *b; 454 int c, ret; 455 456 for (c = 0; c < pfsync_buckets; c++) { 457 b = &sc->sc_buckets[c]; 458 /* 459 * At this stage, everything should have already been 460 * cleared by pfsync_uninit(), and we have only to 461 * drain callouts. 462 */ 463 PFSYNC_BUCKET_LOCK(b); 464 while (b->b_deferred > 0) { 465 struct pfsync_deferral *pd = 466 TAILQ_FIRST(&b->b_deferrals); 467 468 ret = callout_stop(&pd->pd_tmo); 469 PFSYNC_BUCKET_UNLOCK(b); 470 if (ret > 0) { 471 pfsync_undefer(pd, 1); 472 } else { 473 callout_drain(&pd->pd_tmo); 474 } 475 PFSYNC_BUCKET_LOCK(b); 476 } 477 MPASS(b->b_deferred == 0); 478 MPASS(TAILQ_EMPTY(&b->b_deferrals)); 479 PFSYNC_BUCKET_UNLOCK(b); 480 481 free(b->b_plus, M_PFSYNC); 482 b->b_plus = NULL; 483 b->b_pluslen = 0; 484 485 callout_drain(&b->b_tmo); 486 } 487 488 callout_drain(&sc->sc_bulkfail_tmo); 489 callout_drain(&sc->sc_bulk_tmo); 490 491 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 492 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 493 bpfdetach(ifp); 494 if_detach(ifp); 495 496 pfsync_drop_all(sc); 497 498 if_free(ifp); 499 pfsync_multicast_cleanup(sc); 500 mtx_destroy(&sc->sc_mtx); 501 mtx_destroy(&sc->sc_bulk_mtx); 502 503 for (c = 0; c < pfsync_buckets; c++) { 504 b = &sc->sc_buckets[c]; 505 mtx_destroy(&b->b_mtx); 506 } 507 free(sc->sc_buckets, M_PFSYNC); 508 free(sc, M_PFSYNC); 509 510 V_pfsyncif = NULL; 511 } 512 513 static int 514 pfsync_alloc_scrub_memory(struct pf_state_peer_export *s, 515 struct pf_state_peer *d) 516 { 517 if (s->scrub.scrub_flag && d->scrub == NULL) { 518 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 519 if (d->scrub == NULL) 520 return (ENOMEM); 521 } 522 523 return (0); 524 } 525 526 static int 527 pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) 528 { 529 struct pfsync_softc *sc = V_pfsyncif; 530 #ifndef __NO_STRICT_ALIGNMENT 531 struct pfsync_state_key key[2]; 532 #endif 533 struct pfsync_state_key *kw, *ks; 534 struct pf_kstate *st = NULL; 535 struct pf_state_key *skw = NULL, *sks = NULL; 536 struct pf_krule *r = NULL; 537 struct pfi_kkif *kif, *orig_kif; 538 struct pfi_kkif *rt_kif = NULL; 539 struct pf_kpooladdr *rpool_first; 540 int error; 541 int n = 0; 542 sa_family_t rt_af = 0; 543 uint8_t rt = 0; 544 sa_family_t wire_af, stack_af; 545 u_int8_t wire_proto, stack_proto; 546 547 PF_RULES_RASSERT(); 548 549 if (sp->pfs_1301.creatorid == 0) { 550 if (V_pf_status.debug >= PF_DEBUG_MISC) 551 printf("%s: invalid creator id: %08x\n", __func__, 552 ntohl(sp->pfs_1301.creatorid)); 553 return (EINVAL); 554 } 555 556 /* 557 * Check interfaces early on. Do it before allocating memory etc. 558 * Because there is a high chance there will be a lot more such states. 559 */ 560 if ((kif = orig_kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { 561 if (V_pf_status.debug >= PF_DEBUG_MISC) 562 printf("%s: unknown interface: %s\n", __func__, 563 sp->pfs_1301.ifname); 564 if (flags & PFSYNC_SI_IOCTL) 565 return (EINVAL); 566 return (0); /* skip this state */ 567 } 568 569 /* 570 * States created with floating interface policy can be synchronized to 571 * hosts with different interfaces, because they are bound to V_pfi_all. 572 * But s->orig_kif still points to a real interface. Don't abort 573 * importing the state if orig_kif does not exists on the importing host 574 * but the state is not interface-bound. 575 */ 576 if (msg_version == PFSYNC_MSG_VERSION_1500) { 577 orig_kif = pfi_kkif_find(sp->pfs_1500.orig_ifname); 578 if (orig_kif == NULL) { 579 if (kif == V_pfi_all) { 580 orig_kif = kif; 581 } else { 582 if (V_pf_status.debug >= PF_DEBUG_MISC) 583 printf("%s: unknown original interface:" 584 " %s\n", __func__, 585 sp->pfs_1500.orig_ifname); 586 if (flags & PFSYNC_SI_IOCTL) 587 return (EINVAL); 588 return (0); /* skip this state */ 589 } 590 } 591 } 592 593 /* 594 * If the ruleset checksums match or the state is coming from the ioctl, 595 * it's safe to associate the state with the rule of that number. 596 */ 597 if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && 598 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < 599 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { 600 TAILQ_FOREACH(r, pf_main_ruleset.rules[ 601 PF_RULESET_FILTER].active.ptr, entries) 602 if (ntohl(sp->pfs_1301.rule) == n++) 603 break; 604 } else 605 r = &V_pf_default_rule; 606 607 switch (msg_version) { 608 case PFSYNC_MSG_VERSION_1301: 609 /* 610 * On FreeBSD <= 13 the routing interface and routing operation 611 * are not sent over pfsync. If the ruleset is identical, 612 * though, we might be able to recover the routing information 613 * from the local ruleset. 614 */ 615 if (r != &V_pf_default_rule) { 616 struct pf_kpool *pool = &r->route; 617 618 /* Backwards compatibility. */ 619 if (TAILQ_EMPTY(&pool->list)) 620 pool = &r->rdr; 621 622 /* 623 * The ruleset is identical, try to recover. If the rule 624 * has a redirection pool with a single interface, there 625 * is a chance that this interface is identical as on 626 * the pfsync peer. If there's more than one interface, 627 * give up, as we can't be sure that we will pick the 628 * same one as the pfsync peer did. 629 */ 630 rpool_first = TAILQ_FIRST(&(pool->list)); 631 if ((rpool_first == NULL) || 632 (TAILQ_NEXT(rpool_first, entries) != NULL)) { 633 DPFPRINTF(PF_DEBUG_MISC, 634 "%s: can't recover routing information " 635 "because of empty or bad redirection pool", 636 __func__); 637 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 638 } 639 rt = r->rt; 640 rt_kif = rpool_first->kif; 641 /* 642 * Guess the AF of the route address, FreeBSD 13 does 643 * not support af-to nor prefer-ipv6-nexthop 644 * so it should be safe. 645 */ 646 rt_af = r->af; 647 } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { 648 /* 649 * Ruleset different, routing *supposedly* requested, 650 * give up on recovering. 651 */ 652 DPFPRINTF(PF_DEBUG_MISC, 653 "%s: can't recover routing information " 654 "because of different ruleset", __func__); 655 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 656 } 657 wire_af = stack_af = sp->pfs_1301.af; 658 wire_proto = stack_proto = sp->pfs_1301.proto; 659 break; 660 case PFSYNC_MSG_VERSION_1400: 661 /* 662 * On FreeBSD 14 we're not taking any chances. 663 * We use the information synced to us. 664 */ 665 if (sp->pfs_1400.rt) { 666 rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname); 667 if (rt_kif == NULL) { 668 DPFPRINTF(PF_DEBUG_MISC, 669 "%s: unknown route interface: %s", 670 __func__, sp->pfs_1400.rt_ifname); 671 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 672 } 673 rt = sp->pfs_1400.rt; 674 /* 675 * Guess the AF of the route address, FreeBSD 14 does 676 * not support af-to nor prefer-ipv6-nexthop 677 * so it should be safe. 678 */ 679 rt_af = sp->pfs_1400.af; 680 } 681 wire_af = stack_af = sp->pfs_1400.af; 682 wire_proto = stack_proto = sp->pfs_1400.proto; 683 break; 684 case PFSYNC_MSG_VERSION_1500: 685 /* 686 * On FreeBSD 15 and above we're not taking any chances. 687 * We use the information synced to us. 688 */ 689 if (sp->pfs_1500.rt) { 690 rt_kif = pfi_kkif_find(sp->pfs_1500.rt_ifname); 691 if (rt_kif == NULL) { 692 DPFPRINTF(PF_DEBUG_MISC, 693 "%s: unknown route interface: %s", 694 __func__, sp->pfs_1500.rt_ifname); 695 return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); 696 } 697 rt = sp->pfs_1500.rt; 698 rt_af = sp->pfs_1500.rt_af; 699 } 700 wire_af = sp->pfs_1500.wire_af; 701 stack_af = sp->pfs_1500.stack_af; 702 wire_proto = sp->pfs_1500.wire_proto; 703 stack_proto = sp->pfs_1500.stack_proto; 704 break; 705 } 706 707 if ((r->max_states && 708 counter_u64_fetch(r->states_cur) >= r->max_states)) 709 goto cleanup; 710 711 /* 712 * XXXGL: consider M_WAITOK in ioctl path after. 713 */ 714 st = pf_alloc_state(M_NOWAIT); 715 if (__predict_false(st == NULL)) 716 goto cleanup; 717 718 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 719 goto cleanup; 720 721 #ifndef __NO_STRICT_ALIGNMENT 722 bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2); 723 kw = &key[PF_SK_WIRE]; 724 ks = &key[PF_SK_STACK]; 725 #else 726 kw = &sp->pfs_1301.key[PF_SK_WIRE]; 727 ks = &sp->pfs_1301.key[PF_SK_STACK]; 728 #endif 729 730 if (wire_af != stack_af || 731 PF_ANEQ(&kw->addr[0], &ks->addr[0], wire_af) || 732 PF_ANEQ(&kw->addr[1], &ks->addr[1], wire_af) || 733 kw->port[0] != ks->port[0] || 734 kw->port[1] != ks->port[1]) { 735 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 736 if (sks == NULL) 737 goto cleanup; 738 } else 739 sks = skw; 740 741 /* allocate memory for scrub info */ 742 if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) || 743 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst)) 744 goto cleanup; 745 746 /* Copy to state key(s). */ 747 skw->addr[0] = kw->addr[0]; 748 skw->addr[1] = kw->addr[1]; 749 skw->port[0] = kw->port[0]; 750 skw->port[1] = kw->port[1]; 751 skw->proto = wire_proto; 752 skw->af = wire_af; 753 if (sks != skw) { 754 sks->addr[0] = ks->addr[0]; 755 sks->addr[1] = ks->addr[1]; 756 sks->port[0] = ks->port[0]; 757 sks->port[1] = ks->port[1]; 758 sks->proto = stack_proto; 759 sks->af = stack_af; 760 } 761 762 /* copy to state */ 763 st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000; 764 st->act.rt = rt; 765 st->act.rt_kif = rt_kif; 766 st->act.rt_af = rt_af; 767 768 switch (msg_version) { 769 case PFSYNC_MSG_VERSION_1301: 770 st->state_flags = sp->pfs_1301.state_flags; 771 st->direction = sp->pfs_1301.direction; 772 st->act.log = sp->pfs_1301.log; 773 st->timeout = sp->pfs_1301.timeout; 774 if (rt) 775 bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, 776 sizeof(st->act.rt_addr)); 777 /* 778 * In FreeBSD 13 pfsync lacks many attributes. Copy them 779 * from the rule if possible. If rule can't be matched 780 * clear any set options as we can't recover their 781 * parameters. 782 */ 783 if (r == &V_pf_default_rule) { 784 st->state_flags &= ~PFSTATE_SETMASK; 785 } else { 786 /* 787 * Similar to pf_rule_to_actions(). This code 788 * won't set the actions properly if they come 789 * from multiple "match" rules as only rule 790 * creating the state is send over pfsync. 791 */ 792 st->act.qid = r->qid; 793 st->act.pqid = r->pqid; 794 st->act.rtableid = r->rtableid; 795 if (r->scrub_flags & PFSTATE_SETTOS) 796 st->act.set_tos = r->set_tos; 797 st->act.min_ttl = r->min_ttl; 798 st->act.max_mss = r->max_mss; 799 st->state_flags |= (r->scrub_flags & 800 (PFSTATE_NODF|PFSTATE_RANDOMID| 801 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP| 802 PFSTATE_SETPRIO)); 803 if (r->dnpipe || r->dnrpipe) { 804 if (r->free_flags & PFRULE_DN_IS_PIPE) 805 st->state_flags |= PFSTATE_DN_IS_PIPE; 806 else 807 st->state_flags &= ~PFSTATE_DN_IS_PIPE; 808 } 809 st->act.dnpipe = r->dnpipe; 810 st->act.dnrpipe = r->dnrpipe; 811 } 812 break; 813 case PFSYNC_MSG_VERSION_1400: 814 st->state_flags = ntohs(sp->pfs_1400.state_flags); 815 st->direction = sp->pfs_1400.direction; 816 st->act.log = sp->pfs_1400.log; 817 st->timeout = sp->pfs_1400.timeout; 818 st->act.qid = ntohs(sp->pfs_1400.qid); 819 st->act.pqid = ntohs(sp->pfs_1400.pqid); 820 st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); 821 st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe); 822 st->act.rtableid = ntohl(sp->pfs_1400.rtableid); 823 st->act.min_ttl = sp->pfs_1400.min_ttl; 824 st->act.set_tos = sp->pfs_1400.set_tos; 825 st->act.max_mss = ntohs(sp->pfs_1400.max_mss); 826 st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; 827 st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; 828 if (rt) 829 bcopy(&sp->pfs_1400.rt_addr, &st->act.rt_addr, 830 sizeof(st->act.rt_addr)); 831 break; 832 case PFSYNC_MSG_VERSION_1500: 833 st->state_flags = ntohs(sp->pfs_1500.state_flags); 834 st->direction = sp->pfs_1500.direction; 835 st->act.log = sp->pfs_1500.log; 836 st->timeout = sp->pfs_1500.timeout; 837 st->act.qid = ntohs(sp->pfs_1500.qid); 838 st->act.pqid = ntohs(sp->pfs_1500.pqid); 839 st->act.dnpipe = ntohs(sp->pfs_1500.dnpipe); 840 st->act.dnrpipe = ntohs(sp->pfs_1500.dnrpipe); 841 st->act.rtableid = ntohl(sp->pfs_1500.rtableid); 842 st->act.min_ttl = sp->pfs_1500.min_ttl; 843 st->act.set_tos = sp->pfs_1500.set_tos; 844 st->act.max_mss = ntohs(sp->pfs_1500.max_mss); 845 st->act.set_prio[0] = sp->pfs_1500.set_prio[0]; 846 st->act.set_prio[1] = sp->pfs_1500.set_prio[1]; 847 if (rt) 848 bcopy(&sp->pfs_1500.rt_addr, &st->act.rt_addr, 849 sizeof(st->act.rt_addr)); 850 if (sp->pfs_1500.tagname[0] != 0) 851 st->tag = pf_tagname2tag(sp->pfs_1500.tagname); 852 break; 853 default: 854 panic("%s: Unsupported pfsync_msg_version %d", 855 __func__, msg_version); 856 } 857 858 st->expire = pf_get_uptime(); 859 if (sp->pfs_1301.expire) { 860 uint32_t timeout; 861 timeout = r->timeout[st->timeout]; 862 if (!timeout) 863 timeout = V_pf_default_rule.timeout[st->timeout]; 864 865 /* sp->expire may have been adaptively scaled by export. */ 866 st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; 867 } 868 869 if (! (st->act.rtableid == -1 || 870 (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) 871 goto cleanup; 872 873 st->id = sp->pfs_1301.id; 874 st->creatorid = sp->pfs_1301.creatorid; 875 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 876 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 877 878 st->rule = r; 879 st->nat_rule = NULL; 880 st->anchor = NULL; 881 882 st->pfsync_time = time_uptime; 883 st->sync_state = PFSYNC_S_NONE; 884 885 if (!(flags & PFSYNC_SI_IOCTL)) 886 st->state_flags |= PFSTATE_NOSYNC; 887 888 if ((error = pf_state_insert(kif, orig_kif, skw, sks, st)) != 0) 889 goto cleanup_state; 890 891 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 892 counter_u64_add(r->states_cur, 1); 893 counter_u64_add(r->states_tot, 1); 894 895 if (!(flags & PFSYNC_SI_IOCTL)) { 896 st->state_flags &= ~PFSTATE_NOSYNC; 897 if (st->state_flags & PFSTATE_ACK) { 898 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 899 PFSYNC_BUCKET_LOCK(b); 900 pfsync_q_ins(st, PFSYNC_S_IACK, true); 901 PFSYNC_BUCKET_UNLOCK(b); 902 903 pfsync_push_all(sc); 904 } 905 } 906 st->state_flags &= ~PFSTATE_ACK; 907 PF_STATE_UNLOCK(st); 908 909 return (0); 910 911 cleanup: 912 error = ENOMEM; 913 914 if (skw == sks) 915 sks = NULL; 916 uma_zfree(V_pf_state_key_z, skw); 917 uma_zfree(V_pf_state_key_z, sks); 918 919 cleanup_state: /* pf_state_insert() frees the state keys. */ 920 if (st) { 921 st->timeout = PFTM_UNLINKED; /* appease an assert */ 922 pf_free_state(st); 923 } 924 return (error); 925 } 926 927 #ifdef INET 928 static int 929 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 930 { 931 struct pfsync_softc *sc = V_pfsyncif; 932 struct mbuf *m = *mp; 933 struct ip *ip = mtod(m, struct ip *); 934 struct pfsync_header *ph; 935 struct pfsync_subheader subh; 936 937 int offset, len, flags = 0; 938 int rv; 939 uint16_t count; 940 941 PF_RULES_RLOCK_TRACKER; 942 943 *mp = NULL; 944 V_pfsyncstats.pfsyncs_ipackets++; 945 946 /* Verify that we have a sync interface configured. */ 947 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 948 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 949 goto done; 950 951 /* verify that the packet came in on the right interface */ 952 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 953 V_pfsyncstats.pfsyncs_badif++; 954 goto done; 955 } 956 957 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 958 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 959 /* verify that the IP TTL is 255. */ 960 if (ip->ip_ttl != PFSYNC_DFLTTL) { 961 V_pfsyncstats.pfsyncs_badttl++; 962 goto done; 963 } 964 965 offset = ip->ip_hl << 2; 966 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 967 V_pfsyncstats.pfsyncs_hdrops++; 968 goto done; 969 } 970 971 if (offset + sizeof(*ph) > m->m_len) { 972 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 973 V_pfsyncstats.pfsyncs_hdrops++; 974 return (IPPROTO_DONE); 975 } 976 ip = mtod(m, struct ip *); 977 } 978 ph = (struct pfsync_header *)((char *)ip + offset); 979 980 /* verify the version */ 981 if (ph->version != PFSYNC_VERSION) { 982 V_pfsyncstats.pfsyncs_badver++; 983 goto done; 984 } 985 986 len = ntohs(ph->len) + offset; 987 if (m->m_pkthdr.len < len) { 988 V_pfsyncstats.pfsyncs_badlen++; 989 goto done; 990 } 991 992 /* 993 * Trusting pf_chksum during packet processing, as well as seeking 994 * in interface name tree, require holding PF_RULES_RLOCK(). 995 */ 996 PF_RULES_RLOCK(); 997 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 998 flags = PFSYNC_SI_CKSUM; 999 1000 offset += sizeof(*ph); 1001 while (offset <= len - sizeof(subh)) { 1002 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 1003 offset += sizeof(subh); 1004 1005 if (subh.action >= PFSYNC_ACT_MAX) { 1006 V_pfsyncstats.pfsyncs_badact++; 1007 PF_RULES_RUNLOCK(); 1008 goto done; 1009 } 1010 1011 count = ntohs(subh.count); 1012 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 1013 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 1014 if (rv == -1) { 1015 PF_RULES_RUNLOCK(); 1016 return (IPPROTO_DONE); 1017 } 1018 1019 offset += rv; 1020 } 1021 PF_RULES_RUNLOCK(); 1022 1023 done: 1024 m_freem(m); 1025 return (IPPROTO_DONE); 1026 } 1027 #endif 1028 1029 #ifdef INET6 1030 static int 1031 pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused) 1032 { 1033 struct pfsync_softc *sc = V_pfsyncif; 1034 struct mbuf *m = *mp; 1035 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1036 struct pfsync_header *ph; 1037 struct pfsync_subheader subh; 1038 1039 int offset, len, flags = 0; 1040 int rv; 1041 uint16_t count; 1042 1043 PF_RULES_RLOCK_TRACKER; 1044 1045 *mp = NULL; 1046 V_pfsyncstats.pfsyncs_ipackets++; 1047 1048 /* Verify that we have a sync interface configured. */ 1049 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 1050 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1051 goto done; 1052 1053 /* verify that the packet came in on the right interface */ 1054 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 1055 V_pfsyncstats.pfsyncs_badif++; 1056 goto done; 1057 } 1058 1059 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 1060 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 1061 /* verify that the IP TTL is 255. */ 1062 if (ip6->ip6_hlim != PFSYNC_DFLTTL) { 1063 V_pfsyncstats.pfsyncs_badttl++; 1064 goto done; 1065 } 1066 1067 1068 offset = sizeof(*ip6); 1069 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 1070 V_pfsyncstats.pfsyncs_hdrops++; 1071 goto done; 1072 } 1073 1074 if (offset + sizeof(*ph) > m->m_len) { 1075 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 1076 V_pfsyncstats.pfsyncs_hdrops++; 1077 return (IPPROTO_DONE); 1078 } 1079 ip6 = mtod(m, struct ip6_hdr *); 1080 } 1081 ph = (struct pfsync_header *)((char *)ip6 + offset); 1082 1083 /* verify the version */ 1084 if (ph->version != PFSYNC_VERSION) { 1085 V_pfsyncstats.pfsyncs_badver++; 1086 goto done; 1087 } 1088 1089 len = ntohs(ph->len) + offset; 1090 if (m->m_pkthdr.len < len) { 1091 V_pfsyncstats.pfsyncs_badlen++; 1092 goto done; 1093 } 1094 1095 /* 1096 * Trusting pf_chksum during packet processing, as well as seeking 1097 * in interface name tree, require holding PF_RULES_RLOCK(). 1098 */ 1099 PF_RULES_RLOCK(); 1100 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 1101 flags = PFSYNC_SI_CKSUM; 1102 1103 offset += sizeof(*ph); 1104 while (offset <= len - sizeof(subh)) { 1105 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 1106 offset += sizeof(subh); 1107 1108 if (subh.action >= PFSYNC_ACT_MAX) { 1109 V_pfsyncstats.pfsyncs_badact++; 1110 PF_RULES_RUNLOCK(); 1111 goto done; 1112 } 1113 1114 count = ntohs(subh.count); 1115 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 1116 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 1117 if (rv == -1) { 1118 PF_RULES_RUNLOCK(); 1119 return (IPPROTO_DONE); 1120 } 1121 1122 offset += rv; 1123 } 1124 PF_RULES_RUNLOCK(); 1125 1126 done: 1127 m_freem(m); 1128 return (IPPROTO_DONE); 1129 } 1130 #endif 1131 1132 static int 1133 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) 1134 { 1135 struct pfsync_clr *clr; 1136 struct mbuf *mp; 1137 int len = sizeof(*clr) * count; 1138 int i, offp; 1139 u_int32_t creatorid; 1140 1141 mp = m_pulldown(m, offset, len, &offp); 1142 if (mp == NULL) { 1143 V_pfsyncstats.pfsyncs_badlen++; 1144 return (-1); 1145 } 1146 clr = (struct pfsync_clr *)(mp->m_data + offp); 1147 1148 for (i = 0; i < count; i++) { 1149 creatorid = clr[i].creatorid; 1150 1151 if (clr[i].ifname[0] != '\0' && 1152 pfi_kkif_find(clr[i].ifname) == NULL) 1153 continue; 1154 1155 for (int i = 0; i <= V_pf_hashmask; i++) { 1156 struct pf_idhash *ih = &V_pf_idhash[i]; 1157 struct pf_kstate *s; 1158 relock: 1159 PF_HASHROW_LOCK(ih); 1160 LIST_FOREACH(s, &ih->states, entry) { 1161 if (s->creatorid == creatorid) { 1162 s->state_flags |= PFSTATE_NOSYNC; 1163 pf_remove_state(s); 1164 goto relock; 1165 } 1166 } 1167 PF_HASHROW_UNLOCK(ih); 1168 } 1169 } 1170 1171 return (len); 1172 } 1173 1174 static int 1175 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) 1176 { 1177 struct mbuf *mp; 1178 union pfsync_state_union *sa, *sp; 1179 int i, offp, total_len, msg_version, msg_len; 1180 u_int8_t timeout, direction; 1181 sa_family_t af; 1182 1183 switch (action) { 1184 case PFSYNC_ACT_INS_1301: 1185 msg_len = sizeof(struct pfsync_state_1301); 1186 msg_version = PFSYNC_MSG_VERSION_1301; 1187 break; 1188 case PFSYNC_ACT_INS_1400: 1189 msg_len = sizeof(struct pfsync_state_1400); 1190 msg_version = PFSYNC_MSG_VERSION_1400; 1191 break; 1192 case PFSYNC_ACT_INS_1500: 1193 msg_len = sizeof(struct pfsync_state_1500); 1194 msg_version = PFSYNC_MSG_VERSION_1500; 1195 break; 1196 default: 1197 V_pfsyncstats.pfsyncs_badver++; 1198 return (-1); 1199 } 1200 1201 total_len = msg_len * count; 1202 1203 mp = m_pulldown(m, offset, total_len, &offp); 1204 if (mp == NULL) { 1205 V_pfsyncstats.pfsyncs_badlen++; 1206 return (-1); 1207 } 1208 sa = (union pfsync_state_union *)(mp->m_data + offp); 1209 1210 for (i = 0; i < count; i++) { 1211 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1212 1213 switch (msg_version) { 1214 case PFSYNC_MSG_VERSION_1301: 1215 case PFSYNC_MSG_VERSION_1400: 1216 af = sp->pfs_1301.af; 1217 timeout = sp->pfs_1301.timeout; 1218 direction = sp->pfs_1301.direction; 1219 break; 1220 case PFSYNC_MSG_VERSION_1500: 1221 af = sp->pfs_1500.wire_af; 1222 timeout = sp->pfs_1500.timeout; 1223 direction = sp->pfs_1500.direction; 1224 break; 1225 } 1226 1227 /* Check for invalid values. */ 1228 if (timeout >= PFTM_MAX || 1229 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1230 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || 1231 direction > PF_OUT || 1232 (af != AF_INET && af != AF_INET6)) { 1233 if (V_pf_status.debug >= PF_DEBUG_MISC) 1234 printf("%s: invalid value\n", __func__); 1235 V_pfsyncstats.pfsyncs_badval++; 1236 continue; 1237 } 1238 1239 if (pfsync_state_import(sp, flags, msg_version) != 0) 1240 V_pfsyncstats.pfsyncs_badact++; 1241 } 1242 1243 return (total_len); 1244 } 1245 1246 static int 1247 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) 1248 { 1249 struct pfsync_ins_ack *ia, *iaa; 1250 struct pf_kstate *st; 1251 1252 struct mbuf *mp; 1253 int len = count * sizeof(*ia); 1254 int offp, i; 1255 1256 mp = m_pulldown(m, offset, len, &offp); 1257 if (mp == NULL) { 1258 V_pfsyncstats.pfsyncs_badlen++; 1259 return (-1); 1260 } 1261 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 1262 1263 for (i = 0; i < count; i++) { 1264 ia = &iaa[i]; 1265 1266 st = pf_find_state_byid(ia->id, ia->creatorid); 1267 if (st == NULL) 1268 continue; 1269 1270 if (st->state_flags & PFSTATE_ACK) { 1271 pfsync_undefer_state(st, 0); 1272 } 1273 PF_STATE_UNLOCK(st); 1274 } 1275 /* 1276 * XXX this is not yet implemented, but we know the size of the 1277 * message so we can skip it. 1278 */ 1279 1280 return (count * sizeof(struct pfsync_ins_ack)); 1281 } 1282 1283 static int 1284 pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src, 1285 struct pf_state_peer_export *dst) 1286 { 1287 int sync = 0; 1288 1289 PF_STATE_LOCK_ASSERT(st); 1290 1291 /* 1292 * The state should never go backwards except 1293 * for syn-proxy states. Neither should the 1294 * sequence window slide backwards. 1295 */ 1296 if ((st->src.state > src->state && 1297 (st->src.state < PF_TCPS_PROXY_SRC || 1298 src->state >= PF_TCPS_PROXY_SRC)) || 1299 1300 (st->src.state == src->state && 1301 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 1302 sync++; 1303 else 1304 pf_state_peer_ntoh(src, &st->src); 1305 1306 if ((st->dst.state > dst->state) || 1307 1308 (st->dst.state >= TCPS_SYN_SENT && 1309 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 1310 sync++; 1311 else 1312 pf_state_peer_ntoh(dst, &st->dst); 1313 1314 return (sync); 1315 } 1316 1317 static int 1318 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) 1319 { 1320 struct pfsync_softc *sc = V_pfsyncif; 1321 union pfsync_state_union *sa, *sp; 1322 struct pf_kstate *st; 1323 struct mbuf *mp; 1324 int sync, offp, i, total_len, msg_len, msg_version; 1325 u_int8_t timeout; 1326 1327 switch (action) { 1328 case PFSYNC_ACT_UPD_1301: 1329 msg_len = sizeof(struct pfsync_state_1301); 1330 msg_version = PFSYNC_MSG_VERSION_1301; 1331 break; 1332 case PFSYNC_ACT_UPD_1400: 1333 msg_len = sizeof(struct pfsync_state_1400); 1334 msg_version = PFSYNC_MSG_VERSION_1400; 1335 break; 1336 case PFSYNC_ACT_UPD_1500: 1337 msg_len = sizeof(struct pfsync_state_1500); 1338 msg_version = PFSYNC_MSG_VERSION_1500; 1339 break; 1340 default: 1341 V_pfsyncstats.pfsyncs_badact++; 1342 return (-1); 1343 } 1344 1345 total_len = msg_len * count; 1346 1347 mp = m_pulldown(m, offset, total_len, &offp); 1348 if (mp == NULL) { 1349 V_pfsyncstats.pfsyncs_badlen++; 1350 return (-1); 1351 } 1352 sa = (union pfsync_state_union *)(mp->m_data + offp); 1353 1354 for (i = 0; i < count; i++) { 1355 sp = (union pfsync_state_union *)((char *)sa + msg_len * i); 1356 1357 switch (msg_version) { 1358 case PFSYNC_MSG_VERSION_1301: 1359 case PFSYNC_MSG_VERSION_1400: 1360 timeout = sp->pfs_1301.timeout; 1361 break; 1362 case PFSYNC_MSG_VERSION_1500: 1363 timeout = sp->pfs_1500.timeout; 1364 break; 1365 } 1366 1367 /* check for invalid values */ 1368 if (timeout >= PFTM_MAX || 1369 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1370 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { 1371 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1372 printf("pfsync_input: PFSYNC_ACT_UPD: " 1373 "invalid value\n"); 1374 } 1375 V_pfsyncstats.pfsyncs_badval++; 1376 continue; 1377 } 1378 1379 st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid); 1380 if (st == NULL) { 1381 /* insert the update */ 1382 if (pfsync_state_import(sp, flags, msg_version)) 1383 V_pfsyncstats.pfsyncs_badstate++; 1384 continue; 1385 } 1386 1387 if (st->state_flags & PFSTATE_ACK) { 1388 pfsync_undefer_state(st, 1); 1389 } 1390 1391 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1392 sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst); 1393 else { 1394 sync = 0; 1395 1396 /* 1397 * Non-TCP protocol state machine always go 1398 * forwards 1399 */ 1400 if (st->src.state > sp->pfs_1301.src.state) 1401 sync++; 1402 else 1403 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 1404 if (st->dst.state > sp->pfs_1301.dst.state) 1405 sync++; 1406 else 1407 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1408 } 1409 if (sync < 2) { 1410 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); 1411 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1412 st->expire = pf_get_uptime(); 1413 st->timeout = timeout; 1414 } 1415 st->pfsync_time = time_uptime; 1416 1417 if (sync) { 1418 V_pfsyncstats.pfsyncs_stale++; 1419 1420 pfsync_update_state(st); 1421 PF_STATE_UNLOCK(st); 1422 pfsync_push_all(sc); 1423 continue; 1424 } 1425 PF_STATE_UNLOCK(st); 1426 } 1427 1428 return (total_len); 1429 } 1430 1431 static int 1432 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action) 1433 { 1434 struct pfsync_softc *sc = V_pfsyncif; 1435 struct pfsync_upd_c *ua, *up; 1436 struct pf_kstate *st; 1437 int len = count * sizeof(*up); 1438 int sync; 1439 struct mbuf *mp; 1440 int offp, i; 1441 1442 mp = m_pulldown(m, offset, len, &offp); 1443 if (mp == NULL) { 1444 V_pfsyncstats.pfsyncs_badlen++; 1445 return (-1); 1446 } 1447 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 1448 1449 for (i = 0; i < count; i++) { 1450 up = &ua[i]; 1451 1452 /* check for invalid values */ 1453 if (up->timeout >= PFTM_MAX || 1454 up->src.state > PF_TCPS_PROXY_DST || 1455 up->dst.state > PF_TCPS_PROXY_DST) { 1456 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1457 printf("pfsync_input: " 1458 "PFSYNC_ACT_UPD_C: " 1459 "invalid value\n"); 1460 } 1461 V_pfsyncstats.pfsyncs_badval++; 1462 continue; 1463 } 1464 1465 st = pf_find_state_byid(up->id, up->creatorid); 1466 if (st == NULL) { 1467 /* We don't have this state. Ask for it. */ 1468 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1469 pfsync_request_update(up->creatorid, up->id); 1470 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1471 continue; 1472 } 1473 1474 if (st->state_flags & PFSTATE_ACK) { 1475 pfsync_undefer_state(st, 1); 1476 } 1477 1478 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1479 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1480 else { 1481 sync = 0; 1482 1483 /* 1484 * Non-TCP protocol state machine always go 1485 * forwards 1486 */ 1487 if (st->src.state > up->src.state) 1488 sync++; 1489 else 1490 pf_state_peer_ntoh(&up->src, &st->src); 1491 if (st->dst.state > up->dst.state) 1492 sync++; 1493 else 1494 pf_state_peer_ntoh(&up->dst, &st->dst); 1495 } 1496 if (sync < 2) { 1497 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1498 pf_state_peer_ntoh(&up->dst, &st->dst); 1499 st->expire = pf_get_uptime(); 1500 st->timeout = up->timeout; 1501 } 1502 st->pfsync_time = time_uptime; 1503 1504 if (sync) { 1505 V_pfsyncstats.pfsyncs_stale++; 1506 1507 pfsync_update_state(st); 1508 PF_STATE_UNLOCK(st); 1509 pfsync_push_all(sc); 1510 continue; 1511 } 1512 PF_STATE_UNLOCK(st); 1513 } 1514 1515 return (len); 1516 } 1517 1518 static int 1519 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action) 1520 { 1521 struct pfsync_upd_req *ur, *ura; 1522 struct mbuf *mp; 1523 int len = count * sizeof(*ur); 1524 int i, offp; 1525 1526 struct pf_kstate *st; 1527 1528 mp = m_pulldown(m, offset, len, &offp); 1529 if (mp == NULL) { 1530 V_pfsyncstats.pfsyncs_badlen++; 1531 return (-1); 1532 } 1533 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1534 1535 for (i = 0; i < count; i++) { 1536 ur = &ura[i]; 1537 1538 if (ur->id == 0 && ur->creatorid == 0) 1539 pfsync_bulk_start(); 1540 else { 1541 st = pf_find_state_byid(ur->id, ur->creatorid); 1542 if (st == NULL) { 1543 V_pfsyncstats.pfsyncs_badstate++; 1544 continue; 1545 } 1546 if (st->state_flags & PFSTATE_NOSYNC) { 1547 PF_STATE_UNLOCK(st); 1548 continue; 1549 } 1550 1551 pfsync_update_state_req(st); 1552 PF_STATE_UNLOCK(st); 1553 } 1554 } 1555 1556 return (len); 1557 } 1558 1559 static int 1560 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action) 1561 { 1562 struct mbuf *mp; 1563 struct pfsync_del_c *sa, *sp; 1564 struct pf_kstate *st; 1565 int len = count * sizeof(*sp); 1566 int offp, i; 1567 1568 mp = m_pulldown(m, offset, len, &offp); 1569 if (mp == NULL) { 1570 V_pfsyncstats.pfsyncs_badlen++; 1571 return (-1); 1572 } 1573 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1574 1575 for (i = 0; i < count; i++) { 1576 sp = &sa[i]; 1577 1578 st = pf_find_state_byid(sp->id, sp->creatorid); 1579 if (st == NULL) { 1580 V_pfsyncstats.pfsyncs_badstate++; 1581 continue; 1582 } 1583 1584 st->state_flags |= PFSTATE_NOSYNC; 1585 pf_remove_state(st); 1586 } 1587 1588 return (len); 1589 } 1590 1591 static int 1592 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action) 1593 { 1594 struct pfsync_softc *sc = V_pfsyncif; 1595 struct pfsync_bus *bus; 1596 struct mbuf *mp; 1597 int len = count * sizeof(*bus); 1598 int offp; 1599 1600 PFSYNC_BLOCK(sc); 1601 1602 /* If we're not waiting for a bulk update, who cares. */ 1603 if (sc->sc_ureq_sent == 0) { 1604 PFSYNC_BUNLOCK(sc); 1605 return (len); 1606 } 1607 1608 mp = m_pulldown(m, offset, len, &offp); 1609 if (mp == NULL) { 1610 PFSYNC_BUNLOCK(sc); 1611 V_pfsyncstats.pfsyncs_badlen++; 1612 return (-1); 1613 } 1614 bus = (struct pfsync_bus *)(mp->m_data + offp); 1615 1616 switch (bus->status) { 1617 case PFSYNC_BUS_START: 1618 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1619 V_pf_limits[PF_LIMIT_STATES].limit / 1620 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1621 sizeof(union pfsync_state_union)), 1622 pfsync_bulk_fail, sc); 1623 if (V_pf_status.debug >= PF_DEBUG_MISC) 1624 printf("pfsync: received bulk update start\n"); 1625 break; 1626 1627 case PFSYNC_BUS_END: 1628 if (time_uptime - ntohl(bus->endtime) >= 1629 sc->sc_ureq_sent) { 1630 /* that's it, we're happy */ 1631 sc->sc_ureq_sent = 0; 1632 sc->sc_bulk_tries = 0; 1633 callout_stop(&sc->sc_bulkfail_tmo); 1634 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1635 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1636 "pfsync bulk done"); 1637 sc->sc_flags |= PFSYNCF_OK; 1638 if (V_pf_status.debug >= PF_DEBUG_MISC) 1639 printf("pfsync: received valid " 1640 "bulk update end\n"); 1641 } else { 1642 if (V_pf_status.debug >= PF_DEBUG_MISC) 1643 printf("pfsync: received invalid " 1644 "bulk update end: bad timestamp\n"); 1645 } 1646 break; 1647 } 1648 PFSYNC_BUNLOCK(sc); 1649 1650 return (len); 1651 } 1652 1653 static int 1654 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action) 1655 { 1656 int len = count * sizeof(struct pfsync_tdb); 1657 1658 #if defined(IPSEC) 1659 struct pfsync_tdb *tp; 1660 struct mbuf *mp; 1661 int offp; 1662 int i; 1663 int s; 1664 1665 mp = m_pulldown(m, offset, len, &offp); 1666 if (mp == NULL) { 1667 V_pfsyncstats.pfsyncs_badlen++; 1668 return (-1); 1669 } 1670 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1671 1672 for (i = 0; i < count; i++) 1673 pfsync_update_net_tdb(&tp[i]); 1674 #endif 1675 1676 return (len); 1677 } 1678 1679 #if defined(IPSEC) 1680 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1681 static void 1682 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1683 { 1684 struct tdb *tdb; 1685 int s; 1686 1687 /* check for invalid values */ 1688 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1689 (pt->dst.sa.sa_family != AF_INET && 1690 pt->dst.sa.sa_family != AF_INET6)) 1691 goto bad; 1692 1693 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1694 if (tdb) { 1695 pt->rpl = ntohl(pt->rpl); 1696 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1697 1698 /* Neither replay nor byte counter should ever decrease. */ 1699 if (pt->rpl < tdb->tdb_rpl || 1700 pt->cur_bytes < tdb->tdb_cur_bytes) { 1701 goto bad; 1702 } 1703 1704 tdb->tdb_rpl = pt->rpl; 1705 tdb->tdb_cur_bytes = pt->cur_bytes; 1706 } 1707 return; 1708 1709 bad: 1710 if (V_pf_status.debug >= PF_DEBUG_MISC) 1711 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1712 "invalid value\n"); 1713 V_pfsyncstats.pfsyncs_badstate++; 1714 return; 1715 } 1716 #endif 1717 1718 static int 1719 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action) 1720 { 1721 /* check if we are at the right place in the packet */ 1722 if (offset != m->m_pkthdr.len) 1723 V_pfsyncstats.pfsyncs_badlen++; 1724 1725 /* we're done. free and let the caller return */ 1726 m_freem(m); 1727 return (-1); 1728 } 1729 1730 static int 1731 pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action) 1732 { 1733 V_pfsyncstats.pfsyncs_badact++; 1734 1735 m_freem(m); 1736 return (-1); 1737 } 1738 1739 static int 1740 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1741 struct route *rt) 1742 { 1743 m_freem(m); 1744 return (0); 1745 } 1746 1747 /* ARGSUSED */ 1748 static int 1749 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1750 { 1751 struct pfsync_softc *sc = ifp->if_softc; 1752 struct ifreq *ifr = (struct ifreq *)data; 1753 struct pfsyncreq pfsyncr; 1754 size_t nvbuflen; 1755 int error; 1756 int c; 1757 1758 switch (cmd) { 1759 case SIOCSIFFLAGS: 1760 PFSYNC_LOCK(sc); 1761 if (ifp->if_flags & IFF_UP) { 1762 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1763 PFSYNC_UNLOCK(sc); 1764 pfsync_pointers_init(); 1765 } else { 1766 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1767 PFSYNC_UNLOCK(sc); 1768 pfsync_pointers_uninit(); 1769 } 1770 break; 1771 case SIOCSIFMTU: 1772 if (!sc->sc_sync_if || 1773 ifr->ifr_mtu <= PFSYNC_MINPKT || 1774 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1775 return (EINVAL); 1776 if (ifr->ifr_mtu < ifp->if_mtu) { 1777 for (c = 0; c < pfsync_buckets; c++) { 1778 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1779 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1780 pfsync_sendout(1, c); 1781 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1782 } 1783 } 1784 ifp->if_mtu = ifr->ifr_mtu; 1785 break; 1786 case SIOCGETPFSYNC: 1787 bzero(&pfsyncr, sizeof(pfsyncr)); 1788 PFSYNC_LOCK(sc); 1789 if (sc->sc_sync_if) { 1790 strlcpy(pfsyncr.pfsyncr_syncdev, 1791 sc->sc_sync_if->if_xname, IFNAMSIZ); 1792 } 1793 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1794 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1795 pfsyncr.pfsyncr_defer = sc->sc_flags; 1796 PFSYNC_UNLOCK(sc); 1797 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1798 sizeof(pfsyncr))); 1799 1800 case SIOCGETPFSYNCNV: 1801 { 1802 nvlist_t *nvl_syncpeer; 1803 nvlist_t *nvl = nvlist_create(0); 1804 1805 if (nvl == NULL) 1806 return (ENOMEM); 1807 1808 if (sc->sc_sync_if) 1809 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1810 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1811 nvlist_add_number(nvl, "flags", sc->sc_flags); 1812 nvlist_add_number(nvl, "version", sc->sc_version); 1813 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1814 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1815 1816 void *packed = NULL; 1817 packed = nvlist_pack(nvl, &nvbuflen); 1818 if (packed == NULL) { 1819 free(packed, M_NVLIST); 1820 nvlist_destroy(nvl); 1821 return (ENOMEM); 1822 } 1823 1824 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1825 ifr->ifr_cap_nv.length = nvbuflen; 1826 ifr->ifr_cap_nv.buffer = NULL; 1827 free(packed, M_NVLIST); 1828 nvlist_destroy(nvl); 1829 return (EFBIG); 1830 } 1831 1832 ifr->ifr_cap_nv.length = nvbuflen; 1833 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1834 1835 nvlist_destroy(nvl); 1836 nvlist_destroy(nvl_syncpeer); 1837 free(packed, M_NVLIST); 1838 break; 1839 } 1840 1841 case SIOCSETPFSYNC: 1842 { 1843 struct pfsync_kstatus status; 1844 1845 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1846 return (error); 1847 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1848 sizeof(pfsyncr)))) 1849 return (error); 1850 1851 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1852 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1853 1854 error = pfsync_kstatus_to_softc(&status, sc); 1855 return (error); 1856 } 1857 case SIOCSETPFSYNCNV: 1858 { 1859 struct pfsync_kstatus status; 1860 void *data; 1861 nvlist_t *nvl; 1862 1863 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1864 return (error); 1865 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1866 return (EINVAL); 1867 1868 data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK); 1869 1870 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1871 ifr->ifr_cap_nv.length)) != 0) { 1872 free(data, M_PF); 1873 return (error); 1874 } 1875 1876 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1877 free(data, M_PF); 1878 return (EINVAL); 1879 } 1880 1881 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1882 pfsync_nvstatus_to_kstatus(nvl, &status); 1883 1884 nvlist_destroy(nvl); 1885 free(data, M_PF); 1886 1887 error = pfsync_kstatus_to_softc(&status, sc); 1888 return (error); 1889 } 1890 default: 1891 return (ENOTTY); 1892 } 1893 1894 return (0); 1895 } 1896 1897 static void 1898 pfsync_out_state_1301(struct pf_kstate *st, void *buf) 1899 { 1900 union pfsync_state_union *sp = buf; 1901 1902 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301); 1903 } 1904 1905 static void 1906 pfsync_out_state_1400(struct pf_kstate *st, void *buf) 1907 { 1908 union pfsync_state_union *sp = buf; 1909 1910 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400); 1911 } 1912 1913 static void 1914 pfsync_out_state_1500(struct pf_kstate *st, void *buf) 1915 { 1916 union pfsync_state_union *sp = buf; 1917 1918 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1500); 1919 } 1920 1921 static void 1922 pfsync_out_iack(struct pf_kstate *st, void *buf) 1923 { 1924 struct pfsync_ins_ack *iack = buf; 1925 1926 iack->id = st->id; 1927 iack->creatorid = st->creatorid; 1928 } 1929 1930 static void 1931 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1932 { 1933 struct pfsync_upd_c *up = buf; 1934 1935 bzero(up, sizeof(*up)); 1936 up->id = st->id; 1937 pf_state_peer_hton(&st->src, &up->src); 1938 pf_state_peer_hton(&st->dst, &up->dst); 1939 up->creatorid = st->creatorid; 1940 up->timeout = st->timeout; 1941 } 1942 1943 static void 1944 pfsync_out_del_c(struct pf_kstate *st, void *buf) 1945 { 1946 struct pfsync_del_c *dp = buf; 1947 1948 dp->id = st->id; 1949 dp->creatorid = st->creatorid; 1950 st->state_flags |= PFSTATE_NOSYNC; 1951 } 1952 1953 static void 1954 pfsync_drop_all(struct pfsync_softc *sc) 1955 { 1956 struct pfsync_bucket *b; 1957 int c; 1958 1959 for (c = 0; c < pfsync_buckets; c++) { 1960 b = &sc->sc_buckets[c]; 1961 1962 PFSYNC_BUCKET_LOCK(b); 1963 pfsync_drop(sc, c); 1964 PFSYNC_BUCKET_UNLOCK(b); 1965 } 1966 } 1967 1968 static void 1969 pfsync_drop(struct pfsync_softc *sc, int c) 1970 { 1971 struct pf_kstate *st, *next; 1972 struct pfsync_upd_req_item *ur; 1973 struct pfsync_bucket *b; 1974 enum pfsync_q_id q; 1975 1976 b = &sc->sc_buckets[c]; 1977 PFSYNC_BUCKET_LOCK_ASSERT(b); 1978 1979 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1980 if (TAILQ_EMPTY(&b->b_qs[q])) 1981 continue; 1982 1983 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1984 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1985 ("%s: st->sync_state %d == q %d", 1986 __func__, st->sync_state, q)); 1987 st->sync_state = PFSYNC_S_NONE; 1988 pf_release_state(st); 1989 } 1990 TAILQ_INIT(&b->b_qs[q]); 1991 } 1992 1993 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1994 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1995 free(ur, M_PFSYNC); 1996 } 1997 1998 b->b_len = PFSYNC_MINPKT; 1999 free(b->b_plus, M_PFSYNC); 2000 b->b_plus = NULL; 2001 b->b_pluslen = 0; 2002 } 2003 2004 static void 2005 pfsync_sendout(int schedswi, int c) 2006 { 2007 struct pfsync_softc *sc = V_pfsyncif; 2008 struct ifnet *ifp = sc->sc_ifp; 2009 struct mbuf *m; 2010 struct pfsync_header *ph; 2011 struct pfsync_subheader *subh; 2012 struct pf_kstate *st, *st_next; 2013 struct pfsync_upd_req_item *ur; 2014 struct pfsync_bucket *b = &sc->sc_buckets[c]; 2015 size_t len; 2016 int aflen, offset, count = 0; 2017 enum pfsync_q_id q; 2018 2019 KASSERT(sc != NULL, ("%s: null sc", __func__)); 2020 KASSERT(b->b_len > PFSYNC_MINPKT, 2021 ("%s: sc_len %zu", __func__, b->b_len)); 2022 PFSYNC_BUCKET_LOCK_ASSERT(b); 2023 2024 if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) { 2025 pfsync_drop(sc, c); 2026 return; 2027 } 2028 2029 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 2030 if (m == NULL) { 2031 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2032 V_pfsyncstats.pfsyncs_onomem++; 2033 return; 2034 } 2035 m->m_data += max_linkhdr; 2036 bzero(m->m_data, b->b_len); 2037 2038 len = b->b_len; 2039 2040 /* build the ip header */ 2041 switch (sc->sc_sync_peer.ss_family) { 2042 #ifdef INET 2043 case AF_INET: 2044 { 2045 struct ip *ip; 2046 2047 ip = mtod(m, struct ip *); 2048 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 2049 aflen = offset = sizeof(*ip); 2050 2051 len -= sizeof(union inet_template) - sizeof(struct ip); 2052 ip->ip_len = htons(len); 2053 ip_fillid(ip, V_ip_random_id); 2054 break; 2055 } 2056 #endif 2057 #ifdef INET6 2058 case AF_INET6: 2059 { 2060 struct ip6_hdr *ip6; 2061 2062 ip6 = mtod(m, struct ip6_hdr *); 2063 bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6)); 2064 aflen = offset = sizeof(*ip6); 2065 2066 len -= sizeof(union inet_template) - sizeof(struct ip6_hdr); 2067 ip6->ip6_plen = htons(len); 2068 break; 2069 } 2070 #endif 2071 default: 2072 m_freem(m); 2073 pfsync_drop(sc, c); 2074 return; 2075 } 2076 m->m_len = m->m_pkthdr.len = len; 2077 2078 /* build the pfsync header */ 2079 ph = (struct pfsync_header *)(m->m_data + offset); 2080 offset += sizeof(*ph); 2081 2082 ph->version = PFSYNC_VERSION; 2083 ph->len = htons(len - aflen); 2084 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 2085 2086 /* walk the queues */ 2087 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 2088 if (TAILQ_EMPTY(&b->b_qs[q])) 2089 continue; 2090 2091 subh = (struct pfsync_subheader *)(m->m_data + offset); 2092 offset += sizeof(*subh); 2093 2094 count = 0; 2095 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 2096 KASSERT(st->sync_state == pfsync_qid_sstate[q], 2097 ("%s: st->sync_state == q", 2098 __func__)); 2099 /* 2100 * XXXGL: some of write methods do unlocked reads 2101 * of state data :( 2102 */ 2103 pfsync_qs[q].write(st, m->m_data + offset); 2104 offset += pfsync_qs[q].len; 2105 st->sync_state = PFSYNC_S_NONE; 2106 pf_release_state(st); 2107 count++; 2108 } 2109 TAILQ_INIT(&b->b_qs[q]); 2110 2111 subh->action = pfsync_qs[q].action; 2112 subh->count = htons(count); 2113 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 2114 } 2115 2116 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 2117 subh = (struct pfsync_subheader *)(m->m_data + offset); 2118 offset += sizeof(*subh); 2119 2120 count = 0; 2121 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 2122 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 2123 2124 bcopy(&ur->ur_msg, m->m_data + offset, 2125 sizeof(ur->ur_msg)); 2126 offset += sizeof(ur->ur_msg); 2127 free(ur, M_PFSYNC); 2128 count++; 2129 } 2130 2131 subh->action = PFSYNC_ACT_UPD_REQ; 2132 subh->count = htons(count); 2133 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 2134 } 2135 2136 /* has someone built a custom region for us to add? */ 2137 if (b->b_plus != NULL) { 2138 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 2139 offset += b->b_pluslen; 2140 2141 free(b->b_plus, M_PFSYNC); 2142 b->b_plus = NULL; 2143 b->b_pluslen = 0; 2144 } 2145 2146 subh = (struct pfsync_subheader *)(m->m_data + offset); 2147 offset += sizeof(*subh); 2148 2149 subh->action = PFSYNC_ACT_EOF; 2150 subh->count = htons(1); 2151 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 2152 2153 /* we're done, let's put it on the wire */ 2154 if (bpf_peers_present(ifp->if_bpf)) { 2155 m->m_data += aflen; 2156 m->m_len = m->m_pkthdr.len = len - aflen; 2157 bpf_mtap(ifp->if_bpf, m); 2158 m->m_data -= aflen; 2159 m->m_len = m->m_pkthdr.len = len; 2160 } 2161 2162 if (sc->sc_sync_if == NULL) { 2163 b->b_len = PFSYNC_MINPKT; 2164 m_freem(m); 2165 return; 2166 } 2167 2168 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 2169 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 2170 b->b_len = PFSYNC_MINPKT; 2171 2172 if (!_IF_QFULL(&b->b_snd)) 2173 _IF_ENQUEUE(&b->b_snd, m); 2174 else { 2175 m_freem(m); 2176 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 2177 } 2178 if (schedswi) 2179 swi_sched(V_pfsync_swi_cookie, 0); 2180 } 2181 2182 static void 2183 pfsync_insert_state(struct pf_kstate *st) 2184 { 2185 struct pfsync_softc *sc = V_pfsyncif; 2186 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2187 2188 if (st->state_flags & PFSTATE_NOSYNC) 2189 return; 2190 2191 if ((st->rule->rule_flag & PFRULE_NOSYNC) || 2192 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 2193 st->state_flags |= PFSTATE_NOSYNC; 2194 return; 2195 } 2196 2197 KASSERT(st->sync_state == PFSYNC_S_NONE, 2198 ("%s: st->sync_state %u", __func__, st->sync_state)); 2199 2200 PFSYNC_BUCKET_LOCK(b); 2201 if (b->b_len == PFSYNC_MINPKT) 2202 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2203 2204 pfsync_q_ins(st, PFSYNC_S_INS, true); 2205 PFSYNC_BUCKET_UNLOCK(b); 2206 2207 st->sync_updates = 0; 2208 } 2209 2210 static int 2211 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 2212 { 2213 struct pfsync_softc *sc = V_pfsyncif; 2214 struct pfsync_deferral *pd; 2215 struct pfsync_bucket *b; 2216 2217 if (m->m_flags & (M_BCAST|M_MCAST)) 2218 return (0); 2219 2220 if (sc == NULL) 2221 return (0); 2222 2223 b = pfsync_get_bucket(sc, st); 2224 2225 PFSYNC_LOCK(sc); 2226 2227 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 2228 !(sc->sc_flags & PFSYNCF_DEFER)) { 2229 PFSYNC_UNLOCK(sc); 2230 return (0); 2231 } 2232 2233 PFSYNC_BUCKET_LOCK(b); 2234 PFSYNC_UNLOCK(sc); 2235 2236 if (b->b_deferred >= 128) 2237 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 2238 2239 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 2240 if (pd == NULL) { 2241 PFSYNC_BUCKET_UNLOCK(b); 2242 return (0); 2243 } 2244 b->b_deferred++; 2245 2246 m->m_flags |= M_SKIP_FIREWALL; 2247 st->state_flags |= PFSTATE_ACK; 2248 2249 pd->pd_sc = sc; 2250 pd->pd_st = st; 2251 pf_ref_state(st); 2252 pd->pd_m = m; 2253 2254 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 2255 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 2256 callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, 2257 pfsync_defer_tmo, pd); 2258 2259 pfsync_push(b); 2260 PFSYNC_BUCKET_UNLOCK(b); 2261 2262 return (1); 2263 } 2264 2265 static void 2266 pfsync_undefer(struct pfsync_deferral *pd, int drop) 2267 { 2268 struct pfsync_softc *sc = pd->pd_sc; 2269 struct mbuf *m = pd->pd_m; 2270 struct pf_kstate *st = pd->pd_st; 2271 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2272 2273 PFSYNC_BUCKET_LOCK_ASSERT(b); 2274 2275 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2276 b->b_deferred--; 2277 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2278 free(pd, M_PFSYNC); 2279 pf_release_state(st); 2280 2281 if (drop) 2282 m_freem(m); 2283 else { 2284 _IF_ENQUEUE(&b->b_snd, m); 2285 pfsync_push(b); 2286 } 2287 } 2288 2289 static void 2290 pfsync_defer_tmo(void *arg) 2291 { 2292 struct epoch_tracker et; 2293 struct pfsync_deferral *pd = arg; 2294 struct pfsync_softc *sc = pd->pd_sc; 2295 struct mbuf *m = pd->pd_m; 2296 struct pf_kstate *st = pd->pd_st; 2297 struct pfsync_bucket *b; 2298 2299 CURVNET_SET(sc->sc_ifp->if_vnet); 2300 2301 b = pfsync_get_bucket(sc, st); 2302 2303 PFSYNC_BUCKET_LOCK_ASSERT(b); 2304 2305 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 2306 b->b_deferred--; 2307 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 2308 PFSYNC_BUCKET_UNLOCK(b); 2309 free(pd, M_PFSYNC); 2310 2311 if (sc->sc_sync_if == NULL) { 2312 pf_release_state(st); 2313 m_freem(m); 2314 CURVNET_RESTORE(); 2315 return; 2316 } 2317 2318 NET_EPOCH_ENTER(et); 2319 2320 pfsync_tx(sc, m); 2321 2322 pf_release_state(st); 2323 2324 CURVNET_RESTORE(); 2325 NET_EPOCH_EXIT(et); 2326 } 2327 2328 static void 2329 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 2330 { 2331 struct pfsync_softc *sc = V_pfsyncif; 2332 struct pfsync_deferral *pd; 2333 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2334 2335 PFSYNC_BUCKET_LOCK_ASSERT(b); 2336 2337 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 2338 if (pd->pd_st == st) { 2339 if (callout_stop(&pd->pd_tmo) > 0) 2340 pfsync_undefer(pd, drop); 2341 2342 return; 2343 } 2344 } 2345 2346 panic("%s: unable to find deferred state", __func__); 2347 } 2348 2349 static void 2350 pfsync_undefer_state(struct pf_kstate *st, int drop) 2351 { 2352 struct pfsync_softc *sc = V_pfsyncif; 2353 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2354 2355 PFSYNC_BUCKET_LOCK(b); 2356 pfsync_undefer_state_locked(st, drop); 2357 PFSYNC_BUCKET_UNLOCK(b); 2358 } 2359 2360 static struct pfsync_bucket* 2361 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 2362 { 2363 int c = PF_IDHASH(st) % pfsync_buckets; 2364 return &sc->sc_buckets[c]; 2365 } 2366 2367 static void 2368 pfsync_update_state(struct pf_kstate *st) 2369 { 2370 struct pfsync_softc *sc = V_pfsyncif; 2371 bool sync = false, ref = true; 2372 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2373 2374 PF_STATE_LOCK_ASSERT(st); 2375 PFSYNC_BUCKET_LOCK(b); 2376 2377 if (st->state_flags & PFSTATE_ACK) 2378 pfsync_undefer_state_locked(st, 0); 2379 if (st->state_flags & PFSTATE_NOSYNC) { 2380 if (st->sync_state != PFSYNC_S_NONE) 2381 pfsync_q_del(st, true, b); 2382 PFSYNC_BUCKET_UNLOCK(b); 2383 return; 2384 } 2385 2386 if (b->b_len == PFSYNC_MINPKT) 2387 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2388 2389 switch (st->sync_state) { 2390 case PFSYNC_S_UPD_C: 2391 case PFSYNC_S_UPD: 2392 case PFSYNC_S_INS: 2393 /* we're already handling it */ 2394 2395 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2396 st->sync_updates++; 2397 if (st->sync_updates >= sc->sc_maxupdates) 2398 sync = true; 2399 } 2400 break; 2401 2402 case PFSYNC_S_IACK: 2403 pfsync_q_del(st, false, b); 2404 ref = false; 2405 /* FALLTHROUGH */ 2406 2407 case PFSYNC_S_NONE: 2408 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 2409 st->sync_updates = 0; 2410 break; 2411 2412 default: 2413 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2414 } 2415 2416 if (sync || (time_uptime - st->pfsync_time) < 2) 2417 pfsync_push(b); 2418 2419 PFSYNC_BUCKET_UNLOCK(b); 2420 } 2421 2422 static void 2423 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 2424 { 2425 struct pfsync_softc *sc = V_pfsyncif; 2426 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2427 struct pfsync_upd_req_item *item; 2428 size_t nlen = sizeof(struct pfsync_upd_req); 2429 2430 PFSYNC_BUCKET_LOCK_ASSERT(b); 2431 2432 /* 2433 * This code does a bit to prevent multiple update requests for the 2434 * same state being generated. It searches current subheader queue, 2435 * but it doesn't lookup into queue of already packed datagrams. 2436 */ 2437 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 2438 if (item->ur_msg.id == id && 2439 item->ur_msg.creatorid == creatorid) 2440 return; 2441 2442 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 2443 if (item == NULL) 2444 return; /* XXX stats */ 2445 2446 item->ur_msg.id = id; 2447 item->ur_msg.creatorid = creatorid; 2448 2449 if (TAILQ_EMPTY(&b->b_upd_req_list)) 2450 nlen += sizeof(struct pfsync_subheader); 2451 2452 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2453 pfsync_sendout(0, 0); 2454 2455 nlen = sizeof(struct pfsync_subheader) + 2456 sizeof(struct pfsync_upd_req); 2457 } 2458 2459 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 2460 b->b_len += nlen; 2461 2462 pfsync_push(b); 2463 } 2464 2465 static bool 2466 pfsync_update_state_req(struct pf_kstate *st) 2467 { 2468 struct pfsync_softc *sc = V_pfsyncif; 2469 bool ref = true, full = false; 2470 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2471 2472 PF_STATE_LOCK_ASSERT(st); 2473 PFSYNC_BUCKET_LOCK(b); 2474 2475 if (st->state_flags & PFSTATE_NOSYNC) { 2476 if (st->sync_state != PFSYNC_S_NONE) 2477 pfsync_q_del(st, true, b); 2478 PFSYNC_BUCKET_UNLOCK(b); 2479 return (full); 2480 } 2481 2482 switch (st->sync_state) { 2483 case PFSYNC_S_UPD_C: 2484 case PFSYNC_S_IACK: 2485 pfsync_q_del(st, false, b); 2486 ref = false; 2487 /* FALLTHROUGH */ 2488 2489 case PFSYNC_S_NONE: 2490 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2491 pfsync_push(b); 2492 break; 2493 2494 case PFSYNC_S_INS: 2495 case PFSYNC_S_UPD: 2496 case PFSYNC_S_DEL_C: 2497 /* we're already handling it */ 2498 break; 2499 2500 default: 2501 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2502 } 2503 2504 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union)) 2505 full = true; 2506 2507 PFSYNC_BUCKET_UNLOCK(b); 2508 2509 return (full); 2510 } 2511 2512 static void 2513 pfsync_delete_state(struct pf_kstate *st) 2514 { 2515 struct pfsync_softc *sc = V_pfsyncif; 2516 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2517 bool ref = true; 2518 2519 PFSYNC_BUCKET_LOCK(b); 2520 if (st->state_flags & PFSTATE_ACK) 2521 pfsync_undefer_state_locked(st, 1); 2522 if (st->state_flags & PFSTATE_NOSYNC) { 2523 if (st->sync_state != PFSYNC_S_NONE) 2524 pfsync_q_del(st, true, b); 2525 PFSYNC_BUCKET_UNLOCK(b); 2526 return; 2527 } 2528 2529 if (b->b_len == PFSYNC_MINPKT) 2530 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2531 2532 switch (st->sync_state) { 2533 case PFSYNC_S_INS: 2534 /* We never got to tell the world so just forget about it. */ 2535 pfsync_q_del(st, true, b); 2536 break; 2537 2538 case PFSYNC_S_UPD_C: 2539 case PFSYNC_S_UPD: 2540 case PFSYNC_S_IACK: 2541 pfsync_q_del(st, false, b); 2542 ref = false; 2543 /* FALLTHROUGH */ 2544 2545 case PFSYNC_S_NONE: 2546 pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); 2547 break; 2548 2549 default: 2550 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2551 } 2552 2553 PFSYNC_BUCKET_UNLOCK(b); 2554 } 2555 2556 static void 2557 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2558 { 2559 struct { 2560 struct pfsync_subheader subh; 2561 struct pfsync_clr clr; 2562 } __packed r; 2563 2564 bzero(&r, sizeof(r)); 2565 2566 r.subh.action = PFSYNC_ACT_CLR; 2567 r.subh.count = htons(1); 2568 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2569 2570 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2571 r.clr.creatorid = creatorid; 2572 2573 pfsync_send_plus(&r, sizeof(r)); 2574 } 2575 2576 static enum pfsync_q_id 2577 pfsync_sstate_to_qid(u_int8_t sync_state) 2578 { 2579 struct pfsync_softc *sc = V_pfsyncif; 2580 2581 switch (sync_state) { 2582 case PFSYNC_S_INS: 2583 switch (sc->sc_version) { 2584 case PFSYNC_MSG_VERSION_1301: 2585 return PFSYNC_Q_INS_1301; 2586 case PFSYNC_MSG_VERSION_1400: 2587 return PFSYNC_Q_INS_1400; 2588 case PFSYNC_MSG_VERSION_1500: 2589 return PFSYNC_Q_INS_1500; 2590 } 2591 break; 2592 case PFSYNC_S_IACK: 2593 return PFSYNC_Q_IACK; 2594 case PFSYNC_S_UPD: 2595 switch (sc->sc_version) { 2596 case PFSYNC_MSG_VERSION_1301: 2597 return PFSYNC_Q_UPD_1301; 2598 case PFSYNC_MSG_VERSION_1400: 2599 return PFSYNC_Q_UPD_1400; 2600 case PFSYNC_MSG_VERSION_1500: 2601 return PFSYNC_Q_UPD_1500; 2602 } 2603 break; 2604 case PFSYNC_S_UPD_C: 2605 return PFSYNC_Q_UPD_C; 2606 case PFSYNC_S_DEL_C: 2607 return PFSYNC_Q_DEL_C; 2608 default: 2609 panic("%s: Unsupported st->sync_state 0x%02x", 2610 __func__, sync_state); 2611 } 2612 2613 panic("%s: Unsupported pfsync_msg_version %d", 2614 __func__, sc->sc_version); 2615 } 2616 2617 static void 2618 pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref) 2619 { 2620 enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state); 2621 struct pfsync_softc *sc = V_pfsyncif; 2622 size_t nlen = pfsync_qs[q].len; 2623 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2624 2625 PFSYNC_BUCKET_LOCK_ASSERT(b); 2626 2627 KASSERT(st->sync_state == PFSYNC_S_NONE, 2628 ("%s: st->sync_state %u", __func__, st->sync_state)); 2629 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2630 b->b_len)); 2631 2632 if (TAILQ_EMPTY(&b->b_qs[q])) 2633 nlen += sizeof(struct pfsync_subheader); 2634 2635 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2636 pfsync_sendout(1, b->b_id); 2637 2638 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2639 } 2640 2641 b->b_len += nlen; 2642 st->sync_state = pfsync_qid_sstate[q]; 2643 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2644 if (ref) 2645 pf_ref_state(st); 2646 } 2647 2648 static void 2649 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2650 { 2651 enum pfsync_q_id q; 2652 2653 PFSYNC_BUCKET_LOCK_ASSERT(b); 2654 KASSERT(st->sync_state != PFSYNC_S_NONE, 2655 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2656 2657 q = pfsync_sstate_to_qid(st->sync_state); 2658 b->b_len -= pfsync_qs[q].len; 2659 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2660 st->sync_state = PFSYNC_S_NONE; 2661 if (unref) 2662 pf_release_state(st); 2663 2664 if (TAILQ_EMPTY(&b->b_qs[q])) 2665 b->b_len -= sizeof(struct pfsync_subheader); 2666 } 2667 2668 static void 2669 pfsync_bulk_start(void) 2670 { 2671 struct pfsync_softc *sc = V_pfsyncif; 2672 2673 if (V_pf_status.debug >= PF_DEBUG_MISC) 2674 printf("pfsync: received bulk update request\n"); 2675 2676 PFSYNC_BLOCK(sc); 2677 2678 sc->sc_ureq_received = time_uptime; 2679 sc->sc_bulk_hashid = 0; 2680 sc->sc_bulk_stateid = 0; 2681 pfsync_bulk_status(PFSYNC_BUS_START); 2682 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2683 PFSYNC_BUNLOCK(sc); 2684 } 2685 2686 static void 2687 pfsync_bulk_update(void *arg) 2688 { 2689 struct pfsync_softc *sc = arg; 2690 struct pf_kstate *s; 2691 int i; 2692 2693 PFSYNC_BLOCK_ASSERT(sc); 2694 CURVNET_SET(sc->sc_ifp->if_vnet); 2695 2696 /* 2697 * Start with last state from previous invocation. 2698 * It may had gone, in this case start from the 2699 * hash slot. 2700 */ 2701 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2702 2703 if (s != NULL) 2704 i = PF_IDHASH(s); 2705 else 2706 i = sc->sc_bulk_hashid; 2707 2708 for (; i <= V_pf_hashmask; i++) { 2709 struct pf_idhash *ih = &V_pf_idhash[i]; 2710 2711 if (s != NULL) 2712 PF_HASHROW_ASSERT(ih); 2713 else { 2714 PF_HASHROW_LOCK(ih); 2715 s = LIST_FIRST(&ih->states); 2716 } 2717 2718 for (; s; s = LIST_NEXT(s, entry)) { 2719 if (s->sync_state == PFSYNC_S_NONE && 2720 s->timeout < PFTM_MAX && 2721 s->pfsync_time <= sc->sc_ureq_received) { 2722 if (pfsync_update_state_req(s)) { 2723 /* We've filled a packet. */ 2724 sc->sc_bulk_hashid = i; 2725 sc->sc_bulk_stateid = s->id; 2726 sc->sc_bulk_creatorid = s->creatorid; 2727 PF_HASHROW_UNLOCK(ih); 2728 callout_reset(&sc->sc_bulk_tmo, 1, 2729 pfsync_bulk_update, sc); 2730 goto full; 2731 } 2732 } 2733 } 2734 PF_HASHROW_UNLOCK(ih); 2735 } 2736 2737 /* We're done. */ 2738 pfsync_bulk_status(PFSYNC_BUS_END); 2739 full: 2740 CURVNET_RESTORE(); 2741 } 2742 2743 static void 2744 pfsync_bulk_status(u_int8_t status) 2745 { 2746 struct { 2747 struct pfsync_subheader subh; 2748 struct pfsync_bus bus; 2749 } __packed r; 2750 2751 struct pfsync_softc *sc = V_pfsyncif; 2752 2753 bzero(&r, sizeof(r)); 2754 2755 r.subh.action = PFSYNC_ACT_BUS; 2756 r.subh.count = htons(1); 2757 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2758 2759 r.bus.creatorid = V_pf_status.hostid; 2760 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2761 r.bus.status = status; 2762 2763 pfsync_send_plus(&r, sizeof(r)); 2764 } 2765 2766 static void 2767 pfsync_bulk_fail(void *arg) 2768 { 2769 struct pfsync_softc *sc = arg; 2770 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2771 2772 CURVNET_SET(sc->sc_ifp->if_vnet); 2773 2774 PFSYNC_BLOCK_ASSERT(sc); 2775 2776 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2777 /* Try again */ 2778 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2779 pfsync_bulk_fail, V_pfsyncif); 2780 PFSYNC_BUCKET_LOCK(b); 2781 pfsync_request_update(0, 0); 2782 PFSYNC_BUCKET_UNLOCK(b); 2783 } else { 2784 /* Pretend like the transfer was ok. */ 2785 sc->sc_ureq_sent = 0; 2786 sc->sc_bulk_tries = 0; 2787 PFSYNC_LOCK(sc); 2788 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2789 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2790 "pfsync bulk fail"); 2791 sc->sc_flags |= PFSYNCF_OK; 2792 PFSYNC_UNLOCK(sc); 2793 if (V_pf_status.debug >= PF_DEBUG_MISC) 2794 printf("pfsync: failed to receive bulk update\n"); 2795 } 2796 2797 CURVNET_RESTORE(); 2798 } 2799 2800 static void 2801 pfsync_send_plus(void *plus, size_t pluslen) 2802 { 2803 struct pfsync_softc *sc = V_pfsyncif; 2804 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2805 uint8_t *newplus; 2806 2807 PFSYNC_BUCKET_LOCK(b); 2808 2809 if (b->b_len + pluslen > sc->sc_ifp->if_mtu) 2810 pfsync_sendout(1, b->b_id); 2811 2812 newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT); 2813 if (newplus == NULL) 2814 goto out; 2815 2816 if (b->b_plus != NULL) { 2817 memcpy(newplus, b->b_plus, b->b_pluslen); 2818 free(b->b_plus, M_PFSYNC); 2819 } else { 2820 MPASS(b->b_pluslen == 0); 2821 } 2822 memcpy(newplus + b->b_pluslen, plus, pluslen); 2823 2824 b->b_plus = newplus; 2825 b->b_pluslen += pluslen; 2826 b->b_len += pluslen; 2827 2828 pfsync_sendout(1, b->b_id); 2829 2830 out: 2831 PFSYNC_BUCKET_UNLOCK(b); 2832 } 2833 2834 static void 2835 pfsync_timeout(void *arg) 2836 { 2837 struct pfsync_bucket *b = arg; 2838 2839 CURVNET_SET(b->b_sc->sc_ifp->if_vnet); 2840 PFSYNC_BUCKET_LOCK(b); 2841 pfsync_push(b); 2842 PFSYNC_BUCKET_UNLOCK(b); 2843 CURVNET_RESTORE(); 2844 } 2845 2846 static void 2847 pfsync_push(struct pfsync_bucket *b) 2848 { 2849 2850 PFSYNC_BUCKET_LOCK_ASSERT(b); 2851 2852 b->b_flags |= PFSYNCF_BUCKET_PUSH; 2853 swi_sched(V_pfsync_swi_cookie, 0); 2854 } 2855 2856 static void 2857 pfsync_push_all(struct pfsync_softc *sc) 2858 { 2859 int c; 2860 struct pfsync_bucket *b; 2861 2862 for (c = 0; c < pfsync_buckets; c++) { 2863 b = &sc->sc_buckets[c]; 2864 2865 PFSYNC_BUCKET_LOCK(b); 2866 pfsync_push(b); 2867 PFSYNC_BUCKET_UNLOCK(b); 2868 } 2869 } 2870 2871 static void 2872 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) 2873 { 2874 struct ip *ip; 2875 int af, error = 0; 2876 2877 ip = mtod(m, struct ip *); 2878 MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); 2879 2880 af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; 2881 2882 /* 2883 * We distinguish between a deferral packet and our 2884 * own pfsync packet based on M_SKIP_FIREWALL 2885 * flag. This is XXX. 2886 */ 2887 switch (af) { 2888 #ifdef INET 2889 case AF_INET: 2890 if (m->m_flags & M_SKIP_FIREWALL) { 2891 error = ip_output(m, NULL, NULL, 0, 2892 NULL, NULL); 2893 } else { 2894 error = ip_output(m, NULL, NULL, 2895 IP_RAWOUTPUT, &sc->sc_imo, NULL); 2896 } 2897 break; 2898 #endif 2899 #ifdef INET6 2900 case AF_INET6: 2901 if (m->m_flags & M_SKIP_FIREWALL) { 2902 error = ip6_output(m, NULL, NULL, 0, 2903 NULL, NULL, NULL); 2904 } else { 2905 error = ip6_output(m, NULL, NULL, 0, 2906 &sc->sc_im6o, NULL, NULL); 2907 } 2908 break; 2909 #endif 2910 } 2911 2912 if (error == 0) 2913 V_pfsyncstats.pfsyncs_opackets++; 2914 else 2915 V_pfsyncstats.pfsyncs_oerrors++; 2916 2917 } 2918 2919 static void 2920 pfsyncintr(void *arg) 2921 { 2922 struct epoch_tracker et; 2923 struct pfsync_softc *sc = arg; 2924 struct pfsync_bucket *b; 2925 struct mbuf *m, *n; 2926 int c; 2927 2928 NET_EPOCH_ENTER(et); 2929 CURVNET_SET(sc->sc_ifp->if_vnet); 2930 2931 for (c = 0; c < pfsync_buckets; c++) { 2932 b = &sc->sc_buckets[c]; 2933 2934 PFSYNC_BUCKET_LOCK(b); 2935 if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { 2936 pfsync_sendout(0, b->b_id); 2937 b->b_flags &= ~PFSYNCF_BUCKET_PUSH; 2938 } 2939 _IF_DEQUEUE_ALL(&b->b_snd, m); 2940 PFSYNC_BUCKET_UNLOCK(b); 2941 2942 for (; m != NULL; m = n) { 2943 n = m->m_nextpkt; 2944 m->m_nextpkt = NULL; 2945 2946 pfsync_tx(sc, m); 2947 } 2948 } 2949 CURVNET_RESTORE(); 2950 NET_EPOCH_EXIT(et); 2951 } 2952 2953 static int 2954 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, 2955 struct in_mfilter* imf, struct in6_mfilter* im6f) 2956 { 2957 #ifdef INET 2958 struct ip_moptions *imo = &sc->sc_imo; 2959 #endif 2960 #ifdef INET6 2961 struct ip6_moptions *im6o = &sc->sc_im6o; 2962 struct sockaddr_in6 *syncpeer_sa6 = NULL; 2963 #endif 2964 2965 if (!(ifp->if_flags & IFF_MULTICAST)) 2966 return (EADDRNOTAVAIL); 2967 2968 switch (sc->sc_sync_peer.ss_family) { 2969 #ifdef INET 2970 case AF_INET: 2971 { 2972 int error; 2973 2974 ip_mfilter_init(&imo->imo_head); 2975 imo->imo_multicast_vif = -1; 2976 if ((error = in_joingroup(ifp, 2977 &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, 2978 &imf->imf_inm)) != 0) 2979 return (error); 2980 2981 ip_mfilter_insert(&imo->imo_head, imf); 2982 imo->imo_multicast_ifp = ifp; 2983 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2984 imo->imo_multicast_loop = 0; 2985 break; 2986 } 2987 #endif 2988 #ifdef INET6 2989 case AF_INET6: 2990 { 2991 int error; 2992 2993 syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer; 2994 if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL))) 2995 return (error); 2996 2997 ip6_mfilter_init(&im6o->im6o_head); 2998 if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL, 2999 &(im6f->im6f_in6m), 0)) != 0) 3000 return (error); 3001 3002 ip6_mfilter_insert(&im6o->im6o_head, im6f); 3003 im6o->im6o_multicast_ifp = ifp; 3004 im6o->im6o_multicast_hlim = PFSYNC_DFLTTL; 3005 im6o->im6o_multicast_loop = 0; 3006 break; 3007 } 3008 #endif 3009 } 3010 3011 return (0); 3012 } 3013 3014 static void 3015 pfsync_multicast_cleanup(struct pfsync_softc *sc) 3016 { 3017 #ifdef INET 3018 struct ip_moptions *imo = &sc->sc_imo; 3019 struct in_mfilter *imf; 3020 3021 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 3022 ip_mfilter_remove(&imo->imo_head, imf); 3023 in_leavegroup(imf->imf_inm, NULL); 3024 ip_mfilter_free(imf); 3025 } 3026 imo->imo_multicast_ifp = NULL; 3027 #endif 3028 3029 #ifdef INET6 3030 struct ip6_moptions *im6o = &sc->sc_im6o; 3031 struct in6_mfilter *im6f; 3032 3033 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { 3034 ip6_mfilter_remove(&im6o->im6o_head, im6f); 3035 in6_leavegroup(im6f->im6f_in6m, NULL); 3036 ip6_mfilter_free(im6f); 3037 } 3038 im6o->im6o_multicast_ifp = NULL; 3039 #endif 3040 } 3041 3042 void 3043 pfsync_detach_ifnet(struct ifnet *ifp) 3044 { 3045 struct pfsync_softc *sc = V_pfsyncif; 3046 3047 if (sc == NULL) 3048 return; 3049 3050 PFSYNC_LOCK(sc); 3051 3052 if (sc->sc_sync_if == ifp) { 3053 /* We don't need mutlicast cleanup here, because the interface 3054 * is going away. We do need to ensure we don't try to do 3055 * cleanup later. 3056 */ 3057 ip_mfilter_init(&sc->sc_imo.imo_head); 3058 sc->sc_imo.imo_multicast_ifp = NULL; 3059 sc->sc_im6o.im6o_multicast_ifp = NULL; 3060 sc->sc_sync_if = NULL; 3061 } 3062 3063 PFSYNC_UNLOCK(sc); 3064 } 3065 3066 static int 3067 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 3068 { 3069 struct sockaddr_storage sa; 3070 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 3071 status->flags = pfsyncr->pfsyncr_defer; 3072 3073 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 3074 3075 memset(&sa, 0, sizeof(sa)); 3076 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 3077 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 3078 in->sin_family = AF_INET; 3079 in->sin_len = sizeof(*in); 3080 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 3081 } 3082 status->syncpeer = sa; 3083 3084 return 0; 3085 } 3086 3087 static int 3088 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 3089 { 3090 struct ifnet *sifp; 3091 struct in_mfilter *imf = NULL; 3092 struct in6_mfilter *im6f = NULL; 3093 int error; 3094 int c; 3095 3096 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 3097 return (EINVAL); 3098 3099 if (status->syncdev[0] == '\0') 3100 sifp = NULL; 3101 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 3102 return (EINVAL); 3103 3104 switch (status->syncpeer.ss_family) { 3105 #ifdef INET 3106 case AF_UNSPEC: 3107 case AF_INET: { 3108 struct sockaddr_in *status_sin; 3109 status_sin = (struct sockaddr_in *)&(status->syncpeer); 3110 if (sifp != NULL) { 3111 if (status_sin->sin_addr.s_addr == 0 || 3112 status_sin->sin_addr.s_addr == 3113 htonl(INADDR_PFSYNC_GROUP)) { 3114 status_sin->sin_family = AF_INET; 3115 status_sin->sin_len = sizeof(*status_sin); 3116 status_sin->sin_addr.s_addr = 3117 htonl(INADDR_PFSYNC_GROUP); 3118 } 3119 3120 if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) { 3121 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 3122 } 3123 } 3124 break; 3125 } 3126 #endif 3127 #ifdef INET6 3128 case AF_INET6: { 3129 struct sockaddr_in6 *status_sin6; 3130 status_sin6 = (struct sockaddr_in6*)&(status->syncpeer); 3131 if (sifp != NULL) { 3132 if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) || 3133 IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr, 3134 &in6addr_linklocal_pfsync_group)) { 3135 status_sin6->sin6_family = AF_INET6; 3136 status_sin6->sin6_len = sizeof(*status_sin6); 3137 status_sin6->sin6_addr = 3138 in6addr_linklocal_pfsync_group; 3139 } 3140 3141 if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) { 3142 im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0); 3143 } 3144 } 3145 break; 3146 } 3147 #endif 3148 } 3149 3150 PFSYNC_LOCK(sc); 3151 3152 switch (status->version) { 3153 case PFSYNC_MSG_VERSION_UNSPECIFIED: 3154 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 3155 break; 3156 case PFSYNC_MSG_VERSION_1301: 3157 case PFSYNC_MSG_VERSION_1400: 3158 case PFSYNC_MSG_VERSION_1500: 3159 sc->sc_version = status->version; 3160 break; 3161 default: 3162 PFSYNC_UNLOCK(sc); 3163 return (EINVAL); 3164 } 3165 3166 switch (status->syncpeer.ss_family) { 3167 case AF_INET: { 3168 struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer); 3169 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 3170 sc_sin->sin_family = AF_INET; 3171 sc_sin->sin_len = sizeof(*sc_sin); 3172 if (status_sin->sin_addr.s_addr == 0) { 3173 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 3174 } else { 3175 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 3176 } 3177 break; 3178 } 3179 case AF_INET6: { 3180 struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer); 3181 struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer; 3182 sc_sin->sin6_family = AF_INET6; 3183 sc_sin->sin6_len = sizeof(*sc_sin); 3184 if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) { 3185 sc_sin->sin6_addr = in6addr_linklocal_pfsync_group; 3186 } else { 3187 sc_sin->sin6_addr = status_sin->sin6_addr; 3188 } 3189 break; 3190 } 3191 } 3192 3193 sc->sc_maxupdates = status->maxupdates; 3194 if (status->flags & PFSYNCF_DEFER) { 3195 sc->sc_flags |= PFSYNCF_DEFER; 3196 V_pfsync_defer_ptr = pfsync_defer; 3197 } else { 3198 sc->sc_flags &= ~PFSYNCF_DEFER; 3199 V_pfsync_defer_ptr = NULL; 3200 } 3201 3202 if (sifp == NULL) { 3203 if (sc->sc_sync_if) 3204 if_rele(sc->sc_sync_if); 3205 sc->sc_sync_if = NULL; 3206 pfsync_multicast_cleanup(sc); 3207 PFSYNC_UNLOCK(sc); 3208 return (0); 3209 } 3210 3211 for (c = 0; c < pfsync_buckets; c++) { 3212 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 3213 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 3214 (sifp->if_mtu < sc->sc_ifp->if_mtu || 3215 (sc->sc_sync_if != NULL && 3216 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 3217 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 3218 pfsync_sendout(1, c); 3219 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 3220 } 3221 3222 pfsync_multicast_cleanup(sc); 3223 3224 if (((sc->sc_sync_peer.ss_family == AF_INET) && 3225 IN_MULTICAST(ntohl(((struct sockaddr_in *) 3226 &sc->sc_sync_peer)->sin_addr.s_addr))) || 3227 ((sc->sc_sync_peer.ss_family == AF_INET6) && 3228 IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*) 3229 &sc->sc_sync_peer)->sin6_addr))) { 3230 error = pfsync_multicast_setup(sc, sifp, imf, im6f); 3231 if (error) { 3232 if_rele(sifp); 3233 PFSYNC_UNLOCK(sc); 3234 #ifdef INET 3235 if (imf != NULL) 3236 ip_mfilter_free(imf); 3237 #endif 3238 #ifdef INET6 3239 if (im6f != NULL) 3240 ip6_mfilter_free(im6f); 3241 #endif 3242 return (error); 3243 } 3244 } 3245 if (sc->sc_sync_if) 3246 if_rele(sc->sc_sync_if); 3247 sc->sc_sync_if = sifp; 3248 3249 switch (sc->sc_sync_peer.ss_family) { 3250 #ifdef INET 3251 case AF_INET: { 3252 struct ip *ip; 3253 ip = &sc->sc_template.ipv4; 3254 bzero(ip, sizeof(*ip)); 3255 ip->ip_v = IPVERSION; 3256 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 3257 ip->ip_tos = IPTOS_LOWDELAY; 3258 /* len and id are set later. */ 3259 ip->ip_off = htons(IP_DF); 3260 ip->ip_ttl = PFSYNC_DFLTTL; 3261 ip->ip_p = IPPROTO_PFSYNC; 3262 ip->ip_src.s_addr = INADDR_ANY; 3263 ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 3264 break; 3265 } 3266 #endif 3267 #ifdef INET6 3268 case AF_INET6: { 3269 struct ip6_hdr *ip6; 3270 ip6 = &sc->sc_template.ipv6; 3271 bzero(ip6, sizeof(*ip6)); 3272 ip6->ip6_vfc = IPV6_VERSION; 3273 ip6->ip6_hlim = PFSYNC_DFLTTL; 3274 ip6->ip6_nxt = IPPROTO_PFSYNC; 3275 ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr; 3276 3277 struct epoch_tracker et; 3278 NET_EPOCH_ENTER(et); 3279 in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0, 3280 sc->sc_sync_if, &ip6->ip6_src, NULL); 3281 NET_EPOCH_EXIT(et); 3282 break; 3283 } 3284 #endif 3285 } 3286 3287 /* Request a full state table update. */ 3288 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 3289 (*carp_demote_adj_p)(V_pfsync_carp_adj, 3290 "pfsync bulk start"); 3291 sc->sc_flags &= ~PFSYNCF_OK; 3292 if (V_pf_status.debug >= PF_DEBUG_MISC) 3293 printf("pfsync: requesting bulk update\n"); 3294 PFSYNC_UNLOCK(sc); 3295 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 3296 pfsync_request_update(0, 0); 3297 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 3298 PFSYNC_BLOCK(sc); 3299 sc->sc_ureq_sent = time_uptime; 3300 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); 3301 PFSYNC_BUNLOCK(sc); 3302 return (0); 3303 } 3304 3305 static void 3306 pfsync_pointers_init(void) 3307 { 3308 3309 PF_RULES_WLOCK(); 3310 V_pfsync_state_import_ptr = pfsync_state_import; 3311 V_pfsync_insert_state_ptr = pfsync_insert_state; 3312 V_pfsync_update_state_ptr = pfsync_update_state; 3313 V_pfsync_delete_state_ptr = pfsync_delete_state; 3314 V_pfsync_clear_states_ptr = pfsync_clear_states; 3315 V_pfsync_defer_ptr = pfsync_defer; 3316 PF_RULES_WUNLOCK(); 3317 } 3318 3319 static void 3320 pfsync_pointers_uninit(void) 3321 { 3322 3323 PF_RULES_WLOCK(); 3324 V_pfsync_state_import_ptr = NULL; 3325 V_pfsync_insert_state_ptr = NULL; 3326 V_pfsync_update_state_ptr = NULL; 3327 V_pfsync_delete_state_ptr = NULL; 3328 V_pfsync_clear_states_ptr = NULL; 3329 V_pfsync_defer_ptr = NULL; 3330 PF_RULES_WUNLOCK(); 3331 } 3332 3333 static void 3334 vnet_pfsync_init(const void *unused __unused) 3335 { 3336 int error; 3337 3338 V_pfsync_cloner = if_clone_simple(pfsyncname, 3339 pfsync_clone_create, pfsync_clone_destroy, 1); 3340 error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, 3341 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 3342 if (error) { 3343 if_clone_detach(V_pfsync_cloner); 3344 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 3345 } 3346 3347 pfsync_pointers_init(); 3348 } 3349 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 3350 vnet_pfsync_init, NULL); 3351 3352 static void 3353 vnet_pfsync_uninit(const void *unused __unused) 3354 { 3355 int ret __diagused; 3356 3357 pfsync_pointers_uninit(); 3358 3359 if_clone_detach(V_pfsync_cloner); 3360 ret = swi_remove(V_pfsync_swi_cookie); 3361 MPASS(ret == 0); 3362 ret = intr_event_destroy(V_pfsync_swi_ie); 3363 MPASS(ret == 0); 3364 } 3365 3366 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 3367 vnet_pfsync_uninit, NULL); 3368 3369 static int 3370 pfsync_init(void) 3371 { 3372 int error; 3373 3374 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 3375 3376 #ifdef INET 3377 error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); 3378 if (error) 3379 return (error); 3380 #endif 3381 #ifdef INET6 3382 error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL); 3383 if (error) { 3384 ipproto_unregister(IPPROTO_PFSYNC); 3385 return (error); 3386 } 3387 #endif 3388 3389 return (0); 3390 } 3391 3392 static void 3393 pfsync_uninit(void) 3394 { 3395 pfsync_detach_ifnet_ptr = NULL; 3396 3397 #ifdef INET 3398 ipproto_unregister(IPPROTO_PFSYNC); 3399 #endif 3400 #ifdef INET6 3401 ip6proto_unregister(IPPROTO_PFSYNC); 3402 #endif 3403 } 3404 3405 static int 3406 pfsync_modevent(module_t mod, int type, void *data) 3407 { 3408 int error = 0; 3409 3410 switch (type) { 3411 case MOD_LOAD: 3412 error = pfsync_init(); 3413 break; 3414 case MOD_UNLOAD: 3415 pfsync_uninit(); 3416 break; 3417 default: 3418 error = EINVAL; 3419 break; 3420 } 3421 3422 return (error); 3423 } 3424 3425 static moduledata_t pfsync_mod = { 3426 pfsyncname, 3427 pfsync_modevent, 3428 0 3429 }; 3430 3431 #define PFSYNC_MODVER 1 3432 3433 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 3434 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 3435 MODULE_VERSION(pfsync, PFSYNC_MODVER); 3436 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 3437