1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_pf.h" 68 69 #include <sys/param.h> 70 #include <sys/bus.h> 71 #include <sys/endian.h> 72 #include <sys/interrupt.h> 73 #include <sys/kernel.h> 74 #include <sys/lock.h> 75 #include <sys/mbuf.h> 76 #include <sys/module.h> 77 #include <sys/mutex.h> 78 #include <sys/nv.h> 79 #include <sys/priv.h> 80 #include <sys/smp.h> 81 #include <sys/socket.h> 82 #include <sys/sockio.h> 83 #include <sys/sysctl.h> 84 #include <sys/syslog.h> 85 86 #include <net/bpf.h> 87 #include <net/if.h> 88 #include <net/if_var.h> 89 #include <net/if_clone.h> 90 #include <net/if_private.h> 91 #include <net/if_types.h> 92 #include <net/vnet.h> 93 #include <net/pfvar.h> 94 #include <net/if_pfsync.h> 95 96 #include <netinet/if_ether.h> 97 #include <netinet/in.h> 98 #include <netinet/in_var.h> 99 #include <netinet/ip.h> 100 #include <netinet/ip_carp.h> 101 #include <netinet/ip_var.h> 102 #include <netinet/tcp.h> 103 #include <netinet/tcp_fsm.h> 104 #include <netinet/tcp_seq.h> 105 106 #include <netinet/ip6.h> 107 #include <netinet6/ip6_var.h> 108 109 #include <netpfil/pf/pfsync_nv.h> 110 111 struct pfsync_bucket; 112 struct pfsync_softc; 113 114 union inet_template { 115 struct ip ipv4; 116 }; 117 118 #define PFSYNC_MINPKT ( \ 119 sizeof(union inet_template) + \ 120 sizeof(struct pfsync_header) + \ 121 sizeof(struct pfsync_subheader) ) 122 123 static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, 124 struct pfsync_state_peer *); 125 static int pfsync_in_clr(struct mbuf *, int, int, int, int); 126 static int pfsync_in_ins(struct mbuf *, int, int, int, int); 127 static int pfsync_in_iack(struct mbuf *, int, int, int, int); 128 static int pfsync_in_upd(struct mbuf *, int, int, int, int); 129 static int pfsync_in_upd_c(struct mbuf *, int, int, int, int); 130 static int pfsync_in_ureq(struct mbuf *, int, int, int, int); 131 static int pfsync_in_del_c(struct mbuf *, int, int, int, int); 132 static int pfsync_in_bus(struct mbuf *, int, int, int, int); 133 static int pfsync_in_tdb(struct mbuf *, int, int, int, int); 134 static int pfsync_in_eof(struct mbuf *, int, int, int, int); 135 static int pfsync_in_error(struct mbuf *, int, int, int, int); 136 137 static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { 138 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 139 pfsync_in_ins, /* PFSYNC_ACT_INS_1301 */ 140 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 141 pfsync_in_upd, /* PFSYNC_ACT_UPD_1301 */ 142 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 143 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 144 pfsync_in_error, /* PFSYNC_ACT_DEL */ 145 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 146 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 147 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 148 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 149 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 150 pfsync_in_eof, /* PFSYNC_ACT_EOF */ 151 pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ 152 pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ 153 }; 154 155 struct pfsync_q { 156 void (*write)(struct pf_kstate *, void *); 157 size_t len; 158 u_int8_t action; 159 }; 160 161 /* We have the following sync queues */ 162 enum pfsync_q_id { 163 PFSYNC_Q_INS_1301, 164 PFSYNC_Q_INS_1400, 165 PFSYNC_Q_IACK, 166 PFSYNC_Q_UPD_1301, 167 PFSYNC_Q_UPD_1400, 168 PFSYNC_Q_UPD_C, 169 PFSYNC_Q_DEL_C, 170 PFSYNC_Q_COUNT, 171 }; 172 173 /* Functions for building messages for given queue */ 174 static void pfsync_out_state_1301(struct pf_kstate *, void *); 175 static void pfsync_out_state_1400(struct pf_kstate *, void *); 176 static void pfsync_out_iack(struct pf_kstate *, void *); 177 static void pfsync_out_upd_c(struct pf_kstate *, void *); 178 static void pfsync_out_del_c(struct pf_kstate *, void *); 179 180 /* Attach those functions to queue */ 181 static struct pfsync_q pfsync_qs[] = { 182 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, 183 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, 184 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 185 { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, 186 { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, 187 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 188 { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 189 }; 190 191 /* Map queue to pf_kstate->sync_state */ 192 static u_int8_t pfsync_qid_sstate[] = { 193 PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ 194 PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ 195 PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ 196 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ 197 PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ 198 PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ 199 PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ 200 }; 201 202 /* Map pf_kstate->sync_state to queue */ 203 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t); 204 205 static void pfsync_q_ins(struct pf_kstate *, int sync_state, bool); 206 static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); 207 208 static void pfsync_update_state(struct pf_kstate *); 209 static void pfsync_tx(struct pfsync_softc *, struct mbuf *); 210 211 struct pfsync_upd_req_item { 212 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 213 struct pfsync_upd_req ur_msg; 214 }; 215 216 struct pfsync_deferral { 217 struct pfsync_softc *pd_sc; 218 TAILQ_ENTRY(pfsync_deferral) pd_entry; 219 struct callout pd_tmo; 220 221 struct pf_kstate *pd_st; 222 struct mbuf *pd_m; 223 }; 224 225 struct pfsync_bucket 226 { 227 int b_id; 228 struct pfsync_softc *b_sc; 229 struct mtx b_mtx; 230 struct callout b_tmo; 231 int b_flags; 232 #define PFSYNCF_BUCKET_PUSH 0x00000001 233 234 size_t b_len; 235 TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_Q_COUNT]; 236 TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; 237 TAILQ_HEAD(, pfsync_deferral) b_deferrals; 238 u_int b_deferred; 239 void *b_plus; 240 size_t b_pluslen; 241 242 struct ifaltq b_snd; 243 }; 244 245 struct pfsync_softc { 246 /* Configuration */ 247 struct ifnet *sc_ifp; 248 struct ifnet *sc_sync_if; 249 struct ip_moptions sc_imo; 250 struct sockaddr_storage sc_sync_peer; 251 uint32_t sc_flags; 252 uint8_t sc_maxupdates; 253 union inet_template sc_template; 254 struct mtx sc_mtx; 255 uint32_t sc_version; 256 257 /* Queued data */ 258 struct pfsync_bucket *sc_buckets; 259 260 /* Bulk update info */ 261 struct mtx sc_bulk_mtx; 262 uint32_t sc_ureq_sent; 263 int sc_bulk_tries; 264 uint32_t sc_ureq_received; 265 int sc_bulk_hashid; 266 uint64_t sc_bulk_stateid; 267 uint32_t sc_bulk_creatorid; 268 struct callout sc_bulk_tmo; 269 struct callout sc_bulkfail_tmo; 270 }; 271 272 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 273 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 274 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 275 276 #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) 277 #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) 278 #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) 279 280 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 281 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 282 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 283 284 #define PFSYNC_DEFER_TIMEOUT 20 285 286 static const char pfsyncname[] = "pfsync"; 287 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 288 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 289 #define V_pfsyncif VNET(pfsyncif) 290 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 291 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 292 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); 293 #define V_pfsync_swi_ie VNET(pfsync_swi_ie) 294 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 295 #define V_pfsyncstats VNET(pfsyncstats) 296 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 297 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 298 VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; 299 #define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) 300 301 static void pfsync_timeout(void *); 302 static void pfsync_push(struct pfsync_bucket *); 303 static void pfsync_push_all(struct pfsync_softc *); 304 static void pfsyncintr(void *); 305 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 306 struct in_mfilter *imf); 307 static void pfsync_multicast_cleanup(struct pfsync_softc *); 308 static void pfsync_pointers_init(void); 309 static void pfsync_pointers_uninit(void); 310 static int pfsync_init(void); 311 static void pfsync_uninit(void); 312 313 static unsigned long pfsync_buckets; 314 315 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 316 "PFSYNC"); 317 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 318 &VNET_NAME(pfsyncstats), pfsyncstats, 319 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 320 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, 321 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 322 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, 323 &pfsync_buckets, 0, "Number of pfsync hash buckets"); 324 SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, 325 &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); 326 327 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 328 static void pfsync_clone_destroy(struct ifnet *); 329 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 330 struct pf_state_peer *); 331 static int pfsyncoutput(struct ifnet *, struct mbuf *, 332 const struct sockaddr *, struct route *); 333 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 334 335 static int pfsync_defer(struct pf_kstate *, struct mbuf *); 336 static void pfsync_undefer(struct pfsync_deferral *, int); 337 static void pfsync_undefer_state_locked(struct pf_kstate *, int); 338 static void pfsync_undefer_state(struct pf_kstate *, int); 339 static void pfsync_defer_tmo(void *); 340 341 static void pfsync_request_update(u_int32_t, u_int64_t); 342 static bool pfsync_update_state_req(struct pf_kstate *); 343 344 static void pfsync_drop(struct pfsync_softc *); 345 static void pfsync_sendout(int, int); 346 static void pfsync_send_plus(void *, size_t); 347 348 static void pfsync_bulk_start(void); 349 static void pfsync_bulk_status(u_int8_t); 350 static void pfsync_bulk_update(void *); 351 static void pfsync_bulk_fail(void *); 352 353 static void pfsync_detach_ifnet(struct ifnet *); 354 355 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, 356 struct pfsync_kstatus *); 357 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, 358 struct pfsync_softc *); 359 360 #ifdef IPSEC 361 static void pfsync_update_net_tdb(struct pfsync_tdb *); 362 #endif 363 static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, 364 struct pf_kstate *); 365 366 #define PFSYNC_MAX_BULKTRIES 12 367 368 VNET_DEFINE(struct if_clone *, pfsync_cloner); 369 #define V_pfsync_cloner VNET(pfsync_cloner) 370 371 static int 372 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 373 { 374 struct pfsync_softc *sc; 375 struct ifnet *ifp; 376 struct pfsync_bucket *b; 377 int c; 378 enum pfsync_q_id q; 379 380 if (unit != 0) 381 return (EINVAL); 382 383 if (! pfsync_buckets) 384 pfsync_buckets = mp_ncpus * 2; 385 386 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 387 sc->sc_flags |= PFSYNCF_OK; 388 sc->sc_maxupdates = 128; 389 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 390 391 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 392 if (ifp == NULL) { 393 free(sc, M_PFSYNC); 394 return (ENOSPC); 395 } 396 if_initname(ifp, pfsyncname, unit); 397 ifp->if_softc = sc; 398 ifp->if_ioctl = pfsyncioctl; 399 ifp->if_output = pfsyncoutput; 400 ifp->if_type = IFT_PFSYNC; 401 ifp->if_hdrlen = sizeof(struct pfsync_header); 402 ifp->if_mtu = ETHERMTU; 403 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 404 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 405 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 406 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 407 408 if_attach(ifp); 409 410 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 411 412 sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), 413 M_PFSYNC, M_ZERO | M_WAITOK); 414 for (c = 0; c < pfsync_buckets; c++) { 415 b = &sc->sc_buckets[c]; 416 mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); 417 418 b->b_id = c; 419 b->b_sc = sc; 420 b->b_len = PFSYNC_MINPKT; 421 422 for (q = 0; q < PFSYNC_Q_COUNT; q++) 423 TAILQ_INIT(&b->b_qs[q]); 424 425 TAILQ_INIT(&b->b_upd_req_list); 426 TAILQ_INIT(&b->b_deferrals); 427 428 callout_init(&b->b_tmo, 1); 429 430 b->b_snd.ifq_maxlen = ifqmaxlen; 431 } 432 433 V_pfsyncif = sc; 434 435 return (0); 436 } 437 438 static void 439 pfsync_clone_destroy(struct ifnet *ifp) 440 { 441 struct pfsync_softc *sc = ifp->if_softc; 442 struct pfsync_bucket *b; 443 int c, ret; 444 445 for (c = 0; c < pfsync_buckets; c++) { 446 b = &sc->sc_buckets[c]; 447 /* 448 * At this stage, everything should have already been 449 * cleared by pfsync_uninit(), and we have only to 450 * drain callouts. 451 */ 452 PFSYNC_BUCKET_LOCK(b); 453 while (b->b_deferred > 0) { 454 struct pfsync_deferral *pd = 455 TAILQ_FIRST(&b->b_deferrals); 456 457 ret = callout_stop(&pd->pd_tmo); 458 PFSYNC_BUCKET_UNLOCK(b); 459 if (ret > 0) { 460 pfsync_undefer(pd, 1); 461 } else { 462 callout_drain(&pd->pd_tmo); 463 } 464 PFSYNC_BUCKET_LOCK(b); 465 } 466 MPASS(b->b_deferred == 0); 467 MPASS(TAILQ_EMPTY(&b->b_deferrals)); 468 PFSYNC_BUCKET_UNLOCK(b); 469 470 callout_drain(&b->b_tmo); 471 } 472 473 callout_drain(&sc->sc_bulkfail_tmo); 474 callout_drain(&sc->sc_bulk_tmo); 475 476 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 477 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 478 bpfdetach(ifp); 479 if_detach(ifp); 480 481 pfsync_drop(sc); 482 483 if_free(ifp); 484 pfsync_multicast_cleanup(sc); 485 mtx_destroy(&sc->sc_mtx); 486 mtx_destroy(&sc->sc_bulk_mtx); 487 488 free(sc->sc_buckets, M_PFSYNC); 489 free(sc, M_PFSYNC); 490 491 V_pfsyncif = NULL; 492 } 493 494 static int 495 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 496 struct pf_state_peer *d) 497 { 498 if (s->scrub.scrub_flag && d->scrub == NULL) { 499 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 500 if (d->scrub == NULL) 501 return (ENOMEM); 502 } 503 504 return (0); 505 } 506 507 static int 508 pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) 509 { 510 struct pfsync_softc *sc = V_pfsyncif; 511 #ifndef __NO_STRICT_ALIGNMENT 512 struct pfsync_state_key key[2]; 513 #endif 514 struct pfsync_state_key *kw, *ks; 515 struct pf_kstate *st = NULL; 516 struct pf_state_key *skw = NULL, *sks = NULL; 517 struct pf_krule *r = NULL; 518 struct pfi_kkif *kif; 519 int error; 520 521 PF_RULES_RASSERT(); 522 523 if (sp->pfs_1301.creatorid == 0) { 524 if (V_pf_status.debug >= PF_DEBUG_MISC) 525 printf("%s: invalid creator id: %08x\n", __func__, 526 ntohl(sp->pfs_1301.creatorid)); 527 return (EINVAL); 528 } 529 530 if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { 531 if (V_pf_status.debug >= PF_DEBUG_MISC) 532 printf("%s: unknown interface: %s\n", __func__, 533 sp->pfs_1301.ifname); 534 if (flags & PFSYNC_SI_IOCTL) 535 return (EINVAL); 536 return (0); /* skip this state */ 537 } 538 539 /* 540 * If the ruleset checksums match or the state is coming from the ioctl, 541 * it's safe to associate the state with the rule of that number. 542 */ 543 if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && 544 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < 545 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 546 r = pf_main_ruleset.rules[ 547 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->pfs_1301.rule)]; 548 else 549 r = &V_pf_default_rule; 550 551 if ((r->max_states && 552 counter_u64_fetch(r->states_cur) >= r->max_states)) 553 goto cleanup; 554 555 /* 556 * XXXGL: consider M_WAITOK in ioctl path after. 557 */ 558 st = pf_alloc_state(M_NOWAIT); 559 if (__predict_false(st == NULL)) 560 goto cleanup; 561 562 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 563 goto cleanup; 564 565 #ifndef __NO_STRICT_ALIGNMENT 566 bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2); 567 kw = &key[PF_SK_WIRE]; 568 ks = &key[PF_SK_STACK]; 569 #else 570 kw = &sp->pfs_1301.key[PF_SK_WIRE]; 571 ks = &sp->pfs_1301.key[PF_SK_STACK]; 572 #endif 573 574 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) || 575 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) || 576 kw->port[0] != ks->port[0] || 577 kw->port[1] != ks->port[1]) { 578 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 579 if (sks == NULL) 580 goto cleanup; 581 } else 582 sks = skw; 583 584 /* allocate memory for scrub info */ 585 if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) || 586 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst)) 587 goto cleanup; 588 589 /* Copy to state key(s). */ 590 skw->addr[0] = kw->addr[0]; 591 skw->addr[1] = kw->addr[1]; 592 skw->port[0] = kw->port[0]; 593 skw->port[1] = kw->port[1]; 594 skw->proto = sp->pfs_1301.proto; 595 skw->af = sp->pfs_1301.af; 596 if (sks != skw) { 597 sks->addr[0] = ks->addr[0]; 598 sks->addr[1] = ks->addr[1]; 599 sks->port[0] = ks->port[0]; 600 sks->port[1] = ks->port[1]; 601 sks->proto = sp->pfs_1301.proto; 602 sks->af = sp->pfs_1301.af; 603 } 604 605 /* copy to state */ 606 bcopy(&sp->pfs_1301.rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 607 st->creation = time_uptime - ntohl(sp->pfs_1301.creation); 608 st->expire = time_uptime; 609 if (sp->pfs_1301.expire) { 610 uint32_t timeout; 611 612 timeout = r->timeout[sp->pfs_1301.timeout]; 613 if (!timeout) 614 timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout]; 615 616 /* sp->expire may have been adaptively scaled by export. */ 617 st->expire -= timeout - ntohl(sp->pfs_1301.expire); 618 } 619 620 st->direction = sp->pfs_1301.direction; 621 st->log = sp->pfs_1301.log; 622 st->timeout = sp->pfs_1301.timeout; 623 624 switch (msg_version) { 625 case PFSYNC_MSG_VERSION_1301: 626 st->state_flags = sp->pfs_1301.state_flags; 627 /* 628 * In FreeBSD 13 pfsync lacks many attributes. Copy them 629 * from the rule if possible. If rule can't be matched 630 * clear any set options as we can't recover their 631 * parameters. 632 */ 633 if (r == &V_pf_default_rule) { 634 st->state_flags &= ~PFSTATE_SETMASK; 635 } else { 636 /* 637 * Similar to pf_rule_to_actions(). This code 638 * won't set the actions properly if they come 639 * from multiple "match" rules as only rule 640 * creating the state is send over pfsync. 641 */ 642 st->qid = r->qid; 643 st->pqid = r->pqid; 644 st->rtableid = r->rtableid; 645 if (r->scrub_flags & PFSTATE_SETTOS) 646 st->set_tos = r->set_tos; 647 st->min_ttl = r->min_ttl; 648 st->max_mss = r->max_mss; 649 st->state_flags |= (r->scrub_flags & 650 (PFSTATE_NODF|PFSTATE_RANDOMID| 651 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP| 652 PFSTATE_SETPRIO)); 653 if (r->dnpipe || r->dnrpipe) { 654 if (r->free_flags & PFRULE_DN_IS_PIPE) 655 st->state_flags |= PFSTATE_DN_IS_PIPE; 656 else 657 st->state_flags &= ~PFSTATE_DN_IS_PIPE; 658 } 659 st->dnpipe = r->dnpipe; 660 st->dnrpipe = r->dnrpipe; 661 } 662 break; 663 case PFSYNC_MSG_VERSION_1400: 664 st->state_flags = ntohs(sp->pfs_1400.state_flags); 665 st->qid = ntohs(sp->pfs_1400.qid); 666 st->pqid = ntohs(sp->pfs_1400.pqid); 667 st->dnpipe = ntohs(sp->pfs_1400.dnpipe); 668 st->dnrpipe = ntohs(sp->pfs_1400.dnrpipe); 669 st->rtableid = ntohl(sp->pfs_1400.rtableid); 670 st->min_ttl = sp->pfs_1400.min_ttl; 671 st->set_tos = sp->pfs_1400.set_tos; 672 st->max_mss = ntohs(sp->pfs_1400.max_mss); 673 st->set_prio[0] = sp->pfs_1400.set_prio[0]; 674 st->set_prio[1] = sp->pfs_1400.set_prio[1]; 675 st->rt = sp->pfs_1400.rt; 676 if (st->rt && (st->rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname)) == NULL) { 677 if (V_pf_status.debug >= PF_DEBUG_MISC) 678 printf("%s: unknown route interface: %s\n", 679 __func__, sp->pfs_1400.rt_ifname); 680 if (flags & PFSYNC_SI_IOCTL) 681 return (EINVAL); 682 return (0); /* skip this state */ 683 } 684 break; 685 default: 686 panic("%s: Unsupported pfsync_msg_version %d", 687 __func__, msg_version); 688 } 689 690 st->id = sp->pfs_1301.id; 691 st->creatorid = sp->pfs_1301.creatorid; 692 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 693 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 694 695 st->rule.ptr = r; 696 st->nat_rule.ptr = NULL; 697 st->anchor.ptr = NULL; 698 699 st->pfsync_time = time_uptime; 700 st->sync_state = PFSYNC_S_NONE; 701 702 if (!(flags & PFSYNC_SI_IOCTL)) 703 st->state_flags |= PFSTATE_NOSYNC; 704 705 if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) 706 goto cleanup_state; 707 708 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 709 counter_u64_add(r->states_cur, 1); 710 counter_u64_add(r->states_tot, 1); 711 712 if (!(flags & PFSYNC_SI_IOCTL)) { 713 st->state_flags &= ~PFSTATE_NOSYNC; 714 if (st->state_flags & PFSTATE_ACK) { 715 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 716 PFSYNC_BUCKET_LOCK(b); 717 pfsync_q_ins(st, PFSYNC_S_IACK, true); 718 PFSYNC_BUCKET_UNLOCK(b); 719 720 pfsync_push_all(sc); 721 } 722 } 723 st->state_flags &= ~PFSTATE_ACK; 724 PF_STATE_UNLOCK(st); 725 726 return (0); 727 728 cleanup: 729 error = ENOMEM; 730 if (skw == sks) 731 sks = NULL; 732 uma_zfree(V_pf_state_key_z, skw); 733 uma_zfree(V_pf_state_key_z, sks); 734 735 cleanup_state: /* pf_state_insert() frees the state keys. */ 736 if (st) { 737 st->timeout = PFTM_UNLINKED; /* appease an assert */ 738 pf_free_state(st); 739 } 740 return (error); 741 } 742 743 #ifdef INET 744 static int 745 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 746 { 747 struct pfsync_softc *sc = V_pfsyncif; 748 struct mbuf *m = *mp; 749 struct ip *ip = mtod(m, struct ip *); 750 struct pfsync_header *ph; 751 struct pfsync_subheader subh; 752 753 int offset, len, flags = 0; 754 int rv; 755 uint16_t count; 756 757 PF_RULES_RLOCK_TRACKER; 758 759 *mp = NULL; 760 V_pfsyncstats.pfsyncs_ipackets++; 761 762 /* Verify that we have a sync interface configured. */ 763 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 764 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 765 goto done; 766 767 /* verify that the packet came in on the right interface */ 768 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 769 V_pfsyncstats.pfsyncs_badif++; 770 goto done; 771 } 772 773 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 774 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 775 /* verify that the IP TTL is 255. */ 776 if (ip->ip_ttl != PFSYNC_DFLTTL) { 777 V_pfsyncstats.pfsyncs_badttl++; 778 goto done; 779 } 780 781 offset = ip->ip_hl << 2; 782 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 783 V_pfsyncstats.pfsyncs_hdrops++; 784 goto done; 785 } 786 787 if (offset + sizeof(*ph) > m->m_len) { 788 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 789 V_pfsyncstats.pfsyncs_hdrops++; 790 return (IPPROTO_DONE); 791 } 792 ip = mtod(m, struct ip *); 793 } 794 ph = (struct pfsync_header *)((char *)ip + offset); 795 796 /* verify the version */ 797 if (ph->version != PFSYNC_VERSION) { 798 V_pfsyncstats.pfsyncs_badver++; 799 goto done; 800 } 801 802 len = ntohs(ph->len) + offset; 803 if (m->m_pkthdr.len < len) { 804 V_pfsyncstats.pfsyncs_badlen++; 805 goto done; 806 } 807 808 /* 809 * Trusting pf_chksum during packet processing, as well as seeking 810 * in interface name tree, require holding PF_RULES_RLOCK(). 811 */ 812 PF_RULES_RLOCK(); 813 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 814 flags = PFSYNC_SI_CKSUM; 815 816 offset += sizeof(*ph); 817 while (offset <= len - sizeof(subh)) { 818 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 819 offset += sizeof(subh); 820 821 if (subh.action >= PFSYNC_ACT_MAX) { 822 V_pfsyncstats.pfsyncs_badact++; 823 PF_RULES_RUNLOCK(); 824 goto done; 825 } 826 827 count = ntohs(subh.count); 828 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 829 rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); 830 if (rv == -1) { 831 PF_RULES_RUNLOCK(); 832 return (IPPROTO_DONE); 833 } 834 835 offset += rv; 836 } 837 PF_RULES_RUNLOCK(); 838 839 done: 840 m_freem(m); 841 return (IPPROTO_DONE); 842 } 843 #endif 844 845 static int 846 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) 847 { 848 struct pfsync_clr *clr; 849 struct mbuf *mp; 850 int len = sizeof(*clr) * count; 851 int i, offp; 852 u_int32_t creatorid; 853 854 mp = m_pulldown(m, offset, len, &offp); 855 if (mp == NULL) { 856 V_pfsyncstats.pfsyncs_badlen++; 857 return (-1); 858 } 859 clr = (struct pfsync_clr *)(mp->m_data + offp); 860 861 for (i = 0; i < count; i++) { 862 creatorid = clr[i].creatorid; 863 864 if (clr[i].ifname[0] != '\0' && 865 pfi_kkif_find(clr[i].ifname) == NULL) 866 continue; 867 868 for (int i = 0; i <= pf_hashmask; i++) { 869 struct pf_idhash *ih = &V_pf_idhash[i]; 870 struct pf_kstate *s; 871 relock: 872 PF_HASHROW_LOCK(ih); 873 LIST_FOREACH(s, &ih->states, entry) { 874 if (s->creatorid == creatorid) { 875 s->state_flags |= PFSTATE_NOSYNC; 876 pf_unlink_state(s); 877 goto relock; 878 } 879 } 880 PF_HASHROW_UNLOCK(ih); 881 } 882 } 883 884 return (len); 885 } 886 887 static int 888 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) 889 { 890 struct mbuf *mp; 891 union pfsync_state_union *sa, *sp; 892 int i, offp, len, msg_version; 893 894 switch (action) { 895 case PFSYNC_ACT_INS_1301: 896 len = sizeof(struct pfsync_state_1301) * count; 897 msg_version = PFSYNC_MSG_VERSION_1301; 898 break; 899 case PFSYNC_ACT_INS_1400: 900 len = sizeof(struct pfsync_state_1400) * count; 901 msg_version = PFSYNC_MSG_VERSION_1400; 902 break; 903 default: 904 V_pfsyncstats.pfsyncs_badact++; 905 return (-1); 906 } 907 908 mp = m_pulldown(m, offset, len, &offp); 909 if (mp == NULL) { 910 V_pfsyncstats.pfsyncs_badlen++; 911 return (-1); 912 } 913 sa = (union pfsync_state_union *)(mp->m_data + offp); 914 915 for (i = 0; i < count; i++) { 916 sp = &sa[i]; 917 918 /* Check for invalid values. */ 919 if (sp->pfs_1301.timeout >= PFTM_MAX || 920 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 921 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || 922 sp->pfs_1301.direction > PF_OUT || 923 (sp->pfs_1301.af != AF_INET && 924 sp->pfs_1301.af != AF_INET6)) { 925 if (V_pf_status.debug >= PF_DEBUG_MISC) 926 printf("%s: invalid value\n", __func__); 927 V_pfsyncstats.pfsyncs_badval++; 928 continue; 929 } 930 931 if (pfsync_state_import(sp, flags, msg_version) == ENOMEM) 932 /* Drop out, but process the rest of the actions. */ 933 break; 934 } 935 936 return (len); 937 } 938 939 static int 940 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) 941 { 942 struct pfsync_ins_ack *ia, *iaa; 943 struct pf_kstate *st; 944 945 struct mbuf *mp; 946 int len = count * sizeof(*ia); 947 int offp, i; 948 949 mp = m_pulldown(m, offset, len, &offp); 950 if (mp == NULL) { 951 V_pfsyncstats.pfsyncs_badlen++; 952 return (-1); 953 } 954 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 955 956 for (i = 0; i < count; i++) { 957 ia = &iaa[i]; 958 959 st = pf_find_state_byid(ia->id, ia->creatorid); 960 if (st == NULL) 961 continue; 962 963 if (st->state_flags & PFSTATE_ACK) { 964 pfsync_undefer_state(st, 0); 965 } 966 PF_STATE_UNLOCK(st); 967 } 968 /* 969 * XXX this is not yet implemented, but we know the size of the 970 * message so we can skip it. 971 */ 972 973 return (count * sizeof(struct pfsync_ins_ack)); 974 } 975 976 static int 977 pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, 978 struct pfsync_state_peer *dst) 979 { 980 int sync = 0; 981 982 PF_STATE_LOCK_ASSERT(st); 983 984 /* 985 * The state should never go backwards except 986 * for syn-proxy states. Neither should the 987 * sequence window slide backwards. 988 */ 989 if ((st->src.state > src->state && 990 (st->src.state < PF_TCPS_PROXY_SRC || 991 src->state >= PF_TCPS_PROXY_SRC)) || 992 993 (st->src.state == src->state && 994 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 995 sync++; 996 else 997 pf_state_peer_ntoh(src, &st->src); 998 999 if ((st->dst.state > dst->state) || 1000 1001 (st->dst.state >= TCPS_SYN_SENT && 1002 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 1003 sync++; 1004 else 1005 pf_state_peer_ntoh(dst, &st->dst); 1006 1007 return (sync); 1008 } 1009 1010 static int 1011 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) 1012 { 1013 struct pfsync_softc *sc = V_pfsyncif; 1014 union pfsync_state_union *sa, *sp; 1015 struct pf_kstate *st; 1016 struct mbuf *mp; 1017 int sync, offp, i, len, msg_version; 1018 1019 switch (action) { 1020 case PFSYNC_ACT_UPD_1301: 1021 len = sizeof(struct pfsync_state_1301) * count; 1022 msg_version = PFSYNC_MSG_VERSION_1301; 1023 break; 1024 case PFSYNC_ACT_UPD_1400: 1025 len = sizeof(struct pfsync_state_1400) * count; 1026 msg_version = PFSYNC_MSG_VERSION_1400; 1027 break; 1028 default: 1029 V_pfsyncstats.pfsyncs_badact++; 1030 return (-1); 1031 } 1032 1033 mp = m_pulldown(m, offset, len, &offp); 1034 if (mp == NULL) { 1035 V_pfsyncstats.pfsyncs_badlen++; 1036 return (-1); 1037 } 1038 sa = (union pfsync_state_union *)(mp->m_data + offp); 1039 1040 for (i = 0; i < count; i++) { 1041 sp = &sa[i]; 1042 1043 /* check for invalid values */ 1044 if (sp->pfs_1301.timeout >= PFTM_MAX || 1045 sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || 1046 sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { 1047 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1048 printf("pfsync_input: PFSYNC_ACT_UPD: " 1049 "invalid value\n"); 1050 } 1051 V_pfsyncstats.pfsyncs_badval++; 1052 continue; 1053 } 1054 1055 st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid); 1056 if (st == NULL) { 1057 /* insert the update */ 1058 if (pfsync_state_import(sp, flags, msg_version)) 1059 V_pfsyncstats.pfsyncs_badstate++; 1060 continue; 1061 } 1062 1063 if (st->state_flags & PFSTATE_ACK) { 1064 pfsync_undefer_state(st, 1); 1065 } 1066 1067 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1068 sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst); 1069 else { 1070 sync = 0; 1071 1072 /* 1073 * Non-TCP protocol state machine always go 1074 * forwards 1075 */ 1076 if (st->src.state > sp->pfs_1301.src.state) 1077 sync++; 1078 else 1079 pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); 1080 if (st->dst.state > sp->pfs_1301.dst.state) 1081 sync++; 1082 else 1083 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1084 } 1085 if (sync < 2) { 1086 pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); 1087 pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); 1088 st->expire = time_uptime; 1089 st->timeout = sp->pfs_1301.timeout; 1090 } 1091 st->pfsync_time = time_uptime; 1092 1093 if (sync) { 1094 V_pfsyncstats.pfsyncs_stale++; 1095 1096 pfsync_update_state(st); 1097 PF_STATE_UNLOCK(st); 1098 pfsync_push_all(sc); 1099 continue; 1100 } 1101 PF_STATE_UNLOCK(st); 1102 } 1103 1104 return (len); 1105 } 1106 1107 static int 1108 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action) 1109 { 1110 struct pfsync_softc *sc = V_pfsyncif; 1111 struct pfsync_upd_c *ua, *up; 1112 struct pf_kstate *st; 1113 int len = count * sizeof(*up); 1114 int sync; 1115 struct mbuf *mp; 1116 int offp, i; 1117 1118 mp = m_pulldown(m, offset, len, &offp); 1119 if (mp == NULL) { 1120 V_pfsyncstats.pfsyncs_badlen++; 1121 return (-1); 1122 } 1123 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 1124 1125 for (i = 0; i < count; i++) { 1126 up = &ua[i]; 1127 1128 /* check for invalid values */ 1129 if (up->timeout >= PFTM_MAX || 1130 up->src.state > PF_TCPS_PROXY_DST || 1131 up->dst.state > PF_TCPS_PROXY_DST) { 1132 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1133 printf("pfsync_input: " 1134 "PFSYNC_ACT_UPD_C: " 1135 "invalid value\n"); 1136 } 1137 V_pfsyncstats.pfsyncs_badval++; 1138 continue; 1139 } 1140 1141 st = pf_find_state_byid(up->id, up->creatorid); 1142 if (st == NULL) { 1143 /* We don't have this state. Ask for it. */ 1144 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 1145 pfsync_request_update(up->creatorid, up->id); 1146 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 1147 continue; 1148 } 1149 1150 if (st->state_flags & PFSTATE_ACK) { 1151 pfsync_undefer_state(st, 1); 1152 } 1153 1154 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1155 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1156 else { 1157 sync = 0; 1158 1159 /* 1160 * Non-TCP protocol state machine always go 1161 * forwards 1162 */ 1163 if (st->src.state > up->src.state) 1164 sync++; 1165 else 1166 pf_state_peer_ntoh(&up->src, &st->src); 1167 if (st->dst.state > up->dst.state) 1168 sync++; 1169 else 1170 pf_state_peer_ntoh(&up->dst, &st->dst); 1171 } 1172 if (sync < 2) { 1173 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1174 pf_state_peer_ntoh(&up->dst, &st->dst); 1175 st->expire = time_uptime; 1176 st->timeout = up->timeout; 1177 } 1178 st->pfsync_time = time_uptime; 1179 1180 if (sync) { 1181 V_pfsyncstats.pfsyncs_stale++; 1182 1183 pfsync_update_state(st); 1184 PF_STATE_UNLOCK(st); 1185 pfsync_push_all(sc); 1186 continue; 1187 } 1188 PF_STATE_UNLOCK(st); 1189 } 1190 1191 return (len); 1192 } 1193 1194 static int 1195 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action) 1196 { 1197 struct pfsync_upd_req *ur, *ura; 1198 struct mbuf *mp; 1199 int len = count * sizeof(*ur); 1200 int i, offp; 1201 1202 struct pf_kstate *st; 1203 1204 mp = m_pulldown(m, offset, len, &offp); 1205 if (mp == NULL) { 1206 V_pfsyncstats.pfsyncs_badlen++; 1207 return (-1); 1208 } 1209 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1210 1211 for (i = 0; i < count; i++) { 1212 ur = &ura[i]; 1213 1214 if (ur->id == 0 && ur->creatorid == 0) 1215 pfsync_bulk_start(); 1216 else { 1217 st = pf_find_state_byid(ur->id, ur->creatorid); 1218 if (st == NULL) { 1219 V_pfsyncstats.pfsyncs_badstate++; 1220 continue; 1221 } 1222 if (st->state_flags & PFSTATE_NOSYNC) { 1223 PF_STATE_UNLOCK(st); 1224 continue; 1225 } 1226 1227 pfsync_update_state_req(st); 1228 PF_STATE_UNLOCK(st); 1229 } 1230 } 1231 1232 return (len); 1233 } 1234 1235 static int 1236 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action) 1237 { 1238 struct mbuf *mp; 1239 struct pfsync_del_c *sa, *sp; 1240 struct pf_kstate *st; 1241 int len = count * sizeof(*sp); 1242 int offp, i; 1243 1244 mp = m_pulldown(m, offset, len, &offp); 1245 if (mp == NULL) { 1246 V_pfsyncstats.pfsyncs_badlen++; 1247 return (-1); 1248 } 1249 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1250 1251 for (i = 0; i < count; i++) { 1252 sp = &sa[i]; 1253 1254 st = pf_find_state_byid(sp->id, sp->creatorid); 1255 if (st == NULL) { 1256 V_pfsyncstats.pfsyncs_badstate++; 1257 continue; 1258 } 1259 1260 st->state_flags |= PFSTATE_NOSYNC; 1261 pf_unlink_state(st); 1262 } 1263 1264 return (len); 1265 } 1266 1267 static int 1268 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action) 1269 { 1270 struct pfsync_softc *sc = V_pfsyncif; 1271 struct pfsync_bus *bus; 1272 struct mbuf *mp; 1273 int len = count * sizeof(*bus); 1274 int offp; 1275 1276 PFSYNC_BLOCK(sc); 1277 1278 /* If we're not waiting for a bulk update, who cares. */ 1279 if (sc->sc_ureq_sent == 0) { 1280 PFSYNC_BUNLOCK(sc); 1281 return (len); 1282 } 1283 1284 mp = m_pulldown(m, offset, len, &offp); 1285 if (mp == NULL) { 1286 PFSYNC_BUNLOCK(sc); 1287 V_pfsyncstats.pfsyncs_badlen++; 1288 return (-1); 1289 } 1290 bus = (struct pfsync_bus *)(mp->m_data + offp); 1291 1292 switch (bus->status) { 1293 case PFSYNC_BUS_START: 1294 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1295 V_pf_limits[PF_LIMIT_STATES].limit / 1296 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1297 sizeof(union pfsync_state_union)), 1298 pfsync_bulk_fail, sc); 1299 if (V_pf_status.debug >= PF_DEBUG_MISC) 1300 printf("pfsync: received bulk update start\n"); 1301 break; 1302 1303 case PFSYNC_BUS_END: 1304 if (time_uptime - ntohl(bus->endtime) >= 1305 sc->sc_ureq_sent) { 1306 /* that's it, we're happy */ 1307 sc->sc_ureq_sent = 0; 1308 sc->sc_bulk_tries = 0; 1309 callout_stop(&sc->sc_bulkfail_tmo); 1310 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1311 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1312 "pfsync bulk done"); 1313 sc->sc_flags |= PFSYNCF_OK; 1314 if (V_pf_status.debug >= PF_DEBUG_MISC) 1315 printf("pfsync: received valid " 1316 "bulk update end\n"); 1317 } else { 1318 if (V_pf_status.debug >= PF_DEBUG_MISC) 1319 printf("pfsync: received invalid " 1320 "bulk update end: bad timestamp\n"); 1321 } 1322 break; 1323 } 1324 PFSYNC_BUNLOCK(sc); 1325 1326 return (len); 1327 } 1328 1329 static int 1330 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action) 1331 { 1332 int len = count * sizeof(struct pfsync_tdb); 1333 1334 #if defined(IPSEC) 1335 struct pfsync_tdb *tp; 1336 struct mbuf *mp; 1337 int offp; 1338 int i; 1339 int s; 1340 1341 mp = m_pulldown(m, offset, len, &offp); 1342 if (mp == NULL) { 1343 V_pfsyncstats.pfsyncs_badlen++; 1344 return (-1); 1345 } 1346 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1347 1348 for (i = 0; i < count; i++) 1349 pfsync_update_net_tdb(&tp[i]); 1350 #endif 1351 1352 return (len); 1353 } 1354 1355 #if defined(IPSEC) 1356 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1357 static void 1358 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1359 { 1360 struct tdb *tdb; 1361 int s; 1362 1363 /* check for invalid values */ 1364 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1365 (pt->dst.sa.sa_family != AF_INET && 1366 pt->dst.sa.sa_family != AF_INET6)) 1367 goto bad; 1368 1369 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1370 if (tdb) { 1371 pt->rpl = ntohl(pt->rpl); 1372 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1373 1374 /* Neither replay nor byte counter should ever decrease. */ 1375 if (pt->rpl < tdb->tdb_rpl || 1376 pt->cur_bytes < tdb->tdb_cur_bytes) { 1377 goto bad; 1378 } 1379 1380 tdb->tdb_rpl = pt->rpl; 1381 tdb->tdb_cur_bytes = pt->cur_bytes; 1382 } 1383 return; 1384 1385 bad: 1386 if (V_pf_status.debug >= PF_DEBUG_MISC) 1387 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1388 "invalid value\n"); 1389 V_pfsyncstats.pfsyncs_badstate++; 1390 return; 1391 } 1392 #endif 1393 1394 static int 1395 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action) 1396 { 1397 /* check if we are at the right place in the packet */ 1398 if (offset != m->m_pkthdr.len) 1399 V_pfsyncstats.pfsyncs_badlen++; 1400 1401 /* we're done. free and let the caller return */ 1402 m_freem(m); 1403 return (-1); 1404 } 1405 1406 static int 1407 pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action) 1408 { 1409 V_pfsyncstats.pfsyncs_badact++; 1410 1411 m_freem(m); 1412 return (-1); 1413 } 1414 1415 static int 1416 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1417 struct route *rt) 1418 { 1419 m_freem(m); 1420 return (0); 1421 } 1422 1423 /* ARGSUSED */ 1424 static int 1425 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1426 { 1427 struct pfsync_softc *sc = ifp->if_softc; 1428 struct ifreq *ifr = (struct ifreq *)data; 1429 struct pfsyncreq pfsyncr; 1430 size_t nvbuflen; 1431 int error; 1432 int c; 1433 1434 switch (cmd) { 1435 case SIOCSIFFLAGS: 1436 PFSYNC_LOCK(sc); 1437 if (ifp->if_flags & IFF_UP) { 1438 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1439 PFSYNC_UNLOCK(sc); 1440 pfsync_pointers_init(); 1441 } else { 1442 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1443 PFSYNC_UNLOCK(sc); 1444 pfsync_pointers_uninit(); 1445 } 1446 break; 1447 case SIOCSIFMTU: 1448 if (!sc->sc_sync_if || 1449 ifr->ifr_mtu <= PFSYNC_MINPKT || 1450 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1451 return (EINVAL); 1452 if (ifr->ifr_mtu < ifp->if_mtu) { 1453 for (c = 0; c < pfsync_buckets; c++) { 1454 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 1455 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) 1456 pfsync_sendout(1, c); 1457 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 1458 } 1459 } 1460 ifp->if_mtu = ifr->ifr_mtu; 1461 break; 1462 case SIOCGETPFSYNC: 1463 bzero(&pfsyncr, sizeof(pfsyncr)); 1464 PFSYNC_LOCK(sc); 1465 if (sc->sc_sync_if) { 1466 strlcpy(pfsyncr.pfsyncr_syncdev, 1467 sc->sc_sync_if->if_xname, IFNAMSIZ); 1468 } 1469 pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; 1470 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1471 pfsyncr.pfsyncr_defer = sc->sc_flags; 1472 PFSYNC_UNLOCK(sc); 1473 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1474 sizeof(pfsyncr))); 1475 1476 case SIOCGETPFSYNCNV: 1477 { 1478 nvlist_t *nvl_syncpeer; 1479 nvlist_t *nvl = nvlist_create(0); 1480 1481 if (nvl == NULL) 1482 return (ENOMEM); 1483 1484 if (sc->sc_sync_if) 1485 nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); 1486 nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); 1487 nvlist_add_number(nvl, "flags", sc->sc_flags); 1488 nvlist_add_number(nvl, "version", sc->sc_version); 1489 if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) 1490 nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); 1491 1492 void *packed = NULL; 1493 packed = nvlist_pack(nvl, &nvbuflen); 1494 if (packed == NULL) { 1495 free(packed, M_NVLIST); 1496 nvlist_destroy(nvl); 1497 return (ENOMEM); 1498 } 1499 1500 if (nvbuflen > ifr->ifr_cap_nv.buf_length) { 1501 ifr->ifr_cap_nv.length = nvbuflen; 1502 ifr->ifr_cap_nv.buffer = NULL; 1503 free(packed, M_NVLIST); 1504 nvlist_destroy(nvl); 1505 return (EFBIG); 1506 } 1507 1508 ifr->ifr_cap_nv.length = nvbuflen; 1509 error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); 1510 1511 nvlist_destroy(nvl); 1512 nvlist_destroy(nvl_syncpeer); 1513 free(packed, M_NVLIST); 1514 break; 1515 } 1516 1517 case SIOCSETPFSYNC: 1518 { 1519 struct pfsync_kstatus status; 1520 1521 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1522 return (error); 1523 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1524 sizeof(pfsyncr)))) 1525 return (error); 1526 1527 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1528 pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); 1529 1530 error = pfsync_kstatus_to_softc(&status, sc); 1531 return (error); 1532 } 1533 case SIOCSETPFSYNCNV: 1534 { 1535 struct pfsync_kstatus status; 1536 void *data; 1537 nvlist_t *nvl; 1538 1539 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1540 return (error); 1541 if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) 1542 return (EINVAL); 1543 1544 data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); 1545 1546 if ((error = copyin(ifr->ifr_cap_nv.buffer, data, 1547 ifr->ifr_cap_nv.length)) != 0) { 1548 free(data, M_TEMP); 1549 return (error); 1550 } 1551 1552 if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { 1553 free(data, M_TEMP); 1554 return (EINVAL); 1555 } 1556 1557 memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); 1558 pfsync_nvstatus_to_kstatus(nvl, &status); 1559 1560 nvlist_destroy(nvl); 1561 free(data, M_TEMP); 1562 1563 error = pfsync_kstatus_to_softc(&status, sc); 1564 return (error); 1565 } 1566 default: 1567 return (ENOTTY); 1568 } 1569 1570 return (0); 1571 } 1572 1573 static void 1574 pfsync_out_state_1301(struct pf_kstate *st, void *buf) 1575 { 1576 union pfsync_state_union *sp = buf; 1577 1578 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301); 1579 } 1580 1581 static void 1582 pfsync_out_state_1400(struct pf_kstate *st, void *buf) 1583 { 1584 union pfsync_state_union *sp = buf; 1585 1586 pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400); 1587 } 1588 1589 static void 1590 pfsync_out_iack(struct pf_kstate *st, void *buf) 1591 { 1592 struct pfsync_ins_ack *iack = buf; 1593 1594 iack->id = st->id; 1595 iack->creatorid = st->creatorid; 1596 } 1597 1598 static void 1599 pfsync_out_upd_c(struct pf_kstate *st, void *buf) 1600 { 1601 struct pfsync_upd_c *up = buf; 1602 1603 bzero(up, sizeof(*up)); 1604 up->id = st->id; 1605 pf_state_peer_hton(&st->src, &up->src); 1606 pf_state_peer_hton(&st->dst, &up->dst); 1607 up->creatorid = st->creatorid; 1608 up->timeout = st->timeout; 1609 } 1610 1611 static void 1612 pfsync_out_del_c(struct pf_kstate *st, void *buf) 1613 { 1614 struct pfsync_del_c *dp = buf; 1615 1616 dp->id = st->id; 1617 dp->creatorid = st->creatorid; 1618 st->state_flags |= PFSTATE_NOSYNC; 1619 } 1620 1621 static void 1622 pfsync_drop(struct pfsync_softc *sc) 1623 { 1624 struct pf_kstate *st, *next; 1625 struct pfsync_upd_req_item *ur; 1626 struct pfsync_bucket *b; 1627 int c; 1628 enum pfsync_q_id q; 1629 1630 for (c = 0; c < pfsync_buckets; c++) { 1631 b = &sc->sc_buckets[c]; 1632 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1633 if (TAILQ_EMPTY(&b->b_qs[q])) 1634 continue; 1635 1636 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { 1637 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1638 ("%s: st->sync_state == q", 1639 __func__)); 1640 st->sync_state = PFSYNC_S_NONE; 1641 pf_release_state(st); 1642 } 1643 TAILQ_INIT(&b->b_qs[q]); 1644 } 1645 1646 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1647 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1648 free(ur, M_PFSYNC); 1649 } 1650 1651 b->b_len = PFSYNC_MINPKT; 1652 b->b_plus = NULL; 1653 } 1654 } 1655 1656 static void 1657 pfsync_sendout(int schedswi, int c) 1658 { 1659 struct pfsync_softc *sc = V_pfsyncif; 1660 struct ifnet *ifp = sc->sc_ifp; 1661 struct mbuf *m; 1662 struct pfsync_header *ph; 1663 struct pfsync_subheader *subh; 1664 struct pf_kstate *st, *st_next; 1665 struct pfsync_upd_req_item *ur; 1666 struct pfsync_bucket *b = &sc->sc_buckets[c]; 1667 int aflen, offset, count = 0; 1668 enum pfsync_q_id q; 1669 1670 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1671 KASSERT(b->b_len > PFSYNC_MINPKT, 1672 ("%s: sc_len %zu", __func__, b->b_len)); 1673 PFSYNC_BUCKET_LOCK_ASSERT(b); 1674 1675 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1676 pfsync_drop(sc); 1677 return; 1678 } 1679 1680 m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1681 if (m == NULL) { 1682 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1683 V_pfsyncstats.pfsyncs_onomem++; 1684 return; 1685 } 1686 m->m_data += max_linkhdr; 1687 m->m_len = m->m_pkthdr.len = b->b_len; 1688 1689 /* build the ip header */ 1690 switch (sc->sc_sync_peer.ss_family) { 1691 #ifdef INET 1692 case AF_INET: 1693 { 1694 struct ip *ip; 1695 1696 ip = mtod(m, struct ip *); 1697 bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); 1698 aflen = offset = sizeof(*ip); 1699 1700 ip->ip_len = htons(m->m_pkthdr.len); 1701 ip_fillid(ip); 1702 break; 1703 } 1704 #endif 1705 default: 1706 m_freem(m); 1707 return; 1708 } 1709 1710 /* build the pfsync header */ 1711 ph = (struct pfsync_header *)(m->m_data + offset); 1712 bzero(ph, sizeof(*ph)); 1713 offset += sizeof(*ph); 1714 1715 ph->version = PFSYNC_VERSION; 1716 ph->len = htons(b->b_len - aflen); 1717 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1718 1719 /* walk the queues */ 1720 for (q = 0; q < PFSYNC_Q_COUNT; q++) { 1721 if (TAILQ_EMPTY(&b->b_qs[q])) 1722 continue; 1723 1724 subh = (struct pfsync_subheader *)(m->m_data + offset); 1725 offset += sizeof(*subh); 1726 1727 count = 0; 1728 TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { 1729 KASSERT(st->sync_state == pfsync_qid_sstate[q], 1730 ("%s: st->sync_state == q", 1731 __func__)); 1732 /* 1733 * XXXGL: some of write methods do unlocked reads 1734 * of state data :( 1735 */ 1736 pfsync_qs[q].write(st, m->m_data + offset); 1737 offset += pfsync_qs[q].len; 1738 st->sync_state = PFSYNC_S_NONE; 1739 pf_release_state(st); 1740 count++; 1741 } 1742 TAILQ_INIT(&b->b_qs[q]); 1743 1744 bzero(subh, sizeof(*subh)); 1745 subh->action = pfsync_qs[q].action; 1746 subh->count = htons(count); 1747 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1748 } 1749 1750 if (!TAILQ_EMPTY(&b->b_upd_req_list)) { 1751 subh = (struct pfsync_subheader *)(m->m_data + offset); 1752 offset += sizeof(*subh); 1753 1754 count = 0; 1755 while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { 1756 TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); 1757 1758 bcopy(&ur->ur_msg, m->m_data + offset, 1759 sizeof(ur->ur_msg)); 1760 offset += sizeof(ur->ur_msg); 1761 free(ur, M_PFSYNC); 1762 count++; 1763 } 1764 1765 bzero(subh, sizeof(*subh)); 1766 subh->action = PFSYNC_ACT_UPD_REQ; 1767 subh->count = htons(count); 1768 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1769 } 1770 1771 /* has someone built a custom region for us to add? */ 1772 if (b->b_plus != NULL) { 1773 bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); 1774 offset += b->b_pluslen; 1775 1776 b->b_plus = NULL; 1777 } 1778 1779 subh = (struct pfsync_subheader *)(m->m_data + offset); 1780 offset += sizeof(*subh); 1781 1782 bzero(subh, sizeof(*subh)); 1783 subh->action = PFSYNC_ACT_EOF; 1784 subh->count = htons(1); 1785 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1786 1787 /* we're done, let's put it on the wire */ 1788 if (ifp->if_bpf) { 1789 m->m_data += aflen; 1790 m->m_len = m->m_pkthdr.len = b->b_len - aflen; 1791 BPF_MTAP(ifp, m); 1792 m->m_data -= aflen; 1793 m->m_len = m->m_pkthdr.len = b->b_len; 1794 } 1795 1796 if (sc->sc_sync_if == NULL) { 1797 b->b_len = PFSYNC_MINPKT; 1798 m_freem(m); 1799 return; 1800 } 1801 1802 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 1803 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 1804 b->b_len = PFSYNC_MINPKT; 1805 1806 if (!_IF_QFULL(&b->b_snd)) 1807 _IF_ENQUEUE(&b->b_snd, m); 1808 else { 1809 m_freem(m); 1810 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 1811 } 1812 if (schedswi) 1813 swi_sched(V_pfsync_swi_cookie, 0); 1814 } 1815 1816 static void 1817 pfsync_insert_state(struct pf_kstate *st) 1818 { 1819 struct pfsync_softc *sc = V_pfsyncif; 1820 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1821 1822 if (st->state_flags & PFSTATE_NOSYNC) 1823 return; 1824 1825 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1826 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1827 st->state_flags |= PFSTATE_NOSYNC; 1828 return; 1829 } 1830 1831 KASSERT(st->sync_state == PFSYNC_S_NONE, 1832 ("%s: st->sync_state %u", __func__, st->sync_state)); 1833 1834 PFSYNC_BUCKET_LOCK(b); 1835 if (b->b_len == PFSYNC_MINPKT) 1836 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 1837 1838 pfsync_q_ins(st, PFSYNC_S_INS, true); 1839 PFSYNC_BUCKET_UNLOCK(b); 1840 1841 st->sync_updates = 0; 1842 } 1843 1844 static int 1845 pfsync_defer(struct pf_kstate *st, struct mbuf *m) 1846 { 1847 struct pfsync_softc *sc = V_pfsyncif; 1848 struct pfsync_deferral *pd; 1849 struct pfsync_bucket *b; 1850 1851 if (m->m_flags & (M_BCAST|M_MCAST)) 1852 return (0); 1853 1854 if (sc == NULL) 1855 return (0); 1856 1857 b = pfsync_get_bucket(sc, st); 1858 1859 PFSYNC_LOCK(sc); 1860 1861 if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || 1862 !(sc->sc_flags & PFSYNCF_DEFER)) { 1863 PFSYNC_UNLOCK(sc); 1864 return (0); 1865 } 1866 1867 PFSYNC_BUCKET_LOCK(b); 1868 PFSYNC_UNLOCK(sc); 1869 1870 if (b->b_deferred >= 128) 1871 pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); 1872 1873 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1874 if (pd == NULL) { 1875 PFSYNC_BUCKET_UNLOCK(b); 1876 return (0); 1877 } 1878 b->b_deferred++; 1879 1880 m->m_flags |= M_SKIP_FIREWALL; 1881 st->state_flags |= PFSTATE_ACK; 1882 1883 pd->pd_sc = sc; 1884 pd->pd_st = st; 1885 pf_ref_state(st); 1886 pd->pd_m = m; 1887 1888 TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); 1889 callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); 1890 callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, 1891 pfsync_defer_tmo, pd); 1892 1893 pfsync_push(b); 1894 PFSYNC_BUCKET_UNLOCK(b); 1895 1896 return (1); 1897 } 1898 1899 static void 1900 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1901 { 1902 struct pfsync_softc *sc = pd->pd_sc; 1903 struct mbuf *m = pd->pd_m; 1904 struct pf_kstate *st = pd->pd_st; 1905 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1906 1907 PFSYNC_BUCKET_LOCK_ASSERT(b); 1908 1909 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1910 b->b_deferred--; 1911 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1912 free(pd, M_PFSYNC); 1913 pf_release_state(st); 1914 1915 if (drop) 1916 m_freem(m); 1917 else { 1918 _IF_ENQUEUE(&b->b_snd, m); 1919 pfsync_push(b); 1920 } 1921 } 1922 1923 static void 1924 pfsync_defer_tmo(void *arg) 1925 { 1926 struct epoch_tracker et; 1927 struct pfsync_deferral *pd = arg; 1928 struct pfsync_softc *sc = pd->pd_sc; 1929 struct mbuf *m = pd->pd_m; 1930 struct pf_kstate *st = pd->pd_st; 1931 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1932 1933 PFSYNC_BUCKET_LOCK_ASSERT(b); 1934 1935 TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); 1936 b->b_deferred--; 1937 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1938 PFSYNC_BUCKET_UNLOCK(b); 1939 free(pd, M_PFSYNC); 1940 1941 if (sc->sc_sync_if == NULL) { 1942 pf_release_state(st); 1943 m_freem(m); 1944 return; 1945 } 1946 1947 NET_EPOCH_ENTER(et); 1948 CURVNET_SET(sc->sc_sync_if->if_vnet); 1949 1950 pfsync_tx(sc, m); 1951 1952 pf_release_state(st); 1953 1954 CURVNET_RESTORE(); 1955 NET_EPOCH_EXIT(et); 1956 } 1957 1958 static void 1959 pfsync_undefer_state_locked(struct pf_kstate *st, int drop) 1960 { 1961 struct pfsync_softc *sc = V_pfsyncif; 1962 struct pfsync_deferral *pd; 1963 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1964 1965 PFSYNC_BUCKET_LOCK_ASSERT(b); 1966 1967 TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { 1968 if (pd->pd_st == st) { 1969 if (callout_stop(&pd->pd_tmo) > 0) 1970 pfsync_undefer(pd, drop); 1971 1972 return; 1973 } 1974 } 1975 1976 panic("%s: unable to find deferred state", __func__); 1977 } 1978 1979 static void 1980 pfsync_undefer_state(struct pf_kstate *st, int drop) 1981 { 1982 struct pfsync_softc *sc = V_pfsyncif; 1983 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 1984 1985 PFSYNC_BUCKET_LOCK(b); 1986 pfsync_undefer_state_locked(st, drop); 1987 PFSYNC_BUCKET_UNLOCK(b); 1988 } 1989 1990 static struct pfsync_bucket* 1991 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) 1992 { 1993 int c = PF_IDHASH(st) % pfsync_buckets; 1994 return &sc->sc_buckets[c]; 1995 } 1996 1997 static void 1998 pfsync_update_state(struct pf_kstate *st) 1999 { 2000 struct pfsync_softc *sc = V_pfsyncif; 2001 bool sync = false, ref = true; 2002 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2003 2004 PF_STATE_LOCK_ASSERT(st); 2005 PFSYNC_BUCKET_LOCK(b); 2006 2007 if (st->state_flags & PFSTATE_ACK) 2008 pfsync_undefer_state_locked(st, 0); 2009 if (st->state_flags & PFSTATE_NOSYNC) { 2010 if (st->sync_state != PFSYNC_S_NONE) 2011 pfsync_q_del(st, true, b); 2012 PFSYNC_BUCKET_UNLOCK(b); 2013 return; 2014 } 2015 2016 if (b->b_len == PFSYNC_MINPKT) 2017 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2018 2019 switch (st->sync_state) { 2020 case PFSYNC_S_UPD_C: 2021 case PFSYNC_S_UPD: 2022 case PFSYNC_S_INS: 2023 /* we're already handling it */ 2024 2025 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 2026 st->sync_updates++; 2027 if (st->sync_updates >= sc->sc_maxupdates) 2028 sync = true; 2029 } 2030 break; 2031 2032 case PFSYNC_S_IACK: 2033 pfsync_q_del(st, false, b); 2034 ref = false; 2035 /* FALLTHROUGH */ 2036 2037 case PFSYNC_S_NONE: 2038 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 2039 st->sync_updates = 0; 2040 break; 2041 2042 default: 2043 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2044 } 2045 2046 if (sync || (time_uptime - st->pfsync_time) < 2) 2047 pfsync_push(b); 2048 2049 PFSYNC_BUCKET_UNLOCK(b); 2050 } 2051 2052 static void 2053 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 2054 { 2055 struct pfsync_softc *sc = V_pfsyncif; 2056 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2057 struct pfsync_upd_req_item *item; 2058 size_t nlen = sizeof(struct pfsync_upd_req); 2059 2060 PFSYNC_BUCKET_LOCK_ASSERT(b); 2061 2062 /* 2063 * This code does a bit to prevent multiple update requests for the 2064 * same state being generated. It searches current subheader queue, 2065 * but it doesn't lookup into queue of already packed datagrams. 2066 */ 2067 TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) 2068 if (item->ur_msg.id == id && 2069 item->ur_msg.creatorid == creatorid) 2070 return; 2071 2072 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 2073 if (item == NULL) 2074 return; /* XXX stats */ 2075 2076 item->ur_msg.id = id; 2077 item->ur_msg.creatorid = creatorid; 2078 2079 if (TAILQ_EMPTY(&b->b_upd_req_list)) 2080 nlen += sizeof(struct pfsync_subheader); 2081 2082 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2083 pfsync_sendout(0, 0); 2084 2085 nlen = sizeof(struct pfsync_subheader) + 2086 sizeof(struct pfsync_upd_req); 2087 } 2088 2089 TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); 2090 b->b_len += nlen; 2091 2092 pfsync_push(b); 2093 } 2094 2095 static bool 2096 pfsync_update_state_req(struct pf_kstate *st) 2097 { 2098 struct pfsync_softc *sc = V_pfsyncif; 2099 bool ref = true, full = false; 2100 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2101 2102 PF_STATE_LOCK_ASSERT(st); 2103 PFSYNC_BUCKET_LOCK(b); 2104 2105 if (st->state_flags & PFSTATE_NOSYNC) { 2106 if (st->sync_state != PFSYNC_S_NONE) 2107 pfsync_q_del(st, true, b); 2108 PFSYNC_BUCKET_UNLOCK(b); 2109 return (full); 2110 } 2111 2112 switch (st->sync_state) { 2113 case PFSYNC_S_UPD_C: 2114 case PFSYNC_S_IACK: 2115 pfsync_q_del(st, false, b); 2116 ref = false; 2117 /* FALLTHROUGH */ 2118 2119 case PFSYNC_S_NONE: 2120 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 2121 pfsync_push(b); 2122 break; 2123 2124 case PFSYNC_S_INS: 2125 case PFSYNC_S_UPD: 2126 case PFSYNC_S_DEL_C: 2127 /* we're already handling it */ 2128 break; 2129 2130 default: 2131 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2132 } 2133 2134 if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union)) 2135 full = true; 2136 2137 PFSYNC_BUCKET_UNLOCK(b); 2138 2139 return (full); 2140 } 2141 2142 static void 2143 pfsync_delete_state(struct pf_kstate *st) 2144 { 2145 struct pfsync_softc *sc = V_pfsyncif; 2146 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2147 bool ref = true; 2148 2149 PFSYNC_BUCKET_LOCK(b); 2150 if (st->state_flags & PFSTATE_ACK) 2151 pfsync_undefer_state_locked(st, 1); 2152 if (st->state_flags & PFSTATE_NOSYNC) { 2153 if (st->sync_state != PFSYNC_S_NONE) 2154 pfsync_q_del(st, true, b); 2155 PFSYNC_BUCKET_UNLOCK(b); 2156 return; 2157 } 2158 2159 if (b->b_len == PFSYNC_MINPKT) 2160 callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); 2161 2162 switch (st->sync_state) { 2163 case PFSYNC_S_INS: 2164 /* We never got to tell the world so just forget about it. */ 2165 pfsync_q_del(st, true, b); 2166 break; 2167 2168 case PFSYNC_S_UPD_C: 2169 case PFSYNC_S_UPD: 2170 case PFSYNC_S_IACK: 2171 pfsync_q_del(st, false, b); 2172 ref = false; 2173 /* FALLTHROUGH */ 2174 2175 case PFSYNC_S_NONE: 2176 pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); 2177 break; 2178 2179 default: 2180 panic("%s: unexpected sync state %d", __func__, st->sync_state); 2181 } 2182 2183 PFSYNC_BUCKET_UNLOCK(b); 2184 } 2185 2186 static void 2187 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2188 { 2189 struct { 2190 struct pfsync_subheader subh; 2191 struct pfsync_clr clr; 2192 } __packed r; 2193 2194 bzero(&r, sizeof(r)); 2195 2196 r.subh.action = PFSYNC_ACT_CLR; 2197 r.subh.count = htons(1); 2198 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 2199 2200 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2201 r.clr.creatorid = creatorid; 2202 2203 pfsync_send_plus(&r, sizeof(r)); 2204 } 2205 2206 static enum pfsync_q_id 2207 pfsync_sstate_to_qid(u_int8_t sync_state) 2208 { 2209 struct pfsync_softc *sc = V_pfsyncif; 2210 2211 switch (sync_state) { 2212 case PFSYNC_S_INS: 2213 switch (sc->sc_version) { 2214 case PFSYNC_MSG_VERSION_1301: 2215 return PFSYNC_Q_INS_1301; 2216 case PFSYNC_MSG_VERSION_1400: 2217 return PFSYNC_Q_INS_1400; 2218 } 2219 break; 2220 case PFSYNC_S_IACK: 2221 return PFSYNC_Q_IACK; 2222 case PFSYNC_S_UPD: 2223 switch (sc->sc_version) { 2224 case PFSYNC_MSG_VERSION_1301: 2225 return PFSYNC_Q_UPD_1301; 2226 case PFSYNC_MSG_VERSION_1400: 2227 return PFSYNC_Q_UPD_1400; 2228 } 2229 break; 2230 case PFSYNC_S_UPD_C: 2231 return PFSYNC_Q_UPD_C; 2232 case PFSYNC_S_DEL_C: 2233 return PFSYNC_Q_DEL_C; 2234 default: 2235 panic("%s: Unsupported st->sync_state 0x%02x", 2236 __func__, sync_state); 2237 } 2238 2239 panic("%s: Unsupported pfsync_msg_version %d", 2240 __func__, sc->sc_version); 2241 } 2242 2243 static void 2244 pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref) 2245 { 2246 enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state); 2247 struct pfsync_softc *sc = V_pfsyncif; 2248 size_t nlen = pfsync_qs[q].len; 2249 struct pfsync_bucket *b = pfsync_get_bucket(sc, st); 2250 2251 PFSYNC_BUCKET_LOCK_ASSERT(b); 2252 2253 KASSERT(st->sync_state == PFSYNC_S_NONE, 2254 ("%s: st->sync_state %u", __func__, st->sync_state)); 2255 KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2256 b->b_len)); 2257 2258 if (TAILQ_EMPTY(&b->b_qs[q])) 2259 nlen += sizeof(struct pfsync_subheader); 2260 2261 if (b->b_len + nlen > sc->sc_ifp->if_mtu) { 2262 pfsync_sendout(1, b->b_id); 2263 2264 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2265 } 2266 2267 b->b_len += nlen; 2268 TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); 2269 st->sync_state = pfsync_qid_sstate[q]; 2270 if (ref) 2271 pf_ref_state(st); 2272 } 2273 2274 static void 2275 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) 2276 { 2277 enum pfsync_q_id q; 2278 2279 PFSYNC_BUCKET_LOCK_ASSERT(b); 2280 KASSERT(st->sync_state != PFSYNC_S_NONE, 2281 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2282 2283 q = pfsync_sstate_to_qid(st->sync_state); 2284 b->b_len -= pfsync_qs[q].len; 2285 TAILQ_REMOVE(&b->b_qs[q], st, sync_list); 2286 st->sync_state = PFSYNC_S_NONE; 2287 if (unref) 2288 pf_release_state(st); 2289 2290 if (TAILQ_EMPTY(&b->b_qs[q])) 2291 b->b_len -= sizeof(struct pfsync_subheader); 2292 } 2293 2294 static void 2295 pfsync_bulk_start(void) 2296 { 2297 struct pfsync_softc *sc = V_pfsyncif; 2298 2299 if (V_pf_status.debug >= PF_DEBUG_MISC) 2300 printf("pfsync: received bulk update request\n"); 2301 2302 PFSYNC_BLOCK(sc); 2303 2304 sc->sc_ureq_received = time_uptime; 2305 sc->sc_bulk_hashid = 0; 2306 sc->sc_bulk_stateid = 0; 2307 pfsync_bulk_status(PFSYNC_BUS_START); 2308 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2309 PFSYNC_BUNLOCK(sc); 2310 } 2311 2312 static void 2313 pfsync_bulk_update(void *arg) 2314 { 2315 struct pfsync_softc *sc = arg; 2316 struct pf_kstate *s; 2317 int i; 2318 2319 PFSYNC_BLOCK_ASSERT(sc); 2320 CURVNET_SET(sc->sc_ifp->if_vnet); 2321 2322 /* 2323 * Start with last state from previous invocation. 2324 * It may had gone, in this case start from the 2325 * hash slot. 2326 */ 2327 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2328 2329 if (s != NULL) 2330 i = PF_IDHASH(s); 2331 else 2332 i = sc->sc_bulk_hashid; 2333 2334 for (; i <= pf_hashmask; i++) { 2335 struct pf_idhash *ih = &V_pf_idhash[i]; 2336 2337 if (s != NULL) 2338 PF_HASHROW_ASSERT(ih); 2339 else { 2340 PF_HASHROW_LOCK(ih); 2341 s = LIST_FIRST(&ih->states); 2342 } 2343 2344 for (; s; s = LIST_NEXT(s, entry)) { 2345 if (s->sync_state == PFSYNC_S_NONE && 2346 s->timeout < PFTM_MAX && 2347 s->pfsync_time <= sc->sc_ureq_received) { 2348 if (pfsync_update_state_req(s)) { 2349 /* We've filled a packet. */ 2350 sc->sc_bulk_hashid = i; 2351 sc->sc_bulk_stateid = s->id; 2352 sc->sc_bulk_creatorid = s->creatorid; 2353 PF_HASHROW_UNLOCK(ih); 2354 callout_reset(&sc->sc_bulk_tmo, 1, 2355 pfsync_bulk_update, sc); 2356 goto full; 2357 } 2358 } 2359 } 2360 PF_HASHROW_UNLOCK(ih); 2361 } 2362 2363 /* We're done. */ 2364 pfsync_bulk_status(PFSYNC_BUS_END); 2365 full: 2366 CURVNET_RESTORE(); 2367 } 2368 2369 static void 2370 pfsync_bulk_status(u_int8_t status) 2371 { 2372 struct { 2373 struct pfsync_subheader subh; 2374 struct pfsync_bus bus; 2375 } __packed r; 2376 2377 struct pfsync_softc *sc = V_pfsyncif; 2378 2379 bzero(&r, sizeof(r)); 2380 2381 r.subh.action = PFSYNC_ACT_BUS; 2382 r.subh.count = htons(1); 2383 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2384 2385 r.bus.creatorid = V_pf_status.hostid; 2386 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2387 r.bus.status = status; 2388 2389 pfsync_send_plus(&r, sizeof(r)); 2390 } 2391 2392 static void 2393 pfsync_bulk_fail(void *arg) 2394 { 2395 struct pfsync_softc *sc = arg; 2396 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2397 2398 CURVNET_SET(sc->sc_ifp->if_vnet); 2399 2400 PFSYNC_BLOCK_ASSERT(sc); 2401 2402 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2403 /* Try again */ 2404 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2405 pfsync_bulk_fail, V_pfsyncif); 2406 PFSYNC_BUCKET_LOCK(b); 2407 pfsync_request_update(0, 0); 2408 PFSYNC_BUCKET_UNLOCK(b); 2409 } else { 2410 /* Pretend like the transfer was ok. */ 2411 sc->sc_ureq_sent = 0; 2412 sc->sc_bulk_tries = 0; 2413 PFSYNC_LOCK(sc); 2414 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2415 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2416 "pfsync bulk fail"); 2417 sc->sc_flags |= PFSYNCF_OK; 2418 PFSYNC_UNLOCK(sc); 2419 if (V_pf_status.debug >= PF_DEBUG_MISC) 2420 printf("pfsync: failed to receive bulk update\n"); 2421 } 2422 2423 CURVNET_RESTORE(); 2424 } 2425 2426 static void 2427 pfsync_send_plus(void *plus, size_t pluslen) 2428 { 2429 struct pfsync_softc *sc = V_pfsyncif; 2430 struct pfsync_bucket *b = &sc->sc_buckets[0]; 2431 2432 PFSYNC_BUCKET_LOCK(b); 2433 2434 if (b->b_len + pluslen > sc->sc_ifp->if_mtu) 2435 pfsync_sendout(1, b->b_id); 2436 2437 b->b_plus = plus; 2438 b->b_len += (b->b_pluslen = pluslen); 2439 2440 pfsync_sendout(1, b->b_id); 2441 PFSYNC_BUCKET_UNLOCK(b); 2442 } 2443 2444 static void 2445 pfsync_timeout(void *arg) 2446 { 2447 struct pfsync_bucket *b = arg; 2448 2449 CURVNET_SET(b->b_sc->sc_ifp->if_vnet); 2450 PFSYNC_BUCKET_LOCK(b); 2451 pfsync_push(b); 2452 PFSYNC_BUCKET_UNLOCK(b); 2453 CURVNET_RESTORE(); 2454 } 2455 2456 static void 2457 pfsync_push(struct pfsync_bucket *b) 2458 { 2459 2460 PFSYNC_BUCKET_LOCK_ASSERT(b); 2461 2462 b->b_flags |= PFSYNCF_BUCKET_PUSH; 2463 swi_sched(V_pfsync_swi_cookie, 0); 2464 } 2465 2466 static void 2467 pfsync_push_all(struct pfsync_softc *sc) 2468 { 2469 int c; 2470 struct pfsync_bucket *b; 2471 2472 for (c = 0; c < pfsync_buckets; c++) { 2473 b = &sc->sc_buckets[c]; 2474 2475 PFSYNC_BUCKET_LOCK(b); 2476 pfsync_push(b); 2477 PFSYNC_BUCKET_UNLOCK(b); 2478 } 2479 } 2480 2481 static void 2482 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) 2483 { 2484 struct ip *ip; 2485 int af, error = 0; 2486 2487 ip = mtod(m, struct ip *); 2488 MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); 2489 2490 af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; 2491 2492 /* 2493 * We distinguish between a deferral packet and our 2494 * own pfsync packet based on M_SKIP_FIREWALL 2495 * flag. This is XXX. 2496 */ 2497 switch (af) { 2498 #ifdef INET 2499 case AF_INET: 2500 if (m->m_flags & M_SKIP_FIREWALL) { 2501 error = ip_output(m, NULL, NULL, 0, 2502 NULL, NULL); 2503 } else { 2504 error = ip_output(m, NULL, NULL, 2505 IP_RAWOUTPUT, &sc->sc_imo, NULL); 2506 } 2507 break; 2508 #endif 2509 #ifdef INET6 2510 case AF_INET6: 2511 if (m->m_flags & M_SKIP_FIREWALL) { 2512 error = ip6_output(m, NULL, NULL, 0, 2513 NULL, NULL, NULL); 2514 } else { 2515 MPASS(false); 2516 /* We don't support pfsync over IPv6. */ 2517 /*error = ip6_output(m, NULL, NULL, 2518 IP_RAWOUTPUT, &sc->sc_imo6, NULL);*/ 2519 } 2520 break; 2521 #endif 2522 } 2523 2524 if (error == 0) 2525 V_pfsyncstats.pfsyncs_opackets++; 2526 else 2527 V_pfsyncstats.pfsyncs_oerrors++; 2528 2529 } 2530 2531 static void 2532 pfsyncintr(void *arg) 2533 { 2534 struct epoch_tracker et; 2535 struct pfsync_softc *sc = arg; 2536 struct pfsync_bucket *b; 2537 struct mbuf *m, *n; 2538 int c; 2539 2540 NET_EPOCH_ENTER(et); 2541 CURVNET_SET(sc->sc_ifp->if_vnet); 2542 2543 for (c = 0; c < pfsync_buckets; c++) { 2544 b = &sc->sc_buckets[c]; 2545 2546 PFSYNC_BUCKET_LOCK(b); 2547 if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { 2548 pfsync_sendout(0, b->b_id); 2549 b->b_flags &= ~PFSYNCF_BUCKET_PUSH; 2550 } 2551 _IF_DEQUEUE_ALL(&b->b_snd, m); 2552 PFSYNC_BUCKET_UNLOCK(b); 2553 2554 for (; m != NULL; m = n) { 2555 n = m->m_nextpkt; 2556 m->m_nextpkt = NULL; 2557 2558 pfsync_tx(sc, m); 2559 } 2560 } 2561 CURVNET_RESTORE(); 2562 NET_EPOCH_EXIT(et); 2563 } 2564 2565 static int 2566 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, 2567 struct in_mfilter *imf) 2568 { 2569 struct ip_moptions *imo = &sc->sc_imo; 2570 int error; 2571 2572 if (!(ifp->if_flags & IFF_MULTICAST)) 2573 return (EADDRNOTAVAIL); 2574 2575 switch (sc->sc_sync_peer.ss_family) { 2576 #ifdef INET 2577 case AF_INET: 2578 { 2579 ip_mfilter_init(&imo->imo_head); 2580 imo->imo_multicast_vif = -1; 2581 if ((error = in_joingroup(ifp, &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, 2582 &imf->imf_inm)) != 0) 2583 return (error); 2584 2585 ip_mfilter_insert(&imo->imo_head, imf); 2586 imo->imo_multicast_ifp = ifp; 2587 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2588 imo->imo_multicast_loop = 0; 2589 break; 2590 } 2591 #endif 2592 } 2593 2594 return (0); 2595 } 2596 2597 static void 2598 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2599 { 2600 struct ip_moptions *imo = &sc->sc_imo; 2601 struct in_mfilter *imf; 2602 2603 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 2604 ip_mfilter_remove(&imo->imo_head, imf); 2605 in_leavegroup(imf->imf_inm, NULL); 2606 ip_mfilter_free(imf); 2607 } 2608 imo->imo_multicast_ifp = NULL; 2609 } 2610 2611 void 2612 pfsync_detach_ifnet(struct ifnet *ifp) 2613 { 2614 struct pfsync_softc *sc = V_pfsyncif; 2615 2616 if (sc == NULL) 2617 return; 2618 2619 PFSYNC_LOCK(sc); 2620 2621 if (sc->sc_sync_if == ifp) { 2622 /* We don't need mutlicast cleanup here, because the interface 2623 * is going away. We do need to ensure we don't try to do 2624 * cleanup later. 2625 */ 2626 ip_mfilter_init(&sc->sc_imo.imo_head); 2627 sc->sc_imo.imo_multicast_ifp = NULL; 2628 sc->sc_sync_if = NULL; 2629 } 2630 2631 PFSYNC_UNLOCK(sc); 2632 } 2633 2634 static int 2635 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) 2636 { 2637 struct sockaddr_storage sa; 2638 status->maxupdates = pfsyncr->pfsyncr_maxupdates; 2639 status->flags = pfsyncr->pfsyncr_defer; 2640 2641 strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); 2642 2643 memset(&sa, 0, sizeof(sa)); 2644 if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { 2645 struct sockaddr_in *in = (struct sockaddr_in *)&sa; 2646 in->sin_family = AF_INET; 2647 in->sin_len = sizeof(*in); 2648 in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; 2649 } 2650 status->syncpeer = sa; 2651 2652 return 0; 2653 } 2654 2655 static int 2656 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) 2657 { 2658 struct in_mfilter *imf = NULL; 2659 struct ifnet *sifp; 2660 struct ip *ip; 2661 int error; 2662 int c; 2663 2664 if ((status->maxupdates < 0) || (status->maxupdates > 255)) 2665 return (EINVAL); 2666 2667 if (status->syncdev[0] == '\0') 2668 sifp = NULL; 2669 else if ((sifp = ifunit_ref(status->syncdev)) == NULL) 2670 return (EINVAL); 2671 2672 struct sockaddr_in *status_sin = 2673 (struct sockaddr_in *)&(status->syncpeer); 2674 if (sifp != NULL && (status_sin->sin_addr.s_addr == 0 || 2675 status_sin->sin_addr.s_addr == 2676 htonl(INADDR_PFSYNC_GROUP))) 2677 imf = ip_mfilter_alloc(M_WAITOK, 0, 0); 2678 2679 PFSYNC_LOCK(sc); 2680 2681 switch (status->version) { 2682 case PFSYNC_MSG_VERSION_UNSPECIFIED: 2683 sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; 2684 break; 2685 case PFSYNC_MSG_VERSION_1301: 2686 case PFSYNC_MSG_VERSION_1400: 2687 sc->sc_version = status->version; 2688 break; 2689 default: 2690 PFSYNC_UNLOCK(sc); 2691 return (EINVAL); 2692 } 2693 2694 struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; 2695 sc_sin->sin_family = AF_INET; 2696 sc_sin->sin_len = sizeof(*sc_sin); 2697 if (status_sin->sin_addr.s_addr == 0) { 2698 sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); 2699 } else { 2700 sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; 2701 } 2702 2703 sc->sc_maxupdates = status->maxupdates; 2704 if (status->flags & PFSYNCF_DEFER) { 2705 sc->sc_flags |= PFSYNCF_DEFER; 2706 V_pfsync_defer_ptr = pfsync_defer; 2707 } else { 2708 sc->sc_flags &= ~PFSYNCF_DEFER; 2709 V_pfsync_defer_ptr = NULL; 2710 } 2711 2712 if (sifp == NULL) { 2713 if (sc->sc_sync_if) 2714 if_rele(sc->sc_sync_if); 2715 sc->sc_sync_if = NULL; 2716 pfsync_multicast_cleanup(sc); 2717 PFSYNC_UNLOCK(sc); 2718 return (0); 2719 } 2720 2721 for (c = 0; c < pfsync_buckets; c++) { 2722 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); 2723 if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && 2724 (sifp->if_mtu < sc->sc_ifp->if_mtu || 2725 (sc->sc_sync_if != NULL && 2726 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 2727 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 2728 pfsync_sendout(1, c); 2729 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); 2730 } 2731 2732 pfsync_multicast_cleanup(sc); 2733 2734 if (sc_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 2735 error = pfsync_multicast_setup(sc, sifp, imf); 2736 if (error) { 2737 if_rele(sifp); 2738 ip_mfilter_free(imf); 2739 PFSYNC_UNLOCK(sc); 2740 return (error); 2741 } 2742 } 2743 if (sc->sc_sync_if) 2744 if_rele(sc->sc_sync_if); 2745 sc->sc_sync_if = sifp; 2746 2747 ip = &sc->sc_template.ipv4; 2748 bzero(ip, sizeof(*ip)); 2749 ip->ip_v = IPVERSION; 2750 ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; 2751 ip->ip_tos = IPTOS_LOWDELAY; 2752 /* len and id are set later. */ 2753 ip->ip_off = htons(IP_DF); 2754 ip->ip_ttl = PFSYNC_DFLTTL; 2755 ip->ip_p = IPPROTO_PFSYNC; 2756 ip->ip_src.s_addr = INADDR_ANY; 2757 ip->ip_dst.s_addr = sc_sin->sin_addr.s_addr; 2758 2759 /* Request a full state table update. */ 2760 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2761 (*carp_demote_adj_p)(V_pfsync_carp_adj, 2762 "pfsync bulk start"); 2763 sc->sc_flags &= ~PFSYNCF_OK; 2764 if (V_pf_status.debug >= PF_DEBUG_MISC) 2765 printf("pfsync: requesting bulk update\n"); 2766 PFSYNC_UNLOCK(sc); 2767 PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); 2768 pfsync_request_update(0, 0); 2769 PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); 2770 PFSYNC_BLOCK(sc); 2771 sc->sc_ureq_sent = time_uptime; 2772 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); 2773 PFSYNC_BUNLOCK(sc); 2774 return (0); 2775 } 2776 2777 static void 2778 pfsync_pointers_init(void) 2779 { 2780 2781 PF_RULES_WLOCK(); 2782 V_pfsync_state_import_ptr = pfsync_state_import; 2783 V_pfsync_insert_state_ptr = pfsync_insert_state; 2784 V_pfsync_update_state_ptr = pfsync_update_state; 2785 V_pfsync_delete_state_ptr = pfsync_delete_state; 2786 V_pfsync_clear_states_ptr = pfsync_clear_states; 2787 V_pfsync_defer_ptr = pfsync_defer; 2788 PF_RULES_WUNLOCK(); 2789 } 2790 2791 static void 2792 pfsync_pointers_uninit(void) 2793 { 2794 2795 PF_RULES_WLOCK(); 2796 V_pfsync_state_import_ptr = NULL; 2797 V_pfsync_insert_state_ptr = NULL; 2798 V_pfsync_update_state_ptr = NULL; 2799 V_pfsync_delete_state_ptr = NULL; 2800 V_pfsync_clear_states_ptr = NULL; 2801 V_pfsync_defer_ptr = NULL; 2802 PF_RULES_WUNLOCK(); 2803 } 2804 2805 static void 2806 vnet_pfsync_init(const void *unused __unused) 2807 { 2808 int error; 2809 2810 V_pfsync_cloner = if_clone_simple(pfsyncname, 2811 pfsync_clone_create, pfsync_clone_destroy, 1); 2812 error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, 2813 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 2814 if (error) { 2815 if_clone_detach(V_pfsync_cloner); 2816 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 2817 } 2818 2819 pfsync_pointers_init(); 2820 } 2821 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 2822 vnet_pfsync_init, NULL); 2823 2824 static void 2825 vnet_pfsync_uninit(const void *unused __unused) 2826 { 2827 int ret __diagused; 2828 2829 pfsync_pointers_uninit(); 2830 2831 if_clone_detach(V_pfsync_cloner); 2832 ret = swi_remove(V_pfsync_swi_cookie); 2833 MPASS(ret == 0); 2834 ret = intr_event_destroy(V_pfsync_swi_ie); 2835 MPASS(ret == 0); 2836 } 2837 2838 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 2839 vnet_pfsync_uninit, NULL); 2840 2841 static int 2842 pfsync_init(void) 2843 { 2844 #ifdef INET 2845 int error; 2846 2847 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 2848 2849 error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); 2850 if (error) 2851 return (error); 2852 #endif 2853 2854 return (0); 2855 } 2856 2857 static void 2858 pfsync_uninit(void) 2859 { 2860 pfsync_detach_ifnet_ptr = NULL; 2861 2862 #ifdef INET 2863 ipproto_unregister(IPPROTO_PFSYNC); 2864 #endif 2865 } 2866 2867 static int 2868 pfsync_modevent(module_t mod, int type, void *data) 2869 { 2870 int error = 0; 2871 2872 switch (type) { 2873 case MOD_LOAD: 2874 error = pfsync_init(); 2875 break; 2876 case MOD_UNLOAD: 2877 pfsync_uninit(); 2878 break; 2879 default: 2880 error = EINVAL; 2881 break; 2882 } 2883 2884 return (error); 2885 } 2886 2887 static moduledata_t pfsync_mod = { 2888 pfsyncname, 2889 pfsync_modevent, 2890 0 2891 }; 2892 2893 #define PFSYNC_MODVER 1 2894 2895 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 2896 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 2897 MODULE_VERSION(pfsync, PFSYNC_MODVER); 2898 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 2899