1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_pf.h" 68 69 #include <sys/param.h> 70 #include <sys/bus.h> 71 #include <sys/endian.h> 72 #include <sys/interrupt.h> 73 #include <sys/kernel.h> 74 #include <sys/lock.h> 75 #include <sys/mbuf.h> 76 #include <sys/module.h> 77 #include <sys/mutex.h> 78 #include <sys/priv.h> 79 #include <sys/protosw.h> 80 #include <sys/socket.h> 81 #include <sys/sockio.h> 82 #include <sys/sysctl.h> 83 #include <sys/syslog.h> 84 85 #include <net/bpf.h> 86 #include <net/if.h> 87 #include <net/if_var.h> 88 #include <net/if_clone.h> 89 #include <net/if_types.h> 90 #include <net/vnet.h> 91 #include <net/pfvar.h> 92 #include <net/if_pfsync.h> 93 94 #include <netinet/if_ether.h> 95 #include <netinet/in.h> 96 #include <netinet/in_var.h> 97 #include <netinet/ip.h> 98 #include <netinet/ip_carp.h> 99 #include <netinet/ip_var.h> 100 #include <netinet/tcp.h> 101 #include <netinet/tcp_fsm.h> 102 #include <netinet/tcp_seq.h> 103 104 #define PFSYNC_MINPKT ( \ 105 sizeof(struct ip) + \ 106 sizeof(struct pfsync_header) + \ 107 sizeof(struct pfsync_subheader) ) 108 109 struct pfsync_pkt { 110 struct ip *ip; 111 struct in_addr src; 112 u_int8_t flags; 113 }; 114 115 static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 116 struct pfsync_state_peer *); 117 static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); 118 static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); 119 static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); 120 static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); 121 static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); 122 static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); 123 static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); 124 static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); 125 static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); 126 static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); 127 static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); 128 static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); 129 130 static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { 131 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 132 pfsync_in_ins, /* PFSYNC_ACT_INS */ 133 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 134 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 135 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 136 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 137 pfsync_in_del, /* PFSYNC_ACT_DEL */ 138 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 139 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 140 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 141 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 142 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 143 pfsync_in_eof /* PFSYNC_ACT_EOF */ 144 }; 145 146 struct pfsync_q { 147 void (*write)(struct pf_state *, void *); 148 size_t len; 149 u_int8_t action; 150 }; 151 152 /* we have one of these for every PFSYNC_S_ */ 153 static void pfsync_out_state(struct pf_state *, void *); 154 static void pfsync_out_iack(struct pf_state *, void *); 155 static void pfsync_out_upd_c(struct pf_state *, void *); 156 static void pfsync_out_del(struct pf_state *, void *); 157 158 static struct pfsync_q pfsync_qs[] = { 159 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 160 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 161 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, 162 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 163 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 164 }; 165 166 static void pfsync_q_ins(struct pf_state *, int, bool); 167 static void pfsync_q_del(struct pf_state *, bool); 168 169 static void pfsync_update_state(struct pf_state *); 170 171 struct pfsync_upd_req_item { 172 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 173 struct pfsync_upd_req ur_msg; 174 }; 175 176 struct pfsync_deferral { 177 struct pfsync_softc *pd_sc; 178 TAILQ_ENTRY(pfsync_deferral) pd_entry; 179 u_int pd_refs; 180 struct callout pd_tmo; 181 182 struct pf_state *pd_st; 183 struct mbuf *pd_m; 184 }; 185 186 struct pfsync_softc { 187 /* Configuration */ 188 struct ifnet *sc_ifp; 189 struct ifnet *sc_sync_if; 190 struct ip_moptions sc_imo; 191 struct in_addr sc_sync_peer; 192 uint32_t sc_flags; 193 #define PFSYNCF_OK 0x00000001 194 #define PFSYNCF_DEFER 0x00000002 195 #define PFSYNCF_PUSH 0x00000004 196 uint8_t sc_maxupdates; 197 struct ip sc_template; 198 struct callout sc_tmo; 199 struct mtx sc_mtx; 200 201 /* Queued data */ 202 size_t sc_len; 203 TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; 204 TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; 205 TAILQ_HEAD(, pfsync_deferral) sc_deferrals; 206 u_int sc_deferred; 207 void *sc_plus; 208 size_t sc_pluslen; 209 210 /* Bulk update info */ 211 struct mtx sc_bulk_mtx; 212 uint32_t sc_ureq_sent; 213 int sc_bulk_tries; 214 uint32_t sc_ureq_received; 215 int sc_bulk_hashid; 216 uint64_t sc_bulk_stateid; 217 uint32_t sc_bulk_creatorid; 218 struct callout sc_bulk_tmo; 219 struct callout sc_bulkfail_tmo; 220 }; 221 222 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 223 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 224 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 225 226 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 227 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 228 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 229 230 static const char pfsyncname[] = "pfsync"; 231 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 232 VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; 233 #define V_pfsyncif VNET(pfsyncif) 234 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; 235 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 236 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); 237 #define V_pfsyncstats VNET(pfsyncstats) 238 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; 239 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 240 241 static void pfsync_timeout(void *); 242 static void pfsync_push(struct pfsync_softc *); 243 static void pfsyncintr(void *); 244 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 245 void *); 246 static void pfsync_multicast_cleanup(struct pfsync_softc *); 247 static void pfsync_pointers_init(void); 248 static void pfsync_pointers_uninit(void); 249 static int pfsync_init(void); 250 static void pfsync_uninit(void); 251 252 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); 253 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 254 &VNET_NAME(pfsyncstats), pfsyncstats, 255 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 256 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, 257 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 258 259 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 260 static void pfsync_clone_destroy(struct ifnet *); 261 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 262 struct pf_state_peer *); 263 static int pfsyncoutput(struct ifnet *, struct mbuf *, 264 const struct sockaddr *, struct route *); 265 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 266 267 static int pfsync_defer(struct pf_state *, struct mbuf *); 268 static void pfsync_undefer(struct pfsync_deferral *, int); 269 static void pfsync_undefer_state(struct pf_state *, int); 270 static void pfsync_defer_tmo(void *); 271 272 static void pfsync_request_update(u_int32_t, u_int64_t); 273 static void pfsync_update_state_req(struct pf_state *); 274 275 static void pfsync_drop(struct pfsync_softc *); 276 static void pfsync_sendout(int); 277 static void pfsync_send_plus(void *, size_t); 278 279 static void pfsync_bulk_start(void); 280 static void pfsync_bulk_status(u_int8_t); 281 static void pfsync_bulk_update(void *); 282 static void pfsync_bulk_fail(void *); 283 284 static void pfsync_detach_ifnet(struct ifnet *); 285 #ifdef IPSEC 286 static void pfsync_update_net_tdb(struct pfsync_tdb *); 287 #endif 288 289 #define PFSYNC_MAX_BULKTRIES 12 290 291 VNET_DEFINE(struct if_clone *, pfsync_cloner); 292 #define V_pfsync_cloner VNET(pfsync_cloner) 293 294 static int 295 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 296 { 297 struct pfsync_softc *sc; 298 struct ifnet *ifp; 299 int q; 300 301 if (unit != 0) 302 return (EINVAL); 303 304 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 305 sc->sc_flags |= PFSYNCF_OK; 306 307 for (q = 0; q < PFSYNC_S_COUNT; q++) 308 TAILQ_INIT(&sc->sc_qs[q]); 309 310 TAILQ_INIT(&sc->sc_upd_req_list); 311 TAILQ_INIT(&sc->sc_deferrals); 312 313 sc->sc_len = PFSYNC_MINPKT; 314 sc->sc_maxupdates = 128; 315 316 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 317 if (ifp == NULL) { 318 free(sc, M_PFSYNC); 319 return (ENOSPC); 320 } 321 if_initname(ifp, pfsyncname, unit); 322 ifp->if_softc = sc; 323 ifp->if_ioctl = pfsyncioctl; 324 ifp->if_output = pfsyncoutput; 325 ifp->if_type = IFT_PFSYNC; 326 ifp->if_snd.ifq_maxlen = ifqmaxlen; 327 ifp->if_hdrlen = sizeof(struct pfsync_header); 328 ifp->if_mtu = ETHERMTU; 329 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 330 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 331 callout_init(&sc->sc_tmo, 1); 332 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 333 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 334 335 if_attach(ifp); 336 337 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 338 339 V_pfsyncif = sc; 340 341 return (0); 342 } 343 344 static void 345 pfsync_clone_destroy(struct ifnet *ifp) 346 { 347 struct pfsync_softc *sc = ifp->if_softc; 348 349 /* 350 * At this stage, everything should have already been 351 * cleared by pfsync_uninit(), and we have only to 352 * drain callouts. 353 */ 354 while (sc->sc_deferred > 0) { 355 struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); 356 357 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 358 sc->sc_deferred--; 359 if (callout_stop(&pd->pd_tmo) > 0) { 360 pf_release_state(pd->pd_st); 361 m_freem(pd->pd_m); 362 free(pd, M_PFSYNC); 363 } else { 364 pd->pd_refs++; 365 callout_drain(&pd->pd_tmo); 366 free(pd, M_PFSYNC); 367 } 368 } 369 370 callout_drain(&sc->sc_tmo); 371 callout_drain(&sc->sc_bulkfail_tmo); 372 callout_drain(&sc->sc_bulk_tmo); 373 374 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 375 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 376 bpfdetach(ifp); 377 if_detach(ifp); 378 379 pfsync_drop(sc); 380 381 if_free(ifp); 382 if (sc->sc_imo.imo_membership) 383 pfsync_multicast_cleanup(sc); 384 mtx_destroy(&sc->sc_mtx); 385 mtx_destroy(&sc->sc_bulk_mtx); 386 free(sc, M_PFSYNC); 387 388 V_pfsyncif = NULL; 389 } 390 391 static int 392 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 393 struct pf_state_peer *d) 394 { 395 if (s->scrub.scrub_flag && d->scrub == NULL) { 396 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 397 if (d->scrub == NULL) 398 return (ENOMEM); 399 } 400 401 return (0); 402 } 403 404 405 static int 406 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 407 { 408 struct pfsync_softc *sc = V_pfsyncif; 409 #ifndef __NO_STRICT_ALIGNMENT 410 struct pfsync_state_key key[2]; 411 #endif 412 struct pfsync_state_key *kw, *ks; 413 struct pf_state *st = NULL; 414 struct pf_state_key *skw = NULL, *sks = NULL; 415 struct pf_rule *r = NULL; 416 struct pfi_kif *kif; 417 int error; 418 419 PF_RULES_RASSERT(); 420 421 if (sp->creatorid == 0) { 422 if (V_pf_status.debug >= PF_DEBUG_MISC) 423 printf("%s: invalid creator id: %08x\n", __func__, 424 ntohl(sp->creatorid)); 425 return (EINVAL); 426 } 427 428 if ((kif = pfi_kif_find(sp->ifname)) == NULL) { 429 if (V_pf_status.debug >= PF_DEBUG_MISC) 430 printf("%s: unknown interface: %s\n", __func__, 431 sp->ifname); 432 if (flags & PFSYNC_SI_IOCTL) 433 return (EINVAL); 434 return (0); /* skip this state */ 435 } 436 437 /* 438 * If the ruleset checksums match or the state is coming from the ioctl, 439 * it's safe to associate the state with the rule of that number. 440 */ 441 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 442 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 443 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 444 r = pf_main_ruleset.rules[ 445 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 446 else 447 r = &V_pf_default_rule; 448 449 if ((r->max_states && 450 counter_u64_fetch(r->states_cur) >= r->max_states)) 451 goto cleanup; 452 453 /* 454 * XXXGL: consider M_WAITOK in ioctl path after. 455 */ 456 if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) 457 goto cleanup; 458 459 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 460 goto cleanup; 461 462 #ifndef __NO_STRICT_ALIGNMENT 463 bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); 464 kw = &key[PF_SK_WIRE]; 465 ks = &key[PF_SK_STACK]; 466 #else 467 kw = &sp->key[PF_SK_WIRE]; 468 ks = &sp->key[PF_SK_STACK]; 469 #endif 470 471 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || 472 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || 473 kw->port[0] != ks->port[0] || 474 kw->port[1] != ks->port[1]) { 475 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 476 if (sks == NULL) 477 goto cleanup; 478 } else 479 sks = skw; 480 481 /* allocate memory for scrub info */ 482 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 483 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 484 goto cleanup; 485 486 /* Copy to state key(s). */ 487 skw->addr[0] = kw->addr[0]; 488 skw->addr[1] = kw->addr[1]; 489 skw->port[0] = kw->port[0]; 490 skw->port[1] = kw->port[1]; 491 skw->proto = sp->proto; 492 skw->af = sp->af; 493 if (sks != skw) { 494 sks->addr[0] = ks->addr[0]; 495 sks->addr[1] = ks->addr[1]; 496 sks->port[0] = ks->port[0]; 497 sks->port[1] = ks->port[1]; 498 sks->proto = sp->proto; 499 sks->af = sp->af; 500 } 501 502 /* copy to state */ 503 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 504 st->creation = time_uptime - ntohl(sp->creation); 505 st->expire = time_uptime; 506 if (sp->expire) { 507 uint32_t timeout; 508 509 timeout = r->timeout[sp->timeout]; 510 if (!timeout) 511 timeout = V_pf_default_rule.timeout[sp->timeout]; 512 513 /* sp->expire may have been adaptively scaled by export. */ 514 st->expire -= timeout - ntohl(sp->expire); 515 } 516 517 st->direction = sp->direction; 518 st->log = sp->log; 519 st->timeout = sp->timeout; 520 st->state_flags = sp->state_flags; 521 522 st->id = sp->id; 523 st->creatorid = sp->creatorid; 524 pf_state_peer_ntoh(&sp->src, &st->src); 525 pf_state_peer_ntoh(&sp->dst, &st->dst); 526 527 st->rule.ptr = r; 528 st->nat_rule.ptr = NULL; 529 st->anchor.ptr = NULL; 530 st->rt_kif = NULL; 531 532 st->pfsync_time = time_uptime; 533 st->sync_state = PFSYNC_S_NONE; 534 535 if (!(flags & PFSYNC_SI_IOCTL)) 536 st->state_flags |= PFSTATE_NOSYNC; 537 538 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) 539 goto cleanup_state; 540 541 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 542 counter_u64_add(r->states_cur, 1); 543 counter_u64_add(r->states_tot, 1); 544 545 if (!(flags & PFSYNC_SI_IOCTL)) { 546 st->state_flags &= ~PFSTATE_NOSYNC; 547 if (st->state_flags & PFSTATE_ACK) { 548 pfsync_q_ins(st, PFSYNC_S_IACK, true); 549 pfsync_push(sc); 550 } 551 } 552 st->state_flags &= ~PFSTATE_ACK; 553 PF_STATE_UNLOCK(st); 554 555 return (0); 556 557 cleanup: 558 error = ENOMEM; 559 if (skw == sks) 560 sks = NULL; 561 if (skw != NULL) 562 uma_zfree(V_pf_state_key_z, skw); 563 if (sks != NULL) 564 uma_zfree(V_pf_state_key_z, sks); 565 566 cleanup_state: /* pf_state_insert() frees the state keys. */ 567 if (st) { 568 if (st->dst.scrub) 569 uma_zfree(V_pf_state_scrub_z, st->dst.scrub); 570 if (st->src.scrub) 571 uma_zfree(V_pf_state_scrub_z, st->src.scrub); 572 uma_zfree(V_pf_state_z, st); 573 } 574 return (error); 575 } 576 577 static int 578 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 579 { 580 struct pfsync_softc *sc = V_pfsyncif; 581 struct pfsync_pkt pkt; 582 struct mbuf *m = *mp; 583 struct ip *ip = mtod(m, struct ip *); 584 struct pfsync_header *ph; 585 struct pfsync_subheader subh; 586 587 int offset, len; 588 int rv; 589 uint16_t count; 590 591 PF_RULES_RLOCK_TRACKER; 592 593 *mp = NULL; 594 V_pfsyncstats.pfsyncs_ipackets++; 595 596 /* Verify that we have a sync interface configured. */ 597 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 598 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 599 goto done; 600 601 /* verify that the packet came in on the right interface */ 602 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 603 V_pfsyncstats.pfsyncs_badif++; 604 goto done; 605 } 606 607 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 608 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 609 /* verify that the IP TTL is 255. */ 610 if (ip->ip_ttl != PFSYNC_DFLTTL) { 611 V_pfsyncstats.pfsyncs_badttl++; 612 goto done; 613 } 614 615 offset = ip->ip_hl << 2; 616 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 617 V_pfsyncstats.pfsyncs_hdrops++; 618 goto done; 619 } 620 621 if (offset + sizeof(*ph) > m->m_len) { 622 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 623 V_pfsyncstats.pfsyncs_hdrops++; 624 return (IPPROTO_DONE); 625 } 626 ip = mtod(m, struct ip *); 627 } 628 ph = (struct pfsync_header *)((char *)ip + offset); 629 630 /* verify the version */ 631 if (ph->version != PFSYNC_VERSION) { 632 V_pfsyncstats.pfsyncs_badver++; 633 goto done; 634 } 635 636 len = ntohs(ph->len) + offset; 637 if (m->m_pkthdr.len < len) { 638 V_pfsyncstats.pfsyncs_badlen++; 639 goto done; 640 } 641 642 /* Cheaper to grab this now than having to mess with mbufs later */ 643 pkt.ip = ip; 644 pkt.src = ip->ip_src; 645 pkt.flags = 0; 646 647 /* 648 * Trusting pf_chksum during packet processing, as well as seeking 649 * in interface name tree, require holding PF_RULES_RLOCK(). 650 */ 651 PF_RULES_RLOCK(); 652 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 653 pkt.flags |= PFSYNC_SI_CKSUM; 654 655 offset += sizeof(*ph); 656 while (offset <= len - sizeof(subh)) { 657 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 658 offset += sizeof(subh); 659 660 if (subh.action >= PFSYNC_ACT_MAX) { 661 V_pfsyncstats.pfsyncs_badact++; 662 PF_RULES_RUNLOCK(); 663 goto done; 664 } 665 666 count = ntohs(subh.count); 667 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 668 rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); 669 if (rv == -1) { 670 PF_RULES_RUNLOCK(); 671 return (IPPROTO_DONE); 672 } 673 674 offset += rv; 675 } 676 PF_RULES_RUNLOCK(); 677 678 done: 679 m_freem(m); 680 return (IPPROTO_DONE); 681 } 682 683 static int 684 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 685 { 686 struct pfsync_clr *clr; 687 struct mbuf *mp; 688 int len = sizeof(*clr) * count; 689 int i, offp; 690 u_int32_t creatorid; 691 692 mp = m_pulldown(m, offset, len, &offp); 693 if (mp == NULL) { 694 V_pfsyncstats.pfsyncs_badlen++; 695 return (-1); 696 } 697 clr = (struct pfsync_clr *)(mp->m_data + offp); 698 699 for (i = 0; i < count; i++) { 700 creatorid = clr[i].creatorid; 701 702 if (clr[i].ifname[0] != '\0' && 703 pfi_kif_find(clr[i].ifname) == NULL) 704 continue; 705 706 for (int i = 0; i <= pf_hashmask; i++) { 707 struct pf_idhash *ih = &V_pf_idhash[i]; 708 struct pf_state *s; 709 relock: 710 PF_HASHROW_LOCK(ih); 711 LIST_FOREACH(s, &ih->states, entry) { 712 if (s->creatorid == creatorid) { 713 s->state_flags |= PFSTATE_NOSYNC; 714 pf_unlink_state(s, PF_ENTER_LOCKED); 715 goto relock; 716 } 717 } 718 PF_HASHROW_UNLOCK(ih); 719 } 720 } 721 722 return (len); 723 } 724 725 static int 726 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 727 { 728 struct mbuf *mp; 729 struct pfsync_state *sa, *sp; 730 int len = sizeof(*sp) * count; 731 int i, offp; 732 733 mp = m_pulldown(m, offset, len, &offp); 734 if (mp == NULL) { 735 V_pfsyncstats.pfsyncs_badlen++; 736 return (-1); 737 } 738 sa = (struct pfsync_state *)(mp->m_data + offp); 739 740 for (i = 0; i < count; i++) { 741 sp = &sa[i]; 742 743 /* Check for invalid values. */ 744 if (sp->timeout >= PFTM_MAX || 745 sp->src.state > PF_TCPS_PROXY_DST || 746 sp->dst.state > PF_TCPS_PROXY_DST || 747 sp->direction > PF_OUT || 748 (sp->af != AF_INET && sp->af != AF_INET6)) { 749 if (V_pf_status.debug >= PF_DEBUG_MISC) 750 printf("%s: invalid value\n", __func__); 751 V_pfsyncstats.pfsyncs_badval++; 752 continue; 753 } 754 755 if (pfsync_state_import(sp, pkt->flags) == ENOMEM) 756 /* Drop out, but process the rest of the actions. */ 757 break; 758 } 759 760 return (len); 761 } 762 763 static int 764 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 765 { 766 struct pfsync_ins_ack *ia, *iaa; 767 struct pf_state *st; 768 769 struct mbuf *mp; 770 int len = count * sizeof(*ia); 771 int offp, i; 772 773 mp = m_pulldown(m, offset, len, &offp); 774 if (mp == NULL) { 775 V_pfsyncstats.pfsyncs_badlen++; 776 return (-1); 777 } 778 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 779 780 for (i = 0; i < count; i++) { 781 ia = &iaa[i]; 782 783 st = pf_find_state_byid(ia->id, ia->creatorid); 784 if (st == NULL) 785 continue; 786 787 if (st->state_flags & PFSTATE_ACK) { 788 PFSYNC_LOCK(V_pfsyncif); 789 pfsync_undefer_state(st, 0); 790 PFSYNC_UNLOCK(V_pfsyncif); 791 } 792 PF_STATE_UNLOCK(st); 793 } 794 /* 795 * XXX this is not yet implemented, but we know the size of the 796 * message so we can skip it. 797 */ 798 799 return (count * sizeof(struct pfsync_ins_ack)); 800 } 801 802 static int 803 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 804 struct pfsync_state_peer *dst) 805 { 806 int sync = 0; 807 808 PF_STATE_LOCK_ASSERT(st); 809 810 /* 811 * The state should never go backwards except 812 * for syn-proxy states. Neither should the 813 * sequence window slide backwards. 814 */ 815 if ((st->src.state > src->state && 816 (st->src.state < PF_TCPS_PROXY_SRC || 817 src->state >= PF_TCPS_PROXY_SRC)) || 818 819 (st->src.state == src->state && 820 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 821 sync++; 822 else 823 pf_state_peer_ntoh(src, &st->src); 824 825 if ((st->dst.state > dst->state) || 826 827 (st->dst.state >= TCPS_SYN_SENT && 828 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 829 sync++; 830 else 831 pf_state_peer_ntoh(dst, &st->dst); 832 833 return (sync); 834 } 835 836 static int 837 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 838 { 839 struct pfsync_softc *sc = V_pfsyncif; 840 struct pfsync_state *sa, *sp; 841 struct pf_state *st; 842 int sync; 843 844 struct mbuf *mp; 845 int len = count * sizeof(*sp); 846 int offp, i; 847 848 mp = m_pulldown(m, offset, len, &offp); 849 if (mp == NULL) { 850 V_pfsyncstats.pfsyncs_badlen++; 851 return (-1); 852 } 853 sa = (struct pfsync_state *)(mp->m_data + offp); 854 855 for (i = 0; i < count; i++) { 856 sp = &sa[i]; 857 858 /* check for invalid values */ 859 if (sp->timeout >= PFTM_MAX || 860 sp->src.state > PF_TCPS_PROXY_DST || 861 sp->dst.state > PF_TCPS_PROXY_DST) { 862 if (V_pf_status.debug >= PF_DEBUG_MISC) { 863 printf("pfsync_input: PFSYNC_ACT_UPD: " 864 "invalid value\n"); 865 } 866 V_pfsyncstats.pfsyncs_badval++; 867 continue; 868 } 869 870 st = pf_find_state_byid(sp->id, sp->creatorid); 871 if (st == NULL) { 872 /* insert the update */ 873 if (pfsync_state_import(sp, pkt->flags)) 874 V_pfsyncstats.pfsyncs_badstate++; 875 continue; 876 } 877 878 if (st->state_flags & PFSTATE_ACK) { 879 PFSYNC_LOCK(sc); 880 pfsync_undefer_state(st, 1); 881 PFSYNC_UNLOCK(sc); 882 } 883 884 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 885 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 886 else { 887 sync = 0; 888 889 /* 890 * Non-TCP protocol state machine always go 891 * forwards 892 */ 893 if (st->src.state > sp->src.state) 894 sync++; 895 else 896 pf_state_peer_ntoh(&sp->src, &st->src); 897 if (st->dst.state > sp->dst.state) 898 sync++; 899 else 900 pf_state_peer_ntoh(&sp->dst, &st->dst); 901 } 902 if (sync < 2) { 903 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 904 pf_state_peer_ntoh(&sp->dst, &st->dst); 905 st->expire = time_uptime; 906 st->timeout = sp->timeout; 907 } 908 st->pfsync_time = time_uptime; 909 910 if (sync) { 911 V_pfsyncstats.pfsyncs_stale++; 912 913 pfsync_update_state(st); 914 PF_STATE_UNLOCK(st); 915 PFSYNC_LOCK(sc); 916 pfsync_push(sc); 917 PFSYNC_UNLOCK(sc); 918 continue; 919 } 920 PF_STATE_UNLOCK(st); 921 } 922 923 return (len); 924 } 925 926 static int 927 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 928 { 929 struct pfsync_softc *sc = V_pfsyncif; 930 struct pfsync_upd_c *ua, *up; 931 struct pf_state *st; 932 int len = count * sizeof(*up); 933 int sync; 934 struct mbuf *mp; 935 int offp, i; 936 937 mp = m_pulldown(m, offset, len, &offp); 938 if (mp == NULL) { 939 V_pfsyncstats.pfsyncs_badlen++; 940 return (-1); 941 } 942 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 943 944 for (i = 0; i < count; i++) { 945 up = &ua[i]; 946 947 /* check for invalid values */ 948 if (up->timeout >= PFTM_MAX || 949 up->src.state > PF_TCPS_PROXY_DST || 950 up->dst.state > PF_TCPS_PROXY_DST) { 951 if (V_pf_status.debug >= PF_DEBUG_MISC) { 952 printf("pfsync_input: " 953 "PFSYNC_ACT_UPD_C: " 954 "invalid value\n"); 955 } 956 V_pfsyncstats.pfsyncs_badval++; 957 continue; 958 } 959 960 st = pf_find_state_byid(up->id, up->creatorid); 961 if (st == NULL) { 962 /* We don't have this state. Ask for it. */ 963 PFSYNC_LOCK(sc); 964 pfsync_request_update(up->creatorid, up->id); 965 PFSYNC_UNLOCK(sc); 966 continue; 967 } 968 969 if (st->state_flags & PFSTATE_ACK) { 970 PFSYNC_LOCK(sc); 971 pfsync_undefer_state(st, 1); 972 PFSYNC_UNLOCK(sc); 973 } 974 975 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 976 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 977 else { 978 sync = 0; 979 980 /* 981 * Non-TCP protocol state machine always go 982 * forwards 983 */ 984 if (st->src.state > up->src.state) 985 sync++; 986 else 987 pf_state_peer_ntoh(&up->src, &st->src); 988 if (st->dst.state > up->dst.state) 989 sync++; 990 else 991 pf_state_peer_ntoh(&up->dst, &st->dst); 992 } 993 if (sync < 2) { 994 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 995 pf_state_peer_ntoh(&up->dst, &st->dst); 996 st->expire = time_uptime; 997 st->timeout = up->timeout; 998 } 999 st->pfsync_time = time_uptime; 1000 1001 if (sync) { 1002 V_pfsyncstats.pfsyncs_stale++; 1003 1004 pfsync_update_state(st); 1005 PF_STATE_UNLOCK(st); 1006 PFSYNC_LOCK(sc); 1007 pfsync_push(sc); 1008 PFSYNC_UNLOCK(sc); 1009 continue; 1010 } 1011 PF_STATE_UNLOCK(st); 1012 } 1013 1014 return (len); 1015 } 1016 1017 static int 1018 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1019 { 1020 struct pfsync_upd_req *ur, *ura; 1021 struct mbuf *mp; 1022 int len = count * sizeof(*ur); 1023 int i, offp; 1024 1025 struct pf_state *st; 1026 1027 mp = m_pulldown(m, offset, len, &offp); 1028 if (mp == NULL) { 1029 V_pfsyncstats.pfsyncs_badlen++; 1030 return (-1); 1031 } 1032 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1033 1034 for (i = 0; i < count; i++) { 1035 ur = &ura[i]; 1036 1037 if (ur->id == 0 && ur->creatorid == 0) 1038 pfsync_bulk_start(); 1039 else { 1040 st = pf_find_state_byid(ur->id, ur->creatorid); 1041 if (st == NULL) { 1042 V_pfsyncstats.pfsyncs_badstate++; 1043 continue; 1044 } 1045 if (st->state_flags & PFSTATE_NOSYNC) { 1046 PF_STATE_UNLOCK(st); 1047 continue; 1048 } 1049 1050 pfsync_update_state_req(st); 1051 PF_STATE_UNLOCK(st); 1052 } 1053 } 1054 1055 return (len); 1056 } 1057 1058 static int 1059 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1060 { 1061 struct mbuf *mp; 1062 struct pfsync_state *sa, *sp; 1063 struct pf_state *st; 1064 int len = count * sizeof(*sp); 1065 int offp, i; 1066 1067 mp = m_pulldown(m, offset, len, &offp); 1068 if (mp == NULL) { 1069 V_pfsyncstats.pfsyncs_badlen++; 1070 return (-1); 1071 } 1072 sa = (struct pfsync_state *)(mp->m_data + offp); 1073 1074 for (i = 0; i < count; i++) { 1075 sp = &sa[i]; 1076 1077 st = pf_find_state_byid(sp->id, sp->creatorid); 1078 if (st == NULL) { 1079 V_pfsyncstats.pfsyncs_badstate++; 1080 continue; 1081 } 1082 st->state_flags |= PFSTATE_NOSYNC; 1083 pf_unlink_state(st, PF_ENTER_LOCKED); 1084 } 1085 1086 return (len); 1087 } 1088 1089 static int 1090 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1091 { 1092 struct mbuf *mp; 1093 struct pfsync_del_c *sa, *sp; 1094 struct pf_state *st; 1095 int len = count * sizeof(*sp); 1096 int offp, i; 1097 1098 mp = m_pulldown(m, offset, len, &offp); 1099 if (mp == NULL) { 1100 V_pfsyncstats.pfsyncs_badlen++; 1101 return (-1); 1102 } 1103 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1104 1105 for (i = 0; i < count; i++) { 1106 sp = &sa[i]; 1107 1108 st = pf_find_state_byid(sp->id, sp->creatorid); 1109 if (st == NULL) { 1110 V_pfsyncstats.pfsyncs_badstate++; 1111 continue; 1112 } 1113 1114 st->state_flags |= PFSTATE_NOSYNC; 1115 pf_unlink_state(st, PF_ENTER_LOCKED); 1116 } 1117 1118 return (len); 1119 } 1120 1121 static int 1122 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1123 { 1124 struct pfsync_softc *sc = V_pfsyncif; 1125 struct pfsync_bus *bus; 1126 struct mbuf *mp; 1127 int len = count * sizeof(*bus); 1128 int offp; 1129 1130 PFSYNC_BLOCK(sc); 1131 1132 /* If we're not waiting for a bulk update, who cares. */ 1133 if (sc->sc_ureq_sent == 0) { 1134 PFSYNC_BUNLOCK(sc); 1135 return (len); 1136 } 1137 1138 mp = m_pulldown(m, offset, len, &offp); 1139 if (mp == NULL) { 1140 PFSYNC_BUNLOCK(sc); 1141 V_pfsyncstats.pfsyncs_badlen++; 1142 return (-1); 1143 } 1144 bus = (struct pfsync_bus *)(mp->m_data + offp); 1145 1146 switch (bus->status) { 1147 case PFSYNC_BUS_START: 1148 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1149 V_pf_limits[PF_LIMIT_STATES].limit / 1150 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1151 sizeof(struct pfsync_state)), 1152 pfsync_bulk_fail, sc); 1153 if (V_pf_status.debug >= PF_DEBUG_MISC) 1154 printf("pfsync: received bulk update start\n"); 1155 break; 1156 1157 case PFSYNC_BUS_END: 1158 if (time_uptime - ntohl(bus->endtime) >= 1159 sc->sc_ureq_sent) { 1160 /* that's it, we're happy */ 1161 sc->sc_ureq_sent = 0; 1162 sc->sc_bulk_tries = 0; 1163 callout_stop(&sc->sc_bulkfail_tmo); 1164 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1165 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1166 "pfsync bulk done"); 1167 sc->sc_flags |= PFSYNCF_OK; 1168 if (V_pf_status.debug >= PF_DEBUG_MISC) 1169 printf("pfsync: received valid " 1170 "bulk update end\n"); 1171 } else { 1172 if (V_pf_status.debug >= PF_DEBUG_MISC) 1173 printf("pfsync: received invalid " 1174 "bulk update end: bad timestamp\n"); 1175 } 1176 break; 1177 } 1178 PFSYNC_BUNLOCK(sc); 1179 1180 return (len); 1181 } 1182 1183 static int 1184 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1185 { 1186 int len = count * sizeof(struct pfsync_tdb); 1187 1188 #if defined(IPSEC) 1189 struct pfsync_tdb *tp; 1190 struct mbuf *mp; 1191 int offp; 1192 int i; 1193 int s; 1194 1195 mp = m_pulldown(m, offset, len, &offp); 1196 if (mp == NULL) { 1197 V_pfsyncstats.pfsyncs_badlen++; 1198 return (-1); 1199 } 1200 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1201 1202 for (i = 0; i < count; i++) 1203 pfsync_update_net_tdb(&tp[i]); 1204 #endif 1205 1206 return (len); 1207 } 1208 1209 #if defined(IPSEC) 1210 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1211 static void 1212 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1213 { 1214 struct tdb *tdb; 1215 int s; 1216 1217 /* check for invalid values */ 1218 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1219 (pt->dst.sa.sa_family != AF_INET && 1220 pt->dst.sa.sa_family != AF_INET6)) 1221 goto bad; 1222 1223 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1224 if (tdb) { 1225 pt->rpl = ntohl(pt->rpl); 1226 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1227 1228 /* Neither replay nor byte counter should ever decrease. */ 1229 if (pt->rpl < tdb->tdb_rpl || 1230 pt->cur_bytes < tdb->tdb_cur_bytes) { 1231 goto bad; 1232 } 1233 1234 tdb->tdb_rpl = pt->rpl; 1235 tdb->tdb_cur_bytes = pt->cur_bytes; 1236 } 1237 return; 1238 1239 bad: 1240 if (V_pf_status.debug >= PF_DEBUG_MISC) 1241 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1242 "invalid value\n"); 1243 V_pfsyncstats.pfsyncs_badstate++; 1244 return; 1245 } 1246 #endif 1247 1248 1249 static int 1250 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1251 { 1252 /* check if we are at the right place in the packet */ 1253 if (offset != m->m_pkthdr.len) 1254 V_pfsyncstats.pfsyncs_badlen++; 1255 1256 /* we're done. free and let the caller return */ 1257 m_freem(m); 1258 return (-1); 1259 } 1260 1261 static int 1262 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1263 { 1264 V_pfsyncstats.pfsyncs_badact++; 1265 1266 m_freem(m); 1267 return (-1); 1268 } 1269 1270 static int 1271 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1272 struct route *rt) 1273 { 1274 m_freem(m); 1275 return (0); 1276 } 1277 1278 /* ARGSUSED */ 1279 static int 1280 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1281 { 1282 struct pfsync_softc *sc = ifp->if_softc; 1283 struct ifreq *ifr = (struct ifreq *)data; 1284 struct pfsyncreq pfsyncr; 1285 int error; 1286 1287 switch (cmd) { 1288 case SIOCSIFFLAGS: 1289 PFSYNC_LOCK(sc); 1290 if (ifp->if_flags & IFF_UP) { 1291 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1292 PFSYNC_UNLOCK(sc); 1293 pfsync_pointers_init(); 1294 } else { 1295 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1296 PFSYNC_UNLOCK(sc); 1297 pfsync_pointers_uninit(); 1298 } 1299 break; 1300 case SIOCSIFMTU: 1301 if (!sc->sc_sync_if || 1302 ifr->ifr_mtu <= PFSYNC_MINPKT || 1303 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1304 return (EINVAL); 1305 if (ifr->ifr_mtu < ifp->if_mtu) { 1306 PFSYNC_LOCK(sc); 1307 if (sc->sc_len > PFSYNC_MINPKT) 1308 pfsync_sendout(1); 1309 PFSYNC_UNLOCK(sc); 1310 } 1311 ifp->if_mtu = ifr->ifr_mtu; 1312 break; 1313 case SIOCGETPFSYNC: 1314 bzero(&pfsyncr, sizeof(pfsyncr)); 1315 PFSYNC_LOCK(sc); 1316 if (sc->sc_sync_if) { 1317 strlcpy(pfsyncr.pfsyncr_syncdev, 1318 sc->sc_sync_if->if_xname, IFNAMSIZ); 1319 } 1320 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1321 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1322 pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == 1323 (sc->sc_flags & PFSYNCF_DEFER)); 1324 PFSYNC_UNLOCK(sc); 1325 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1326 sizeof(pfsyncr))); 1327 1328 case SIOCSETPFSYNC: 1329 { 1330 struct ip_moptions *imo = &sc->sc_imo; 1331 struct ifnet *sifp; 1332 struct ip *ip; 1333 void *mship = NULL; 1334 1335 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1336 return (error); 1337 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1338 sizeof(pfsyncr)))) 1339 return (error); 1340 1341 if (pfsyncr.pfsyncr_maxupdates > 255) 1342 return (EINVAL); 1343 1344 if (pfsyncr.pfsyncr_syncdev[0] == 0) 1345 sifp = NULL; 1346 else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) 1347 return (EINVAL); 1348 1349 if (sifp != NULL && ( 1350 pfsyncr.pfsyncr_syncpeer.s_addr == 0 || 1351 pfsyncr.pfsyncr_syncpeer.s_addr == 1352 htonl(INADDR_PFSYNC_GROUP))) 1353 mship = malloc((sizeof(struct in_multi *) * 1354 IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); 1355 1356 PFSYNC_LOCK(sc); 1357 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1358 sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); 1359 else 1360 sc->sc_sync_peer.s_addr = 1361 pfsyncr.pfsyncr_syncpeer.s_addr; 1362 1363 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1364 if (pfsyncr.pfsyncr_defer) { 1365 sc->sc_flags |= PFSYNCF_DEFER; 1366 V_pfsync_defer_ptr = pfsync_defer; 1367 } else { 1368 sc->sc_flags &= ~PFSYNCF_DEFER; 1369 V_pfsync_defer_ptr = NULL; 1370 } 1371 1372 if (sifp == NULL) { 1373 if (sc->sc_sync_if) 1374 if_rele(sc->sc_sync_if); 1375 sc->sc_sync_if = NULL; 1376 if (imo->imo_membership) 1377 pfsync_multicast_cleanup(sc); 1378 PFSYNC_UNLOCK(sc); 1379 break; 1380 } 1381 1382 if (sc->sc_len > PFSYNC_MINPKT && 1383 (sifp->if_mtu < sc->sc_ifp->if_mtu || 1384 (sc->sc_sync_if != NULL && 1385 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 1386 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 1387 pfsync_sendout(1); 1388 1389 if (imo->imo_membership) 1390 pfsync_multicast_cleanup(sc); 1391 1392 if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 1393 error = pfsync_multicast_setup(sc, sifp, mship); 1394 if (error) { 1395 if_rele(sifp); 1396 free(mship, M_PFSYNC); 1397 PFSYNC_UNLOCK(sc); 1398 return (error); 1399 } 1400 } 1401 if (sc->sc_sync_if) 1402 if_rele(sc->sc_sync_if); 1403 sc->sc_sync_if = sifp; 1404 1405 ip = &sc->sc_template; 1406 bzero(ip, sizeof(*ip)); 1407 ip->ip_v = IPVERSION; 1408 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1409 ip->ip_tos = IPTOS_LOWDELAY; 1410 /* len and id are set later. */ 1411 ip->ip_off = htons(IP_DF); 1412 ip->ip_ttl = PFSYNC_DFLTTL; 1413 ip->ip_p = IPPROTO_PFSYNC; 1414 ip->ip_src.s_addr = INADDR_ANY; 1415 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1416 1417 /* Request a full state table update. */ 1418 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1419 (*carp_demote_adj_p)(V_pfsync_carp_adj, 1420 "pfsync bulk start"); 1421 sc->sc_flags &= ~PFSYNCF_OK; 1422 if (V_pf_status.debug >= PF_DEBUG_MISC) 1423 printf("pfsync: requesting bulk update\n"); 1424 pfsync_request_update(0, 0); 1425 PFSYNC_UNLOCK(sc); 1426 PFSYNC_BLOCK(sc); 1427 sc->sc_ureq_sent = time_uptime; 1428 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, 1429 sc); 1430 PFSYNC_BUNLOCK(sc); 1431 1432 break; 1433 } 1434 default: 1435 return (ENOTTY); 1436 } 1437 1438 return (0); 1439 } 1440 1441 static void 1442 pfsync_out_state(struct pf_state *st, void *buf) 1443 { 1444 struct pfsync_state *sp = buf; 1445 1446 pfsync_state_export(sp, st); 1447 } 1448 1449 static void 1450 pfsync_out_iack(struct pf_state *st, void *buf) 1451 { 1452 struct pfsync_ins_ack *iack = buf; 1453 1454 iack->id = st->id; 1455 iack->creatorid = st->creatorid; 1456 } 1457 1458 static void 1459 pfsync_out_upd_c(struct pf_state *st, void *buf) 1460 { 1461 struct pfsync_upd_c *up = buf; 1462 1463 bzero(up, sizeof(*up)); 1464 up->id = st->id; 1465 pf_state_peer_hton(&st->src, &up->src); 1466 pf_state_peer_hton(&st->dst, &up->dst); 1467 up->creatorid = st->creatorid; 1468 up->timeout = st->timeout; 1469 } 1470 1471 static void 1472 pfsync_out_del(struct pf_state *st, void *buf) 1473 { 1474 struct pfsync_del_c *dp = buf; 1475 1476 dp->id = st->id; 1477 dp->creatorid = st->creatorid; 1478 st->state_flags |= PFSTATE_NOSYNC; 1479 } 1480 1481 static void 1482 pfsync_drop(struct pfsync_softc *sc) 1483 { 1484 struct pf_state *st, *next; 1485 struct pfsync_upd_req_item *ur; 1486 int q; 1487 1488 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1489 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1490 continue; 1491 1492 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { 1493 KASSERT(st->sync_state == q, 1494 ("%s: st->sync_state == q", 1495 __func__)); 1496 st->sync_state = PFSYNC_S_NONE; 1497 pf_release_state(st); 1498 } 1499 TAILQ_INIT(&sc->sc_qs[q]); 1500 } 1501 1502 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1503 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1504 free(ur, M_PFSYNC); 1505 } 1506 1507 sc->sc_plus = NULL; 1508 sc->sc_len = PFSYNC_MINPKT; 1509 } 1510 1511 static void 1512 pfsync_sendout(int schedswi) 1513 { 1514 struct pfsync_softc *sc = V_pfsyncif; 1515 struct ifnet *ifp = sc->sc_ifp; 1516 struct mbuf *m; 1517 struct ip *ip; 1518 struct pfsync_header *ph; 1519 struct pfsync_subheader *subh; 1520 struct pf_state *st, *st_next; 1521 struct pfsync_upd_req_item *ur; 1522 int offset; 1523 int q, count = 0; 1524 1525 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1526 KASSERT(sc->sc_len > PFSYNC_MINPKT, 1527 ("%s: sc_len %zu", __func__, sc->sc_len)); 1528 PFSYNC_LOCK_ASSERT(sc); 1529 1530 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1531 pfsync_drop(sc); 1532 return; 1533 } 1534 1535 m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1536 if (m == NULL) { 1537 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1538 V_pfsyncstats.pfsyncs_onomem++; 1539 return; 1540 } 1541 m->m_data += max_linkhdr; 1542 m->m_len = m->m_pkthdr.len = sc->sc_len; 1543 1544 /* build the ip header */ 1545 ip = (struct ip *)m->m_data; 1546 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1547 offset = sizeof(*ip); 1548 1549 ip->ip_len = htons(m->m_pkthdr.len); 1550 ip_fillid(ip); 1551 1552 /* build the pfsync header */ 1553 ph = (struct pfsync_header *)(m->m_data + offset); 1554 bzero(ph, sizeof(*ph)); 1555 offset += sizeof(*ph); 1556 1557 ph->version = PFSYNC_VERSION; 1558 ph->len = htons(sc->sc_len - sizeof(*ip)); 1559 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1560 1561 /* walk the queues */ 1562 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1563 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1564 continue; 1565 1566 subh = (struct pfsync_subheader *)(m->m_data + offset); 1567 offset += sizeof(*subh); 1568 1569 count = 0; 1570 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) { 1571 KASSERT(st->sync_state == q, 1572 ("%s: st->sync_state == q", 1573 __func__)); 1574 /* 1575 * XXXGL: some of write methods do unlocked reads 1576 * of state data :( 1577 */ 1578 pfsync_qs[q].write(st, m->m_data + offset); 1579 offset += pfsync_qs[q].len; 1580 st->sync_state = PFSYNC_S_NONE; 1581 pf_release_state(st); 1582 count++; 1583 } 1584 TAILQ_INIT(&sc->sc_qs[q]); 1585 1586 bzero(subh, sizeof(*subh)); 1587 subh->action = pfsync_qs[q].action; 1588 subh->count = htons(count); 1589 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1590 } 1591 1592 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { 1593 subh = (struct pfsync_subheader *)(m->m_data + offset); 1594 offset += sizeof(*subh); 1595 1596 count = 0; 1597 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1598 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1599 1600 bcopy(&ur->ur_msg, m->m_data + offset, 1601 sizeof(ur->ur_msg)); 1602 offset += sizeof(ur->ur_msg); 1603 free(ur, M_PFSYNC); 1604 count++; 1605 } 1606 1607 bzero(subh, sizeof(*subh)); 1608 subh->action = PFSYNC_ACT_UPD_REQ; 1609 subh->count = htons(count); 1610 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1611 } 1612 1613 /* has someone built a custom region for us to add? */ 1614 if (sc->sc_plus != NULL) { 1615 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); 1616 offset += sc->sc_pluslen; 1617 1618 sc->sc_plus = NULL; 1619 } 1620 1621 subh = (struct pfsync_subheader *)(m->m_data + offset); 1622 offset += sizeof(*subh); 1623 1624 bzero(subh, sizeof(*subh)); 1625 subh->action = PFSYNC_ACT_EOF; 1626 subh->count = htons(1); 1627 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1628 1629 /* we're done, let's put it on the wire */ 1630 if (ifp->if_bpf) { 1631 m->m_data += sizeof(*ip); 1632 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); 1633 BPF_MTAP(ifp, m); 1634 m->m_data -= sizeof(*ip); 1635 m->m_len = m->m_pkthdr.len = sc->sc_len; 1636 } 1637 1638 if (sc->sc_sync_if == NULL) { 1639 sc->sc_len = PFSYNC_MINPKT; 1640 m_freem(m); 1641 return; 1642 } 1643 1644 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 1645 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 1646 sc->sc_len = PFSYNC_MINPKT; 1647 1648 if (!_IF_QFULL(&sc->sc_ifp->if_snd)) 1649 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1650 else { 1651 m_freem(m); 1652 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 1653 } 1654 if (schedswi) 1655 swi_sched(V_pfsync_swi_cookie, 0); 1656 } 1657 1658 static void 1659 pfsync_insert_state(struct pf_state *st) 1660 { 1661 struct pfsync_softc *sc = V_pfsyncif; 1662 1663 if (st->state_flags & PFSTATE_NOSYNC) 1664 return; 1665 1666 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1667 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1668 st->state_flags |= PFSTATE_NOSYNC; 1669 return; 1670 } 1671 1672 KASSERT(st->sync_state == PFSYNC_S_NONE, 1673 ("%s: st->sync_state %u", __func__, st->sync_state)); 1674 1675 PFSYNC_LOCK(sc); 1676 if (sc->sc_len == PFSYNC_MINPKT) 1677 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1678 1679 pfsync_q_ins(st, PFSYNC_S_INS, true); 1680 PFSYNC_UNLOCK(sc); 1681 1682 st->sync_updates = 0; 1683 } 1684 1685 static int 1686 pfsync_defer(struct pf_state *st, struct mbuf *m) 1687 { 1688 struct pfsync_softc *sc = V_pfsyncif; 1689 struct pfsync_deferral *pd; 1690 1691 if (m->m_flags & (M_BCAST|M_MCAST)) 1692 return (0); 1693 1694 PFSYNC_LOCK(sc); 1695 1696 if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || 1697 !(sc->sc_flags & PFSYNCF_DEFER)) { 1698 PFSYNC_UNLOCK(sc); 1699 return (0); 1700 } 1701 1702 if (sc->sc_deferred >= 128) 1703 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); 1704 1705 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1706 if (pd == NULL) 1707 return (0); 1708 sc->sc_deferred++; 1709 1710 m->m_flags |= M_SKIP_FIREWALL; 1711 st->state_flags |= PFSTATE_ACK; 1712 1713 pd->pd_sc = sc; 1714 pd->pd_refs = 0; 1715 pd->pd_st = st; 1716 pf_ref_state(st); 1717 pd->pd_m = m; 1718 1719 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1720 callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1721 callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); 1722 1723 pfsync_push(sc); 1724 1725 return (1); 1726 } 1727 1728 static void 1729 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1730 { 1731 struct pfsync_softc *sc = pd->pd_sc; 1732 struct mbuf *m = pd->pd_m; 1733 struct pf_state *st = pd->pd_st; 1734 1735 PFSYNC_LOCK_ASSERT(sc); 1736 1737 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1738 sc->sc_deferred--; 1739 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1740 free(pd, M_PFSYNC); 1741 pf_release_state(st); 1742 1743 if (drop) 1744 m_freem(m); 1745 else { 1746 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1747 pfsync_push(sc); 1748 } 1749 } 1750 1751 static void 1752 pfsync_defer_tmo(void *arg) 1753 { 1754 struct pfsync_deferral *pd = arg; 1755 struct pfsync_softc *sc = pd->pd_sc; 1756 struct mbuf *m = pd->pd_m; 1757 struct pf_state *st = pd->pd_st; 1758 1759 PFSYNC_LOCK_ASSERT(sc); 1760 1761 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 1762 1763 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1764 sc->sc_deferred--; 1765 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1766 if (pd->pd_refs == 0) 1767 free(pd, M_PFSYNC); 1768 PFSYNC_UNLOCK(sc); 1769 1770 ip_output(m, NULL, NULL, 0, NULL, NULL); 1771 1772 pf_release_state(st); 1773 1774 CURVNET_RESTORE(); 1775 } 1776 1777 static void 1778 pfsync_undefer_state(struct pf_state *st, int drop) 1779 { 1780 struct pfsync_softc *sc = V_pfsyncif; 1781 struct pfsync_deferral *pd; 1782 1783 PFSYNC_LOCK_ASSERT(sc); 1784 1785 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1786 if (pd->pd_st == st) { 1787 if (callout_stop(&pd->pd_tmo) > 0) 1788 pfsync_undefer(pd, drop); 1789 return; 1790 } 1791 } 1792 1793 panic("%s: unable to find deferred state", __func__); 1794 } 1795 1796 static void 1797 pfsync_update_state(struct pf_state *st) 1798 { 1799 struct pfsync_softc *sc = V_pfsyncif; 1800 bool sync = false, ref = true; 1801 1802 PF_STATE_LOCK_ASSERT(st); 1803 PFSYNC_LOCK(sc); 1804 1805 if (st->state_flags & PFSTATE_ACK) 1806 pfsync_undefer_state(st, 0); 1807 if (st->state_flags & PFSTATE_NOSYNC) { 1808 if (st->sync_state != PFSYNC_S_NONE) 1809 pfsync_q_del(st, true); 1810 PFSYNC_UNLOCK(sc); 1811 return; 1812 } 1813 1814 if (sc->sc_len == PFSYNC_MINPKT) 1815 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1816 1817 switch (st->sync_state) { 1818 case PFSYNC_S_UPD_C: 1819 case PFSYNC_S_UPD: 1820 case PFSYNC_S_INS: 1821 /* we're already handling it */ 1822 1823 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1824 st->sync_updates++; 1825 if (st->sync_updates >= sc->sc_maxupdates) 1826 sync = true; 1827 } 1828 break; 1829 1830 case PFSYNC_S_IACK: 1831 pfsync_q_del(st, false); 1832 ref = false; 1833 /* FALLTHROUGH */ 1834 1835 case PFSYNC_S_NONE: 1836 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 1837 st->sync_updates = 0; 1838 break; 1839 1840 default: 1841 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1842 } 1843 1844 if (sync || (time_uptime - st->pfsync_time) < 2) 1845 pfsync_push(sc); 1846 1847 PFSYNC_UNLOCK(sc); 1848 } 1849 1850 static void 1851 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1852 { 1853 struct pfsync_softc *sc = V_pfsyncif; 1854 struct pfsync_upd_req_item *item; 1855 size_t nlen = sizeof(struct pfsync_upd_req); 1856 1857 PFSYNC_LOCK_ASSERT(sc); 1858 1859 /* 1860 * This code does a bit to prevent multiple update requests for the 1861 * same state being generated. It searches current subheader queue, 1862 * but it doesn't lookup into queue of already packed datagrams. 1863 */ 1864 TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) 1865 if (item->ur_msg.id == id && 1866 item->ur_msg.creatorid == creatorid) 1867 return; 1868 1869 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1870 if (item == NULL) 1871 return; /* XXX stats */ 1872 1873 item->ur_msg.id = id; 1874 item->ur_msg.creatorid = creatorid; 1875 1876 if (TAILQ_EMPTY(&sc->sc_upd_req_list)) 1877 nlen += sizeof(struct pfsync_subheader); 1878 1879 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 1880 pfsync_sendout(1); 1881 1882 nlen = sizeof(struct pfsync_subheader) + 1883 sizeof(struct pfsync_upd_req); 1884 } 1885 1886 TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); 1887 sc->sc_len += nlen; 1888 } 1889 1890 static void 1891 pfsync_update_state_req(struct pf_state *st) 1892 { 1893 struct pfsync_softc *sc = V_pfsyncif; 1894 bool ref = true; 1895 1896 PF_STATE_LOCK_ASSERT(st); 1897 PFSYNC_LOCK(sc); 1898 1899 if (st->state_flags & PFSTATE_NOSYNC) { 1900 if (st->sync_state != PFSYNC_S_NONE) 1901 pfsync_q_del(st, true); 1902 PFSYNC_UNLOCK(sc); 1903 return; 1904 } 1905 1906 switch (st->sync_state) { 1907 case PFSYNC_S_UPD_C: 1908 case PFSYNC_S_IACK: 1909 pfsync_q_del(st, false); 1910 ref = false; 1911 /* FALLTHROUGH */ 1912 1913 case PFSYNC_S_NONE: 1914 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 1915 pfsync_push(sc); 1916 break; 1917 1918 case PFSYNC_S_INS: 1919 case PFSYNC_S_UPD: 1920 case PFSYNC_S_DEL: 1921 /* we're already handling it */ 1922 break; 1923 1924 default: 1925 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1926 } 1927 1928 PFSYNC_UNLOCK(sc); 1929 } 1930 1931 static void 1932 pfsync_delete_state(struct pf_state *st) 1933 { 1934 struct pfsync_softc *sc = V_pfsyncif; 1935 bool ref = true; 1936 1937 PFSYNC_LOCK(sc); 1938 if (st->state_flags & PFSTATE_ACK) 1939 pfsync_undefer_state(st, 1); 1940 if (st->state_flags & PFSTATE_NOSYNC) { 1941 if (st->sync_state != PFSYNC_S_NONE) 1942 pfsync_q_del(st, true); 1943 PFSYNC_UNLOCK(sc); 1944 return; 1945 } 1946 1947 if (sc->sc_len == PFSYNC_MINPKT) 1948 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1949 1950 switch (st->sync_state) { 1951 case PFSYNC_S_INS: 1952 /* We never got to tell the world so just forget about it. */ 1953 pfsync_q_del(st, true); 1954 break; 1955 1956 case PFSYNC_S_UPD_C: 1957 case PFSYNC_S_UPD: 1958 case PFSYNC_S_IACK: 1959 pfsync_q_del(st, false); 1960 ref = false; 1961 /* FALLTHROUGH */ 1962 1963 case PFSYNC_S_NONE: 1964 pfsync_q_ins(st, PFSYNC_S_DEL, ref); 1965 break; 1966 1967 default: 1968 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1969 } 1970 1971 PFSYNC_UNLOCK(sc); 1972 } 1973 1974 static void 1975 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 1976 { 1977 struct pfsync_softc *sc = V_pfsyncif; 1978 struct { 1979 struct pfsync_subheader subh; 1980 struct pfsync_clr clr; 1981 } __packed r; 1982 1983 bzero(&r, sizeof(r)); 1984 1985 r.subh.action = PFSYNC_ACT_CLR; 1986 r.subh.count = htons(1); 1987 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 1988 1989 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 1990 r.clr.creatorid = creatorid; 1991 1992 PFSYNC_LOCK(sc); 1993 pfsync_send_plus(&r, sizeof(r)); 1994 PFSYNC_UNLOCK(sc); 1995 } 1996 1997 static void 1998 pfsync_q_ins(struct pf_state *st, int q, bool ref) 1999 { 2000 struct pfsync_softc *sc = V_pfsyncif; 2001 size_t nlen = pfsync_qs[q].len; 2002 2003 PFSYNC_LOCK_ASSERT(sc); 2004 2005 KASSERT(st->sync_state == PFSYNC_S_NONE, 2006 ("%s: st->sync_state %u", __func__, st->sync_state)); 2007 KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2008 sc->sc_len)); 2009 2010 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2011 nlen += sizeof(struct pfsync_subheader); 2012 2013 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 2014 pfsync_sendout(1); 2015 2016 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2017 } 2018 2019 sc->sc_len += nlen; 2020 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); 2021 st->sync_state = q; 2022 if (ref) 2023 pf_ref_state(st); 2024 } 2025 2026 static void 2027 pfsync_q_del(struct pf_state *st, bool unref) 2028 { 2029 struct pfsync_softc *sc = V_pfsyncif; 2030 int q = st->sync_state; 2031 2032 PFSYNC_LOCK_ASSERT(sc); 2033 KASSERT(st->sync_state != PFSYNC_S_NONE, 2034 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2035 2036 sc->sc_len -= pfsync_qs[q].len; 2037 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 2038 st->sync_state = PFSYNC_S_NONE; 2039 if (unref) 2040 pf_release_state(st); 2041 2042 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2043 sc->sc_len -= sizeof(struct pfsync_subheader); 2044 } 2045 2046 static void 2047 pfsync_bulk_start(void) 2048 { 2049 struct pfsync_softc *sc = V_pfsyncif; 2050 2051 if (V_pf_status.debug >= PF_DEBUG_MISC) 2052 printf("pfsync: received bulk update request\n"); 2053 2054 PFSYNC_BLOCK(sc); 2055 2056 sc->sc_ureq_received = time_uptime; 2057 sc->sc_bulk_hashid = 0; 2058 sc->sc_bulk_stateid = 0; 2059 pfsync_bulk_status(PFSYNC_BUS_START); 2060 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2061 PFSYNC_BUNLOCK(sc); 2062 } 2063 2064 static void 2065 pfsync_bulk_update(void *arg) 2066 { 2067 struct pfsync_softc *sc = arg; 2068 struct pf_state *s; 2069 int i, sent = 0; 2070 2071 PFSYNC_BLOCK_ASSERT(sc); 2072 CURVNET_SET(sc->sc_ifp->if_vnet); 2073 2074 /* 2075 * Start with last state from previous invocation. 2076 * It may had gone, in this case start from the 2077 * hash slot. 2078 */ 2079 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2080 2081 if (s != NULL) 2082 i = PF_IDHASH(s); 2083 else 2084 i = sc->sc_bulk_hashid; 2085 2086 for (; i <= pf_hashmask; i++) { 2087 struct pf_idhash *ih = &V_pf_idhash[i]; 2088 2089 if (s != NULL) 2090 PF_HASHROW_ASSERT(ih); 2091 else { 2092 PF_HASHROW_LOCK(ih); 2093 s = LIST_FIRST(&ih->states); 2094 } 2095 2096 for (; s; s = LIST_NEXT(s, entry)) { 2097 2098 if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < 2099 sizeof(struct pfsync_state)) { 2100 /* We've filled a packet. */ 2101 sc->sc_bulk_hashid = i; 2102 sc->sc_bulk_stateid = s->id; 2103 sc->sc_bulk_creatorid = s->creatorid; 2104 PF_HASHROW_UNLOCK(ih); 2105 callout_reset(&sc->sc_bulk_tmo, 1, 2106 pfsync_bulk_update, sc); 2107 goto full; 2108 } 2109 2110 if (s->sync_state == PFSYNC_S_NONE && 2111 s->timeout < PFTM_MAX && 2112 s->pfsync_time <= sc->sc_ureq_received) { 2113 pfsync_update_state_req(s); 2114 sent++; 2115 } 2116 } 2117 PF_HASHROW_UNLOCK(ih); 2118 } 2119 2120 /* We're done. */ 2121 pfsync_bulk_status(PFSYNC_BUS_END); 2122 2123 full: 2124 CURVNET_RESTORE(); 2125 } 2126 2127 static void 2128 pfsync_bulk_status(u_int8_t status) 2129 { 2130 struct { 2131 struct pfsync_subheader subh; 2132 struct pfsync_bus bus; 2133 } __packed r; 2134 2135 struct pfsync_softc *sc = V_pfsyncif; 2136 2137 bzero(&r, sizeof(r)); 2138 2139 r.subh.action = PFSYNC_ACT_BUS; 2140 r.subh.count = htons(1); 2141 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2142 2143 r.bus.creatorid = V_pf_status.hostid; 2144 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2145 r.bus.status = status; 2146 2147 PFSYNC_LOCK(sc); 2148 pfsync_send_plus(&r, sizeof(r)); 2149 PFSYNC_UNLOCK(sc); 2150 } 2151 2152 static void 2153 pfsync_bulk_fail(void *arg) 2154 { 2155 struct pfsync_softc *sc = arg; 2156 2157 CURVNET_SET(sc->sc_ifp->if_vnet); 2158 2159 PFSYNC_BLOCK_ASSERT(sc); 2160 2161 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2162 /* Try again */ 2163 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2164 pfsync_bulk_fail, V_pfsyncif); 2165 PFSYNC_LOCK(sc); 2166 pfsync_request_update(0, 0); 2167 PFSYNC_UNLOCK(sc); 2168 } else { 2169 /* Pretend like the transfer was ok. */ 2170 sc->sc_ureq_sent = 0; 2171 sc->sc_bulk_tries = 0; 2172 PFSYNC_LOCK(sc); 2173 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2174 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2175 "pfsync bulk fail"); 2176 sc->sc_flags |= PFSYNCF_OK; 2177 PFSYNC_UNLOCK(sc); 2178 if (V_pf_status.debug >= PF_DEBUG_MISC) 2179 printf("pfsync: failed to receive bulk update\n"); 2180 } 2181 2182 CURVNET_RESTORE(); 2183 } 2184 2185 static void 2186 pfsync_send_plus(void *plus, size_t pluslen) 2187 { 2188 struct pfsync_softc *sc = V_pfsyncif; 2189 2190 PFSYNC_LOCK_ASSERT(sc); 2191 2192 if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) 2193 pfsync_sendout(1); 2194 2195 sc->sc_plus = plus; 2196 sc->sc_len += (sc->sc_pluslen = pluslen); 2197 2198 pfsync_sendout(1); 2199 } 2200 2201 static void 2202 pfsync_timeout(void *arg) 2203 { 2204 struct pfsync_softc *sc = arg; 2205 2206 CURVNET_SET(sc->sc_ifp->if_vnet); 2207 PFSYNC_LOCK(sc); 2208 pfsync_push(sc); 2209 PFSYNC_UNLOCK(sc); 2210 CURVNET_RESTORE(); 2211 } 2212 2213 static void 2214 pfsync_push(struct pfsync_softc *sc) 2215 { 2216 2217 PFSYNC_LOCK_ASSERT(sc); 2218 2219 sc->sc_flags |= PFSYNCF_PUSH; 2220 swi_sched(V_pfsync_swi_cookie, 0); 2221 } 2222 2223 static void 2224 pfsyncintr(void *arg) 2225 { 2226 struct pfsync_softc *sc = arg; 2227 struct mbuf *m, *n; 2228 2229 CURVNET_SET(sc->sc_ifp->if_vnet); 2230 2231 PFSYNC_LOCK(sc); 2232 if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { 2233 pfsync_sendout(0); 2234 sc->sc_flags &= ~PFSYNCF_PUSH; 2235 } 2236 _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); 2237 PFSYNC_UNLOCK(sc); 2238 2239 for (; m != NULL; m = n) { 2240 2241 n = m->m_nextpkt; 2242 m->m_nextpkt = NULL; 2243 2244 /* 2245 * We distinguish between a deferral packet and our 2246 * own pfsync packet based on M_SKIP_FIREWALL 2247 * flag. This is XXX. 2248 */ 2249 if (m->m_flags & M_SKIP_FIREWALL) 2250 ip_output(m, NULL, NULL, 0, NULL, NULL); 2251 else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 2252 NULL) == 0) 2253 V_pfsyncstats.pfsyncs_opackets++; 2254 else 2255 V_pfsyncstats.pfsyncs_oerrors++; 2256 } 2257 CURVNET_RESTORE(); 2258 } 2259 2260 static int 2261 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) 2262 { 2263 struct ip_moptions *imo = &sc->sc_imo; 2264 int error; 2265 2266 if (!(ifp->if_flags & IFF_MULTICAST)) 2267 return (EADDRNOTAVAIL); 2268 2269 imo->imo_membership = (struct in_multi **)mship; 2270 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 2271 imo->imo_multicast_vif = -1; 2272 2273 if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, 2274 &imo->imo_membership[0])) != 0) { 2275 imo->imo_membership = NULL; 2276 return (error); 2277 } 2278 imo->imo_num_memberships++; 2279 imo->imo_multicast_ifp = ifp; 2280 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2281 imo->imo_multicast_loop = 0; 2282 2283 return (0); 2284 } 2285 2286 static void 2287 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2288 { 2289 struct ip_moptions *imo = &sc->sc_imo; 2290 2291 in_leavegroup(imo->imo_membership[0], NULL); 2292 free(imo->imo_membership, M_PFSYNC); 2293 imo->imo_membership = NULL; 2294 imo->imo_multicast_ifp = NULL; 2295 } 2296 2297 void 2298 pfsync_detach_ifnet(struct ifnet *ifp) 2299 { 2300 struct pfsync_softc *sc = V_pfsyncif; 2301 2302 if (sc == NULL) 2303 return; 2304 2305 PFSYNC_LOCK(sc); 2306 2307 if (sc->sc_sync_if == ifp) { 2308 /* We don't need mutlicast cleanup here, because the interface 2309 * is going away. We do need to ensure we don't try to do 2310 * cleanup later. 2311 */ 2312 sc->sc_imo.imo_membership = NULL; 2313 sc->sc_imo.imo_multicast_ifp = NULL; 2314 sc->sc_sync_if = NULL; 2315 } 2316 2317 PFSYNC_UNLOCK(sc); 2318 } 2319 2320 #ifdef INET 2321 extern struct domain inetdomain; 2322 static struct protosw in_pfsync_protosw = { 2323 .pr_type = SOCK_RAW, 2324 .pr_domain = &inetdomain, 2325 .pr_protocol = IPPROTO_PFSYNC, 2326 .pr_flags = PR_ATOMIC|PR_ADDR, 2327 .pr_input = pfsync_input, 2328 .pr_output = rip_output, 2329 .pr_ctloutput = rip_ctloutput, 2330 .pr_usrreqs = &rip_usrreqs 2331 }; 2332 #endif 2333 2334 static void 2335 pfsync_pointers_init() 2336 { 2337 2338 PF_RULES_WLOCK(); 2339 V_pfsync_state_import_ptr = pfsync_state_import; 2340 V_pfsync_insert_state_ptr = pfsync_insert_state; 2341 V_pfsync_update_state_ptr = pfsync_update_state; 2342 V_pfsync_delete_state_ptr = pfsync_delete_state; 2343 V_pfsync_clear_states_ptr = pfsync_clear_states; 2344 V_pfsync_defer_ptr = pfsync_defer; 2345 PF_RULES_WUNLOCK(); 2346 } 2347 2348 static void 2349 pfsync_pointers_uninit() 2350 { 2351 2352 PF_RULES_WLOCK(); 2353 V_pfsync_state_import_ptr = NULL; 2354 V_pfsync_insert_state_ptr = NULL; 2355 V_pfsync_update_state_ptr = NULL; 2356 V_pfsync_delete_state_ptr = NULL; 2357 V_pfsync_clear_states_ptr = NULL; 2358 V_pfsync_defer_ptr = NULL; 2359 PF_RULES_WUNLOCK(); 2360 } 2361 2362 static void 2363 vnet_pfsync_init(const void *unused __unused) 2364 { 2365 int error; 2366 2367 V_pfsync_cloner = if_clone_simple(pfsyncname, 2368 pfsync_clone_create, pfsync_clone_destroy, 1); 2369 error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, 2370 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 2371 if (error) { 2372 if_clone_detach(V_pfsync_cloner); 2373 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 2374 } 2375 2376 pfsync_pointers_init(); 2377 } 2378 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 2379 vnet_pfsync_init, NULL); 2380 2381 static void 2382 vnet_pfsync_uninit(const void *unused __unused) 2383 { 2384 2385 pfsync_pointers_uninit(); 2386 2387 if_clone_detach(V_pfsync_cloner); 2388 swi_remove(V_pfsync_swi_cookie); 2389 } 2390 2391 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, 2392 vnet_pfsync_uninit, NULL); 2393 2394 static int 2395 pfsync_init() 2396 { 2397 #ifdef INET 2398 int error; 2399 2400 pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; 2401 2402 error = pf_proto_register(PF_INET, &in_pfsync_protosw); 2403 if (error) 2404 return (error); 2405 error = ipproto_register(IPPROTO_PFSYNC); 2406 if (error) { 2407 pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); 2408 return (error); 2409 } 2410 #endif 2411 2412 return (0); 2413 } 2414 2415 static void 2416 pfsync_uninit() 2417 { 2418 pfsync_detach_ifnet_ptr = NULL; 2419 2420 #ifdef INET 2421 ipproto_unregister(IPPROTO_PFSYNC); 2422 pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); 2423 #endif 2424 } 2425 2426 static int 2427 pfsync_modevent(module_t mod, int type, void *data) 2428 { 2429 int error = 0; 2430 2431 switch (type) { 2432 case MOD_LOAD: 2433 error = pfsync_init(); 2434 break; 2435 case MOD_UNLOAD: 2436 pfsync_uninit(); 2437 break; 2438 default: 2439 error = EINVAL; 2440 break; 2441 } 2442 2443 return (error); 2444 } 2445 2446 static moduledata_t pfsync_mod = { 2447 pfsyncname, 2448 pfsync_modevent, 2449 0 2450 }; 2451 2452 #define PFSYNC_MODVER 1 2453 2454 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 2455 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 2456 MODULE_VERSION(pfsync, PFSYNC_MODVER); 2457 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 2458