1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_pf.h" 68 69 #include <sys/param.h> 70 #include <sys/bus.h> 71 #include <sys/endian.h> 72 #include <sys/interrupt.h> 73 #include <sys/kernel.h> 74 #include <sys/lock.h> 75 #include <sys/mbuf.h> 76 #include <sys/module.h> 77 #include <sys/mutex.h> 78 #include <sys/priv.h> 79 #include <sys/protosw.h> 80 #include <sys/socket.h> 81 #include <sys/sockio.h> 82 #include <sys/sysctl.h> 83 #include <sys/syslog.h> 84 85 #include <net/bpf.h> 86 #include <net/if.h> 87 #include <net/if_var.h> 88 #include <net/if_clone.h> 89 #include <net/if_types.h> 90 #include <net/vnet.h> 91 #include <net/pfvar.h> 92 #include <net/if_pfsync.h> 93 94 #include <netinet/if_ether.h> 95 #include <netinet/in.h> 96 #include <netinet/in_var.h> 97 #include <netinet/ip.h> 98 #include <netinet/ip_carp.h> 99 #include <netinet/ip_var.h> 100 #include <netinet/tcp.h> 101 #include <netinet/tcp_fsm.h> 102 #include <netinet/tcp_seq.h> 103 104 #define PFSYNC_MINPKT ( \ 105 sizeof(struct ip) + \ 106 sizeof(struct pfsync_header) + \ 107 sizeof(struct pfsync_subheader) ) 108 109 struct pfsync_pkt { 110 struct ip *ip; 111 struct in_addr src; 112 u_int8_t flags; 113 }; 114 115 static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 116 struct pfsync_state_peer *); 117 static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); 118 static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); 119 static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); 120 static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); 121 static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); 122 static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); 123 static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); 124 static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); 125 static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); 126 static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); 127 static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); 128 static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); 129 130 static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { 131 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 132 pfsync_in_ins, /* PFSYNC_ACT_INS */ 133 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 134 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 135 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 136 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 137 pfsync_in_del, /* PFSYNC_ACT_DEL */ 138 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 139 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 140 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 141 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 142 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 
143 pfsync_in_eof /* PFSYNC_ACT_EOF */ 144 }; 145 146 struct pfsync_q { 147 void (*write)(struct pf_state *, void *); 148 size_t len; 149 u_int8_t action; 150 }; 151 152 /* we have one of these for every PFSYNC_S_ */ 153 static void pfsync_out_state(struct pf_state *, void *); 154 static void pfsync_out_iack(struct pf_state *, void *); 155 static void pfsync_out_upd_c(struct pf_state *, void *); 156 static void pfsync_out_del(struct pf_state *, void *); 157 158 static struct pfsync_q pfsync_qs[] = { 159 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 160 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 161 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, 162 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 163 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 164 }; 165 166 static void pfsync_q_ins(struct pf_state *, int, bool); 167 static void pfsync_q_del(struct pf_state *, bool); 168 169 static void pfsync_update_state(struct pf_state *); 170 171 struct pfsync_upd_req_item { 172 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 173 struct pfsync_upd_req ur_msg; 174 }; 175 176 struct pfsync_deferral { 177 struct pfsync_softc *pd_sc; 178 TAILQ_ENTRY(pfsync_deferral) pd_entry; 179 u_int pd_refs; 180 struct callout pd_tmo; 181 182 struct pf_state *pd_st; 183 struct mbuf *pd_m; 184 }; 185 186 struct pfsync_softc { 187 /* Configuration */ 188 struct ifnet *sc_ifp; 189 struct ifnet *sc_sync_if; 190 struct ip_moptions sc_imo; 191 struct in_addr sc_sync_peer; 192 uint32_t sc_flags; 193 #define PFSYNCF_OK 0x00000001 194 #define PFSYNCF_DEFER 0x00000002 195 #define PFSYNCF_PUSH 0x00000004 196 uint8_t sc_maxupdates; 197 struct ip sc_template; 198 struct callout sc_tmo; 199 struct mtx sc_mtx; 200 201 /* Queued data */ 202 size_t sc_len; 203 TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; 204 TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; 205 TAILQ_HEAD(, pfsync_deferral) sc_deferrals; 206 u_int sc_deferred; 207 void *sc_plus; 208 size_t sc_pluslen; 209 210 /* Bulk update info */ 211 struct mtx sc_bulk_mtx; 212 uint32_t sc_ureq_sent; 213 int sc_bulk_tries; 214 uint32_t sc_ureq_received; 215 int sc_bulk_hashid; 216 uint64_t sc_bulk_stateid; 217 uint32_t sc_bulk_creatorid; 218 struct callout sc_bulk_tmo; 219 struct callout sc_bulkfail_tmo; 220 }; 221 222 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 223 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 224 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 225 226 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 227 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 228 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 229 230 static const char pfsyncname[] = "pfsync"; 231 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 232 static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; 233 #define V_pfsyncif VNET(pfsyncif) 234 static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; 235 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 236 static VNET_DEFINE(struct pfsyncstats, pfsyncstats); 237 #define V_pfsyncstats VNET(pfsyncstats) 238 static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; 239 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 240 241 static void pfsync_timeout(void *); 242 static void pfsync_push(struct pfsync_softc *); 243 static void pfsyncintr(void *); 244 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 245 void *); 246 static void 
pfsync_multicast_cleanup(struct pfsync_softc *); 247 static void pfsync_pointers_init(void); 248 static void pfsync_pointers_uninit(void); 249 static int pfsync_init(void); 250 static void pfsync_uninit(void); 251 252 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); 253 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 254 &VNET_NAME(pfsyncstats), pfsyncstats, 255 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 256 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, 257 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 258 259 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 260 static void pfsync_clone_destroy(struct ifnet *); 261 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 262 struct pf_state_peer *); 263 static int pfsyncoutput(struct ifnet *, struct mbuf *, 264 const struct sockaddr *, struct route *); 265 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 266 267 static int pfsync_defer(struct pf_state *, struct mbuf *); 268 static void pfsync_undefer(struct pfsync_deferral *, int); 269 static void pfsync_undefer_state(struct pf_state *, int); 270 static void pfsync_defer_tmo(void *); 271 272 static void pfsync_request_update(u_int32_t, u_int64_t); 273 static void pfsync_update_state_req(struct pf_state *); 274 275 static void pfsync_drop(struct pfsync_softc *); 276 static void pfsync_sendout(int); 277 static void pfsync_send_plus(void *, size_t); 278 279 static void pfsync_bulk_start(void); 280 static void pfsync_bulk_status(u_int8_t); 281 static void pfsync_bulk_update(void *); 282 static void pfsync_bulk_fail(void *); 283 284 #ifdef IPSEC 285 static void pfsync_update_net_tdb(struct pfsync_tdb *); 286 #endif 287 288 #define PFSYNC_MAX_BULKTRIES 12 289 290 VNET_DEFINE(struct if_clone *, pfsync_cloner); 291 #define V_pfsync_cloner VNET(pfsync_cloner) 292 293 static int 294 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 295 { 296 struct pfsync_softc *sc; 297 struct ifnet *ifp; 298 int q; 299 300 if (unit != 0) 301 return (EINVAL); 302 303 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 304 sc->sc_flags |= PFSYNCF_OK; 305 306 for (q = 0; q < PFSYNC_S_COUNT; q++) 307 TAILQ_INIT(&sc->sc_qs[q]); 308 309 TAILQ_INIT(&sc->sc_upd_req_list); 310 TAILQ_INIT(&sc->sc_deferrals); 311 312 sc->sc_len = PFSYNC_MINPKT; 313 sc->sc_maxupdates = 128; 314 315 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 316 if (ifp == NULL) { 317 free(sc, M_PFSYNC); 318 return (ENOSPC); 319 } 320 if_initname(ifp, pfsyncname, unit); 321 ifp->if_softc = sc; 322 ifp->if_ioctl = pfsyncioctl; 323 ifp->if_output = pfsyncoutput; 324 ifp->if_type = IFT_PFSYNC; 325 ifp->if_snd.ifq_maxlen = ifqmaxlen; 326 ifp->if_hdrlen = sizeof(struct pfsync_header); 327 ifp->if_mtu = ETHERMTU; 328 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 329 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 330 callout_init(&sc->sc_tmo, 1); 331 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 332 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 333 334 if_attach(ifp); 335 336 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 337 338 V_pfsyncif = sc; 339 340 return (0); 341 } 342 343 static void 344 pfsync_clone_destroy(struct ifnet *ifp) 345 { 346 struct pfsync_softc *sc = ifp->if_softc; 347 348 /* 349 * At this stage, everything should have already been 350 * cleared by pfsync_uninit(), and we have only to 351 * drain callouts. 
352 */ 353 while (sc->sc_deferred > 0) { 354 struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); 355 356 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 357 sc->sc_deferred--; 358 if (callout_stop(&pd->pd_tmo) > 0) { 359 pf_release_state(pd->pd_st); 360 m_freem(pd->pd_m); 361 free(pd, M_PFSYNC); 362 } else { 363 pd->pd_refs++; 364 callout_drain(&pd->pd_tmo); 365 free(pd, M_PFSYNC); 366 } 367 } 368 369 callout_drain(&sc->sc_tmo); 370 callout_drain(&sc->sc_bulkfail_tmo); 371 callout_drain(&sc->sc_bulk_tmo); 372 373 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 374 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 375 bpfdetach(ifp); 376 if_detach(ifp); 377 378 pfsync_drop(sc); 379 380 if_free(ifp); 381 if (sc->sc_imo.imo_membership) 382 pfsync_multicast_cleanup(sc); 383 mtx_destroy(&sc->sc_mtx); 384 mtx_destroy(&sc->sc_bulk_mtx); 385 free(sc, M_PFSYNC); 386 387 V_pfsyncif = NULL; 388 } 389 390 static int 391 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 392 struct pf_state_peer *d) 393 { 394 if (s->scrub.scrub_flag && d->scrub == NULL) { 395 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 396 if (d->scrub == NULL) 397 return (ENOMEM); 398 } 399 400 return (0); 401 } 402 403 404 static int 405 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 406 { 407 struct pfsync_softc *sc = V_pfsyncif; 408 #ifndef __NO_STRICT_ALIGNMENT 409 struct pfsync_state_key key[2]; 410 #endif 411 struct pfsync_state_key *kw, *ks; 412 struct pf_state *st = NULL; 413 struct pf_state_key *skw = NULL, *sks = NULL; 414 struct pf_rule *r = NULL; 415 struct pfi_kif *kif; 416 int error; 417 418 PF_RULES_RASSERT(); 419 420 if (sp->creatorid == 0) { 421 if (V_pf_status.debug >= PF_DEBUG_MISC) 422 printf("%s: invalid creator id: %08x\n", __func__, 423 ntohl(sp->creatorid)); 424 return (EINVAL); 425 } 426 427 if ((kif = pfi_kif_find(sp->ifname)) == NULL) { 428 if (V_pf_status.debug >= PF_DEBUG_MISC) 429 printf("%s: unknown interface: %s\n", __func__, 430 sp->ifname); 431 if (flags & PFSYNC_SI_IOCTL) 432 return (EINVAL); 433 return (0); /* skip this state */ 434 } 435 436 /* 437 * If the ruleset checksums match or the state is coming from the ioctl, 438 * it's safe to associate the state with the rule of that number. 439 */ 440 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 441 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 442 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 443 r = pf_main_ruleset.rules[ 444 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 445 else 446 r = &V_pf_default_rule; 447 448 if ((r->max_states && 449 counter_u64_fetch(r->states_cur) >= r->max_states)) 450 goto cleanup; 451 452 /* 453 * XXXGL: consider M_WAITOK in ioctl path after. 
454 */ 455 if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) 456 goto cleanup; 457 458 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 459 goto cleanup; 460 461 #ifndef __NO_STRICT_ALIGNMENT 462 bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); 463 kw = &key[PF_SK_WIRE]; 464 ks = &key[PF_SK_STACK]; 465 #else 466 kw = &sp->key[PF_SK_WIRE]; 467 ks = &sp->key[PF_SK_STACK]; 468 #endif 469 470 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || 471 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || 472 kw->port[0] != ks->port[0] || 473 kw->port[1] != ks->port[1]) { 474 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 475 if (sks == NULL) 476 goto cleanup; 477 } else 478 sks = skw; 479 480 /* allocate memory for scrub info */ 481 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 482 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 483 goto cleanup; 484 485 /* Copy to state key(s). */ 486 skw->addr[0] = kw->addr[0]; 487 skw->addr[1] = kw->addr[1]; 488 skw->port[0] = kw->port[0]; 489 skw->port[1] = kw->port[1]; 490 skw->proto = sp->proto; 491 skw->af = sp->af; 492 if (sks != skw) { 493 sks->addr[0] = ks->addr[0]; 494 sks->addr[1] = ks->addr[1]; 495 sks->port[0] = ks->port[0]; 496 sks->port[1] = ks->port[1]; 497 sks->proto = sp->proto; 498 sks->af = sp->af; 499 } 500 501 /* copy to state */ 502 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 503 st->creation = time_uptime - ntohl(sp->creation); 504 st->expire = time_uptime; 505 if (sp->expire) { 506 uint32_t timeout; 507 508 timeout = r->timeout[sp->timeout]; 509 if (!timeout) 510 timeout = V_pf_default_rule.timeout[sp->timeout]; 511 512 /* sp->expire may have been adaptively scaled by export. */ 513 st->expire -= timeout - ntohl(sp->expire); 514 } 515 516 st->direction = sp->direction; 517 st->log = sp->log; 518 st->timeout = sp->timeout; 519 st->state_flags = sp->state_flags; 520 521 st->id = sp->id; 522 st->creatorid = sp->creatorid; 523 pf_state_peer_ntoh(&sp->src, &st->src); 524 pf_state_peer_ntoh(&sp->dst, &st->dst); 525 526 st->rule.ptr = r; 527 st->nat_rule.ptr = NULL; 528 st->anchor.ptr = NULL; 529 st->rt_kif = NULL; 530 531 st->pfsync_time = time_uptime; 532 st->sync_state = PFSYNC_S_NONE; 533 534 if (!(flags & PFSYNC_SI_IOCTL)) 535 st->state_flags |= PFSTATE_NOSYNC; 536 537 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) 538 goto cleanup_state; 539 540 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 541 counter_u64_add(r->states_cur, 1); 542 counter_u64_add(r->states_tot, 1); 543 544 if (!(flags & PFSYNC_SI_IOCTL)) { 545 st->state_flags &= ~PFSTATE_NOSYNC; 546 if (st->state_flags & PFSTATE_ACK) { 547 pfsync_q_ins(st, PFSYNC_S_IACK, true); 548 pfsync_push(sc); 549 } 550 } 551 st->state_flags &= ~PFSTATE_ACK; 552 PF_STATE_UNLOCK(st); 553 554 return (0); 555 556 cleanup: 557 error = ENOMEM; 558 if (skw == sks) 559 sks = NULL; 560 if (skw != NULL) 561 uma_zfree(V_pf_state_key_z, skw); 562 if (sks != NULL) 563 uma_zfree(V_pf_state_key_z, sks); 564 565 cleanup_state: /* pf_state_insert() frees the state keys. 
*/ 566 if (st) { 567 if (st->dst.scrub) 568 uma_zfree(V_pf_state_scrub_z, st->dst.scrub); 569 if (st->src.scrub) 570 uma_zfree(V_pf_state_scrub_z, st->src.scrub); 571 uma_zfree(V_pf_state_z, st); 572 } 573 return (error); 574 } 575 576 static int 577 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 578 { 579 struct pfsync_softc *sc = V_pfsyncif; 580 struct pfsync_pkt pkt; 581 struct mbuf *m = *mp; 582 struct ip *ip = mtod(m, struct ip *); 583 struct pfsync_header *ph; 584 struct pfsync_subheader subh; 585 586 int offset, len; 587 int rv; 588 uint16_t count; 589 590 PF_RULES_RLOCK_TRACKER; 591 592 *mp = NULL; 593 V_pfsyncstats.pfsyncs_ipackets++; 594 595 /* Verify that we have a sync interface configured. */ 596 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 597 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 598 goto done; 599 600 /* verify that the packet came in on the right interface */ 601 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 602 V_pfsyncstats.pfsyncs_badif++; 603 goto done; 604 } 605 606 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 607 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 608 /* verify that the IP TTL is 255. */ 609 if (ip->ip_ttl != PFSYNC_DFLTTL) { 610 V_pfsyncstats.pfsyncs_badttl++; 611 goto done; 612 } 613 614 offset = ip->ip_hl << 2; 615 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 616 V_pfsyncstats.pfsyncs_hdrops++; 617 goto done; 618 } 619 620 if (offset + sizeof(*ph) > m->m_len) { 621 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 622 V_pfsyncstats.pfsyncs_hdrops++; 623 return (IPPROTO_DONE); 624 } 625 ip = mtod(m, struct ip *); 626 } 627 ph = (struct pfsync_header *)((char *)ip + offset); 628 629 /* verify the version */ 630 if (ph->version != PFSYNC_VERSION) { 631 V_pfsyncstats.pfsyncs_badver++; 632 goto done; 633 } 634 635 len = ntohs(ph->len) + offset; 636 if (m->m_pkthdr.len < len) { 637 V_pfsyncstats.pfsyncs_badlen++; 638 goto done; 639 } 640 641 /* Cheaper to grab this now than having to mess with mbufs later */ 642 pkt.ip = ip; 643 pkt.src = ip->ip_src; 644 pkt.flags = 0; 645 646 /* 647 * Trusting pf_chksum during packet processing, as well as seeking 648 * in interface name tree, require holding PF_RULES_RLOCK(). 
649 */ 650 PF_RULES_RLOCK(); 651 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 652 pkt.flags |= PFSYNC_SI_CKSUM; 653 654 offset += sizeof(*ph); 655 while (offset <= len - sizeof(subh)) { 656 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 657 offset += sizeof(subh); 658 659 if (subh.action >= PFSYNC_ACT_MAX) { 660 V_pfsyncstats.pfsyncs_badact++; 661 PF_RULES_RUNLOCK(); 662 goto done; 663 } 664 665 count = ntohs(subh.count); 666 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 667 rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); 668 if (rv == -1) { 669 PF_RULES_RUNLOCK(); 670 return (IPPROTO_DONE); 671 } 672 673 offset += rv; 674 } 675 PF_RULES_RUNLOCK(); 676 677 done: 678 m_freem(m); 679 return (IPPROTO_DONE); 680 } 681 682 static int 683 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 684 { 685 struct pfsync_clr *clr; 686 struct mbuf *mp; 687 int len = sizeof(*clr) * count; 688 int i, offp; 689 u_int32_t creatorid; 690 691 mp = m_pulldown(m, offset, len, &offp); 692 if (mp == NULL) { 693 V_pfsyncstats.pfsyncs_badlen++; 694 return (-1); 695 } 696 clr = (struct pfsync_clr *)(mp->m_data + offp); 697 698 for (i = 0; i < count; i++) { 699 creatorid = clr[i].creatorid; 700 701 if (clr[i].ifname[0] != '\0' && 702 pfi_kif_find(clr[i].ifname) == NULL) 703 continue; 704 705 for (int i = 0; i <= pf_hashmask; i++) { 706 struct pf_idhash *ih = &V_pf_idhash[i]; 707 struct pf_state *s; 708 relock: 709 PF_HASHROW_LOCK(ih); 710 LIST_FOREACH(s, &ih->states, entry) { 711 if (s->creatorid == creatorid) { 712 s->state_flags |= PFSTATE_NOSYNC; 713 pf_unlink_state(s, PF_ENTER_LOCKED); 714 goto relock; 715 } 716 } 717 PF_HASHROW_UNLOCK(ih); 718 } 719 } 720 721 return (len); 722 } 723 724 static int 725 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 726 { 727 struct mbuf *mp; 728 struct pfsync_state *sa, *sp; 729 int len = sizeof(*sp) * count; 730 int i, offp; 731 732 mp = m_pulldown(m, offset, len, &offp); 733 if (mp == NULL) { 734 V_pfsyncstats.pfsyncs_badlen++; 735 return (-1); 736 } 737 sa = (struct pfsync_state *)(mp->m_data + offp); 738 739 for (i = 0; i < count; i++) { 740 sp = &sa[i]; 741 742 /* Check for invalid values. */ 743 if (sp->timeout >= PFTM_MAX || 744 sp->src.state > PF_TCPS_PROXY_DST || 745 sp->dst.state > PF_TCPS_PROXY_DST || 746 sp->direction > PF_OUT || 747 (sp->af != AF_INET && sp->af != AF_INET6)) { 748 if (V_pf_status.debug >= PF_DEBUG_MISC) 749 printf("%s: invalid value\n", __func__); 750 V_pfsyncstats.pfsyncs_badval++; 751 continue; 752 } 753 754 if (pfsync_state_import(sp, pkt->flags) == ENOMEM) 755 /* Drop out, but process the rest of the actions. 
*/ 756 break; 757 } 758 759 return (len); 760 } 761 762 static int 763 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 764 { 765 struct pfsync_ins_ack *ia, *iaa; 766 struct pf_state *st; 767 768 struct mbuf *mp; 769 int len = count * sizeof(*ia); 770 int offp, i; 771 772 mp = m_pulldown(m, offset, len, &offp); 773 if (mp == NULL) { 774 V_pfsyncstats.pfsyncs_badlen++; 775 return (-1); 776 } 777 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 778 779 for (i = 0; i < count; i++) { 780 ia = &iaa[i]; 781 782 st = pf_find_state_byid(ia->id, ia->creatorid); 783 if (st == NULL) 784 continue; 785 786 if (st->state_flags & PFSTATE_ACK) { 787 PFSYNC_LOCK(V_pfsyncif); 788 pfsync_undefer_state(st, 0); 789 PFSYNC_UNLOCK(V_pfsyncif); 790 } 791 PF_STATE_UNLOCK(st); 792 } 793 /* 794 * XXX this is not yet implemented, but we know the size of the 795 * message so we can skip it. 796 */ 797 798 return (count * sizeof(struct pfsync_ins_ack)); 799 } 800 801 static int 802 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 803 struct pfsync_state_peer *dst) 804 { 805 int sync = 0; 806 807 PF_STATE_LOCK_ASSERT(st); 808 809 /* 810 * The state should never go backwards except 811 * for syn-proxy states. Neither should the 812 * sequence window slide backwards. 813 */ 814 if ((st->src.state > src->state && 815 (st->src.state < PF_TCPS_PROXY_SRC || 816 src->state >= PF_TCPS_PROXY_SRC)) || 817 818 (st->src.state == src->state && 819 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 820 sync++; 821 else 822 pf_state_peer_ntoh(src, &st->src); 823 824 if ((st->dst.state > dst->state) || 825 826 (st->dst.state >= TCPS_SYN_SENT && 827 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 828 sync++; 829 else 830 pf_state_peer_ntoh(dst, &st->dst); 831 832 return (sync); 833 } 834 835 static int 836 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 837 { 838 struct pfsync_softc *sc = V_pfsyncif; 839 struct pfsync_state *sa, *sp; 840 struct pf_state *st; 841 int sync; 842 843 struct mbuf *mp; 844 int len = count * sizeof(*sp); 845 int offp, i; 846 847 mp = m_pulldown(m, offset, len, &offp); 848 if (mp == NULL) { 849 V_pfsyncstats.pfsyncs_badlen++; 850 return (-1); 851 } 852 sa = (struct pfsync_state *)(mp->m_data + offp); 853 854 for (i = 0; i < count; i++) { 855 sp = &sa[i]; 856 857 /* check for invalid values */ 858 if (sp->timeout >= PFTM_MAX || 859 sp->src.state > PF_TCPS_PROXY_DST || 860 sp->dst.state > PF_TCPS_PROXY_DST) { 861 if (V_pf_status.debug >= PF_DEBUG_MISC) { 862 printf("pfsync_input: PFSYNC_ACT_UPD: " 863 "invalid value\n"); 864 } 865 V_pfsyncstats.pfsyncs_badval++; 866 continue; 867 } 868 869 st = pf_find_state_byid(sp->id, sp->creatorid); 870 if (st == NULL) { 871 /* insert the update */ 872 if (pfsync_state_import(sp, 0)) 873 V_pfsyncstats.pfsyncs_badstate++; 874 continue; 875 } 876 877 if (st->state_flags & PFSTATE_ACK) { 878 PFSYNC_LOCK(sc); 879 pfsync_undefer_state(st, 1); 880 PFSYNC_UNLOCK(sc); 881 } 882 883 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 884 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 885 else { 886 sync = 0; 887 888 /* 889 * Non-TCP protocol state machine always go 890 * forwards 891 */ 892 if (st->src.state > sp->src.state) 893 sync++; 894 else 895 pf_state_peer_ntoh(&sp->src, &st->src); 896 if (st->dst.state > sp->dst.state) 897 sync++; 898 else 899 pf_state_peer_ntoh(&sp->dst, &st->dst); 900 } 901 if (sync < 2) { 902 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 903 pf_state_peer_ntoh(&sp->dst, &st->dst); 904 st->expire 
= time_uptime; 905 st->timeout = sp->timeout; 906 } 907 st->pfsync_time = time_uptime; 908 909 if (sync) { 910 V_pfsyncstats.pfsyncs_stale++; 911 912 pfsync_update_state(st); 913 PF_STATE_UNLOCK(st); 914 PFSYNC_LOCK(sc); 915 pfsync_push(sc); 916 PFSYNC_UNLOCK(sc); 917 continue; 918 } 919 PF_STATE_UNLOCK(st); 920 } 921 922 return (len); 923 } 924 925 static int 926 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 927 { 928 struct pfsync_softc *sc = V_pfsyncif; 929 struct pfsync_upd_c *ua, *up; 930 struct pf_state *st; 931 int len = count * sizeof(*up); 932 int sync; 933 struct mbuf *mp; 934 int offp, i; 935 936 mp = m_pulldown(m, offset, len, &offp); 937 if (mp == NULL) { 938 V_pfsyncstats.pfsyncs_badlen++; 939 return (-1); 940 } 941 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 942 943 for (i = 0; i < count; i++) { 944 up = &ua[i]; 945 946 /* check for invalid values */ 947 if (up->timeout >= PFTM_MAX || 948 up->src.state > PF_TCPS_PROXY_DST || 949 up->dst.state > PF_TCPS_PROXY_DST) { 950 if (V_pf_status.debug >= PF_DEBUG_MISC) { 951 printf("pfsync_input: " 952 "PFSYNC_ACT_UPD_C: " 953 "invalid value\n"); 954 } 955 V_pfsyncstats.pfsyncs_badval++; 956 continue; 957 } 958 959 st = pf_find_state_byid(up->id, up->creatorid); 960 if (st == NULL) { 961 /* We don't have this state. Ask for it. */ 962 PFSYNC_LOCK(sc); 963 pfsync_request_update(up->creatorid, up->id); 964 PFSYNC_UNLOCK(sc); 965 continue; 966 } 967 968 if (st->state_flags & PFSTATE_ACK) { 969 PFSYNC_LOCK(sc); 970 pfsync_undefer_state(st, 1); 971 PFSYNC_UNLOCK(sc); 972 } 973 974 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 975 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 976 else { 977 sync = 0; 978 979 /* 980 * Non-TCP protocol state machine always go 981 * forwards 982 */ 983 if (st->src.state > up->src.state) 984 sync++; 985 else 986 pf_state_peer_ntoh(&up->src, &st->src); 987 if (st->dst.state > up->dst.state) 988 sync++; 989 else 990 pf_state_peer_ntoh(&up->dst, &st->dst); 991 } 992 if (sync < 2) { 993 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 994 pf_state_peer_ntoh(&up->dst, &st->dst); 995 st->expire = time_uptime; 996 st->timeout = up->timeout; 997 } 998 st->pfsync_time = time_uptime; 999 1000 if (sync) { 1001 V_pfsyncstats.pfsyncs_stale++; 1002 1003 pfsync_update_state(st); 1004 PF_STATE_UNLOCK(st); 1005 PFSYNC_LOCK(sc); 1006 pfsync_push(sc); 1007 PFSYNC_UNLOCK(sc); 1008 continue; 1009 } 1010 PF_STATE_UNLOCK(st); 1011 } 1012 1013 return (len); 1014 } 1015 1016 static int 1017 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1018 { 1019 struct pfsync_upd_req *ur, *ura; 1020 struct mbuf *mp; 1021 int len = count * sizeof(*ur); 1022 int i, offp; 1023 1024 struct pf_state *st; 1025 1026 mp = m_pulldown(m, offset, len, &offp); 1027 if (mp == NULL) { 1028 V_pfsyncstats.pfsyncs_badlen++; 1029 return (-1); 1030 } 1031 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1032 1033 for (i = 0; i < count; i++) { 1034 ur = &ura[i]; 1035 1036 if (ur->id == 0 && ur->creatorid == 0) 1037 pfsync_bulk_start(); 1038 else { 1039 st = pf_find_state_byid(ur->id, ur->creatorid); 1040 if (st == NULL) { 1041 V_pfsyncstats.pfsyncs_badstate++; 1042 continue; 1043 } 1044 if (st->state_flags & PFSTATE_NOSYNC) { 1045 PF_STATE_UNLOCK(st); 1046 continue; 1047 } 1048 1049 pfsync_update_state_req(st); 1050 PF_STATE_UNLOCK(st); 1051 } 1052 } 1053 1054 return (len); 1055 } 1056 1057 static int 1058 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1059 { 1060 
struct mbuf *mp; 1061 struct pfsync_state *sa, *sp; 1062 struct pf_state *st; 1063 int len = count * sizeof(*sp); 1064 int offp, i; 1065 1066 mp = m_pulldown(m, offset, len, &offp); 1067 if (mp == NULL) { 1068 V_pfsyncstats.pfsyncs_badlen++; 1069 return (-1); 1070 } 1071 sa = (struct pfsync_state *)(mp->m_data + offp); 1072 1073 for (i = 0; i < count; i++) { 1074 sp = &sa[i]; 1075 1076 st = pf_find_state_byid(sp->id, sp->creatorid); 1077 if (st == NULL) { 1078 V_pfsyncstats.pfsyncs_badstate++; 1079 continue; 1080 } 1081 st->state_flags |= PFSTATE_NOSYNC; 1082 pf_unlink_state(st, PF_ENTER_LOCKED); 1083 } 1084 1085 return (len); 1086 } 1087 1088 static int 1089 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1090 { 1091 struct mbuf *mp; 1092 struct pfsync_del_c *sa, *sp; 1093 struct pf_state *st; 1094 int len = count * sizeof(*sp); 1095 int offp, i; 1096 1097 mp = m_pulldown(m, offset, len, &offp); 1098 if (mp == NULL) { 1099 V_pfsyncstats.pfsyncs_badlen++; 1100 return (-1); 1101 } 1102 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1103 1104 for (i = 0; i < count; i++) { 1105 sp = &sa[i]; 1106 1107 st = pf_find_state_byid(sp->id, sp->creatorid); 1108 if (st == NULL) { 1109 V_pfsyncstats.pfsyncs_badstate++; 1110 continue; 1111 } 1112 1113 st->state_flags |= PFSTATE_NOSYNC; 1114 pf_unlink_state(st, PF_ENTER_LOCKED); 1115 } 1116 1117 return (len); 1118 } 1119 1120 static int 1121 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1122 { 1123 struct pfsync_softc *sc = V_pfsyncif; 1124 struct pfsync_bus *bus; 1125 struct mbuf *mp; 1126 int len = count * sizeof(*bus); 1127 int offp; 1128 1129 PFSYNC_BLOCK(sc); 1130 1131 /* If we're not waiting for a bulk update, who cares. */ 1132 if (sc->sc_ureq_sent == 0) { 1133 PFSYNC_BUNLOCK(sc); 1134 return (len); 1135 } 1136 1137 mp = m_pulldown(m, offset, len, &offp); 1138 if (mp == NULL) { 1139 PFSYNC_BUNLOCK(sc); 1140 V_pfsyncstats.pfsyncs_badlen++; 1141 return (-1); 1142 } 1143 bus = (struct pfsync_bus *)(mp->m_data + offp); 1144 1145 switch (bus->status) { 1146 case PFSYNC_BUS_START: 1147 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1148 V_pf_limits[PF_LIMIT_STATES].limit / 1149 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1150 sizeof(struct pfsync_state)), 1151 pfsync_bulk_fail, sc); 1152 if (V_pf_status.debug >= PF_DEBUG_MISC) 1153 printf("pfsync: received bulk update start\n"); 1154 break; 1155 1156 case PFSYNC_BUS_END: 1157 if (time_uptime - ntohl(bus->endtime) >= 1158 sc->sc_ureq_sent) { 1159 /* that's it, we're happy */ 1160 sc->sc_ureq_sent = 0; 1161 sc->sc_bulk_tries = 0; 1162 callout_stop(&sc->sc_bulkfail_tmo); 1163 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1164 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1165 "pfsync bulk done"); 1166 sc->sc_flags |= PFSYNCF_OK; 1167 if (V_pf_status.debug >= PF_DEBUG_MISC) 1168 printf("pfsync: received valid " 1169 "bulk update end\n"); 1170 } else { 1171 if (V_pf_status.debug >= PF_DEBUG_MISC) 1172 printf("pfsync: received invalid " 1173 "bulk update end: bad timestamp\n"); 1174 } 1175 break; 1176 } 1177 PFSYNC_BUNLOCK(sc); 1178 1179 return (len); 1180 } 1181 1182 static int 1183 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1184 { 1185 int len = count * sizeof(struct pfsync_tdb); 1186 1187 #if defined(IPSEC) 1188 struct pfsync_tdb *tp; 1189 struct mbuf *mp; 1190 int offp; 1191 int i; 1192 int s; 1193 1194 mp = m_pulldown(m, offset, len, &offp); 1195 if (mp == NULL) { 1196 V_pfsyncstats.pfsyncs_badlen++; 1197 
return (-1); 1198 } 1199 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1200 1201 for (i = 0; i < count; i++) 1202 pfsync_update_net_tdb(&tp[i]); 1203 #endif 1204 1205 return (len); 1206 } 1207 1208 #if defined(IPSEC) 1209 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1210 static void 1211 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1212 { 1213 struct tdb *tdb; 1214 int s; 1215 1216 /* check for invalid values */ 1217 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1218 (pt->dst.sa.sa_family != AF_INET && 1219 pt->dst.sa.sa_family != AF_INET6)) 1220 goto bad; 1221 1222 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1223 if (tdb) { 1224 pt->rpl = ntohl(pt->rpl); 1225 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1226 1227 /* Neither replay nor byte counter should ever decrease. */ 1228 if (pt->rpl < tdb->tdb_rpl || 1229 pt->cur_bytes < tdb->tdb_cur_bytes) { 1230 goto bad; 1231 } 1232 1233 tdb->tdb_rpl = pt->rpl; 1234 tdb->tdb_cur_bytes = pt->cur_bytes; 1235 } 1236 return; 1237 1238 bad: 1239 if (V_pf_status.debug >= PF_DEBUG_MISC) 1240 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1241 "invalid value\n"); 1242 V_pfsyncstats.pfsyncs_badstate++; 1243 return; 1244 } 1245 #endif 1246 1247 1248 static int 1249 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1250 { 1251 /* check if we are at the right place in the packet */ 1252 if (offset != m->m_pkthdr.len) 1253 V_pfsyncstats.pfsyncs_badlen++; 1254 1255 /* we're done. free and let the caller return */ 1256 m_freem(m); 1257 return (-1); 1258 } 1259 1260 static int 1261 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1262 { 1263 V_pfsyncstats.pfsyncs_badact++; 1264 1265 m_freem(m); 1266 return (-1); 1267 } 1268 1269 static int 1270 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1271 struct route *rt) 1272 { 1273 m_freem(m); 1274 return (0); 1275 } 1276 1277 /* ARGSUSED */ 1278 static int 1279 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1280 { 1281 struct pfsync_softc *sc = ifp->if_softc; 1282 struct ifreq *ifr = (struct ifreq *)data; 1283 struct pfsyncreq pfsyncr; 1284 int error; 1285 1286 switch (cmd) { 1287 case SIOCSIFFLAGS: 1288 PFSYNC_LOCK(sc); 1289 if (ifp->if_flags & IFF_UP) { 1290 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1291 PFSYNC_UNLOCK(sc); 1292 pfsync_pointers_init(); 1293 } else { 1294 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1295 PFSYNC_UNLOCK(sc); 1296 pfsync_pointers_uninit(); 1297 } 1298 break; 1299 case SIOCSIFMTU: 1300 if (!sc->sc_sync_if || 1301 ifr->ifr_mtu <= PFSYNC_MINPKT || 1302 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1303 return (EINVAL); 1304 if (ifr->ifr_mtu < ifp->if_mtu) { 1305 PFSYNC_LOCK(sc); 1306 if (sc->sc_len > PFSYNC_MINPKT) 1307 pfsync_sendout(1); 1308 PFSYNC_UNLOCK(sc); 1309 } 1310 ifp->if_mtu = ifr->ifr_mtu; 1311 break; 1312 case SIOCGETPFSYNC: 1313 bzero(&pfsyncr, sizeof(pfsyncr)); 1314 PFSYNC_LOCK(sc); 1315 if (sc->sc_sync_if) { 1316 strlcpy(pfsyncr.pfsyncr_syncdev, 1317 sc->sc_sync_if->if_xname, IFNAMSIZ); 1318 } 1319 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1320 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1321 pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == 1322 (sc->sc_flags & PFSYNCF_DEFER)); 1323 PFSYNC_UNLOCK(sc); 1324 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1325 sizeof(pfsyncr))); 1326 1327 case SIOCSETPFSYNC: 1328 { 1329 struct ip_moptions *imo = &sc->sc_imo; 1330 struct ifnet *sifp; 1331 struct ip *ip; 1332 void *mship = NULL; 1333 1334 if ((error = 
priv_check(curthread, PRIV_NETINET_PF)) != 0) 1335 return (error); 1336 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1337 sizeof(pfsyncr)))) 1338 return (error); 1339 1340 if (pfsyncr.pfsyncr_maxupdates > 255) 1341 return (EINVAL); 1342 1343 if (pfsyncr.pfsyncr_syncdev[0] == 0) 1344 sifp = NULL; 1345 else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) 1346 return (EINVAL); 1347 1348 if (sifp != NULL && ( 1349 pfsyncr.pfsyncr_syncpeer.s_addr == 0 || 1350 pfsyncr.pfsyncr_syncpeer.s_addr == 1351 htonl(INADDR_PFSYNC_GROUP))) 1352 mship = malloc((sizeof(struct in_multi *) * 1353 IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); 1354 1355 PFSYNC_LOCK(sc); 1356 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1357 sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); 1358 else 1359 sc->sc_sync_peer.s_addr = 1360 pfsyncr.pfsyncr_syncpeer.s_addr; 1361 1362 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1363 if (pfsyncr.pfsyncr_defer) { 1364 sc->sc_flags |= PFSYNCF_DEFER; 1365 pfsync_defer_ptr = pfsync_defer; 1366 } else { 1367 sc->sc_flags &= ~PFSYNCF_DEFER; 1368 pfsync_defer_ptr = NULL; 1369 } 1370 1371 if (sifp == NULL) { 1372 if (sc->sc_sync_if) 1373 if_rele(sc->sc_sync_if); 1374 sc->sc_sync_if = NULL; 1375 if (imo->imo_membership) 1376 pfsync_multicast_cleanup(sc); 1377 PFSYNC_UNLOCK(sc); 1378 break; 1379 } 1380 1381 if (sc->sc_len > PFSYNC_MINPKT && 1382 (sifp->if_mtu < sc->sc_ifp->if_mtu || 1383 (sc->sc_sync_if != NULL && 1384 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 1385 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 1386 pfsync_sendout(1); 1387 1388 if (imo->imo_membership) 1389 pfsync_multicast_cleanup(sc); 1390 1391 if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 1392 error = pfsync_multicast_setup(sc, sifp, mship); 1393 if (error) { 1394 if_rele(sifp); 1395 free(mship, M_PFSYNC); 1396 return (error); 1397 } 1398 } 1399 if (sc->sc_sync_if) 1400 if_rele(sc->sc_sync_if); 1401 sc->sc_sync_if = sifp; 1402 1403 ip = &sc->sc_template; 1404 bzero(ip, sizeof(*ip)); 1405 ip->ip_v = IPVERSION; 1406 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1407 ip->ip_tos = IPTOS_LOWDELAY; 1408 /* len and id are set later. */ 1409 ip->ip_off = htons(IP_DF); 1410 ip->ip_ttl = PFSYNC_DFLTTL; 1411 ip->ip_p = IPPROTO_PFSYNC; 1412 ip->ip_src.s_addr = INADDR_ANY; 1413 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1414 1415 /* Request a full state table update. 
*/ 1416 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1417 (*carp_demote_adj_p)(V_pfsync_carp_adj, 1418 "pfsync bulk start"); 1419 sc->sc_flags &= ~PFSYNCF_OK; 1420 if (V_pf_status.debug >= PF_DEBUG_MISC) 1421 printf("pfsync: requesting bulk update\n"); 1422 pfsync_request_update(0, 0); 1423 PFSYNC_UNLOCK(sc); 1424 PFSYNC_BLOCK(sc); 1425 sc->sc_ureq_sent = time_uptime; 1426 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, 1427 sc); 1428 PFSYNC_BUNLOCK(sc); 1429 1430 break; 1431 } 1432 default: 1433 return (ENOTTY); 1434 } 1435 1436 return (0); 1437 } 1438 1439 static void 1440 pfsync_out_state(struct pf_state *st, void *buf) 1441 { 1442 struct pfsync_state *sp = buf; 1443 1444 pfsync_state_export(sp, st); 1445 } 1446 1447 static void 1448 pfsync_out_iack(struct pf_state *st, void *buf) 1449 { 1450 struct pfsync_ins_ack *iack = buf; 1451 1452 iack->id = st->id; 1453 iack->creatorid = st->creatorid; 1454 } 1455 1456 static void 1457 pfsync_out_upd_c(struct pf_state *st, void *buf) 1458 { 1459 struct pfsync_upd_c *up = buf; 1460 1461 bzero(up, sizeof(*up)); 1462 up->id = st->id; 1463 pf_state_peer_hton(&st->src, &up->src); 1464 pf_state_peer_hton(&st->dst, &up->dst); 1465 up->creatorid = st->creatorid; 1466 up->timeout = st->timeout; 1467 } 1468 1469 static void 1470 pfsync_out_del(struct pf_state *st, void *buf) 1471 { 1472 struct pfsync_del_c *dp = buf; 1473 1474 dp->id = st->id; 1475 dp->creatorid = st->creatorid; 1476 st->state_flags |= PFSTATE_NOSYNC; 1477 } 1478 1479 static void 1480 pfsync_drop(struct pfsync_softc *sc) 1481 { 1482 struct pf_state *st, *next; 1483 struct pfsync_upd_req_item *ur; 1484 int q; 1485 1486 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1487 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1488 continue; 1489 1490 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { 1491 KASSERT(st->sync_state == q, 1492 ("%s: st->sync_state == q", 1493 __func__)); 1494 st->sync_state = PFSYNC_S_NONE; 1495 pf_release_state(st); 1496 } 1497 TAILQ_INIT(&sc->sc_qs[q]); 1498 } 1499 1500 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1501 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1502 free(ur, M_PFSYNC); 1503 } 1504 1505 sc->sc_plus = NULL; 1506 sc->sc_len = PFSYNC_MINPKT; 1507 } 1508 1509 static void 1510 pfsync_sendout(int schedswi) 1511 { 1512 struct pfsync_softc *sc = V_pfsyncif; 1513 struct ifnet *ifp = sc->sc_ifp; 1514 struct mbuf *m; 1515 struct ip *ip; 1516 struct pfsync_header *ph; 1517 struct pfsync_subheader *subh; 1518 struct pf_state *st, *st_next; 1519 struct pfsync_upd_req_item *ur; 1520 int offset; 1521 int q, count = 0; 1522 1523 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1524 KASSERT(sc->sc_len > PFSYNC_MINPKT, 1525 ("%s: sc_len %zu", __func__, sc->sc_len)); 1526 PFSYNC_LOCK_ASSERT(sc); 1527 1528 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1529 pfsync_drop(sc); 1530 return; 1531 } 1532 1533 m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1534 if (m == NULL) { 1535 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1536 V_pfsyncstats.pfsyncs_onomem++; 1537 return; 1538 } 1539 m->m_data += max_linkhdr; 1540 m->m_len = m->m_pkthdr.len = sc->sc_len; 1541 1542 /* build the ip header */ 1543 ip = (struct ip *)m->m_data; 1544 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1545 offset = sizeof(*ip); 1546 1547 ip->ip_len = htons(m->m_pkthdr.len); 1548 ip_fillid(ip); 1549 1550 /* build the pfsync header */ 1551 ph = (struct pfsync_header *)(m->m_data + offset); 1552 bzero(ph, sizeof(*ph)); 1553 offset += sizeof(*ph); 1554 1555 
ph->version = PFSYNC_VERSION; 1556 ph->len = htons(sc->sc_len - sizeof(*ip)); 1557 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1558 1559 /* walk the queues */ 1560 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1561 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1562 continue; 1563 1564 subh = (struct pfsync_subheader *)(m->m_data + offset); 1565 offset += sizeof(*subh); 1566 1567 count = 0; 1568 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) { 1569 KASSERT(st->sync_state == q, 1570 ("%s: st->sync_state == q", 1571 __func__)); 1572 /* 1573 * XXXGL: some of write methods do unlocked reads 1574 * of state data :( 1575 */ 1576 pfsync_qs[q].write(st, m->m_data + offset); 1577 offset += pfsync_qs[q].len; 1578 st->sync_state = PFSYNC_S_NONE; 1579 pf_release_state(st); 1580 count++; 1581 } 1582 TAILQ_INIT(&sc->sc_qs[q]); 1583 1584 bzero(subh, sizeof(*subh)); 1585 subh->action = pfsync_qs[q].action; 1586 subh->count = htons(count); 1587 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1588 } 1589 1590 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { 1591 subh = (struct pfsync_subheader *)(m->m_data + offset); 1592 offset += sizeof(*subh); 1593 1594 count = 0; 1595 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1596 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1597 1598 bcopy(&ur->ur_msg, m->m_data + offset, 1599 sizeof(ur->ur_msg)); 1600 offset += sizeof(ur->ur_msg); 1601 free(ur, M_PFSYNC); 1602 count++; 1603 } 1604 1605 bzero(subh, sizeof(*subh)); 1606 subh->action = PFSYNC_ACT_UPD_REQ; 1607 subh->count = htons(count); 1608 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1609 } 1610 1611 /* has someone built a custom region for us to add? */ 1612 if (sc->sc_plus != NULL) { 1613 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); 1614 offset += sc->sc_pluslen; 1615 1616 sc->sc_plus = NULL; 1617 } 1618 1619 subh = (struct pfsync_subheader *)(m->m_data + offset); 1620 offset += sizeof(*subh); 1621 1622 bzero(subh, sizeof(*subh)); 1623 subh->action = PFSYNC_ACT_EOF; 1624 subh->count = htons(1); 1625 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1626 1627 /* we're done, let's put it on the wire */ 1628 if (ifp->if_bpf) { 1629 m->m_data += sizeof(*ip); 1630 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); 1631 BPF_MTAP(ifp, m); 1632 m->m_data -= sizeof(*ip); 1633 m->m_len = m->m_pkthdr.len = sc->sc_len; 1634 } 1635 1636 if (sc->sc_sync_if == NULL) { 1637 sc->sc_len = PFSYNC_MINPKT; 1638 m_freem(m); 1639 return; 1640 } 1641 1642 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 1643 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 1644 sc->sc_len = PFSYNC_MINPKT; 1645 1646 if (!_IF_QFULL(&sc->sc_ifp->if_snd)) 1647 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1648 else { 1649 m_freem(m); 1650 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 1651 } 1652 if (schedswi) 1653 swi_sched(V_pfsync_swi_cookie, 0); 1654 } 1655 1656 static void 1657 pfsync_insert_state(struct pf_state *st) 1658 { 1659 struct pfsync_softc *sc = V_pfsyncif; 1660 1661 if (st->state_flags & PFSTATE_NOSYNC) 1662 return; 1663 1664 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1665 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1666 st->state_flags |= PFSTATE_NOSYNC; 1667 return; 1668 } 1669 1670 KASSERT(st->sync_state == PFSYNC_S_NONE, 1671 ("%s: st->sync_state %u", __func__, st->sync_state)); 1672 1673 PFSYNC_LOCK(sc); 1674 if (sc->sc_len == PFSYNC_MINPKT) 1675 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1676 1677 pfsync_q_ins(st, PFSYNC_S_INS, true); 
1678 PFSYNC_UNLOCK(sc); 1679 1680 st->sync_updates = 0; 1681 } 1682 1683 static int 1684 pfsync_defer(struct pf_state *st, struct mbuf *m) 1685 { 1686 struct pfsync_softc *sc = V_pfsyncif; 1687 struct pfsync_deferral *pd; 1688 1689 if (m->m_flags & (M_BCAST|M_MCAST)) 1690 return (0); 1691 1692 PFSYNC_LOCK(sc); 1693 1694 if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || 1695 !(sc->sc_flags & PFSYNCF_DEFER)) { 1696 PFSYNC_UNLOCK(sc); 1697 return (0); 1698 } 1699 1700 if (sc->sc_deferred >= 128) 1701 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); 1702 1703 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1704 if (pd == NULL) 1705 return (0); 1706 sc->sc_deferred++; 1707 1708 m->m_flags |= M_SKIP_FIREWALL; 1709 st->state_flags |= PFSTATE_ACK; 1710 1711 pd->pd_sc = sc; 1712 pd->pd_refs = 0; 1713 pd->pd_st = st; 1714 pf_ref_state(st); 1715 pd->pd_m = m; 1716 1717 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1718 callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1719 callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); 1720 1721 pfsync_push(sc); 1722 1723 return (1); 1724 } 1725 1726 static void 1727 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1728 { 1729 struct pfsync_softc *sc = pd->pd_sc; 1730 struct mbuf *m = pd->pd_m; 1731 struct pf_state *st = pd->pd_st; 1732 1733 PFSYNC_LOCK_ASSERT(sc); 1734 1735 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1736 sc->sc_deferred--; 1737 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1738 free(pd, M_PFSYNC); 1739 pf_release_state(st); 1740 1741 if (drop) 1742 m_freem(m); 1743 else { 1744 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1745 pfsync_push(sc); 1746 } 1747 } 1748 1749 static void 1750 pfsync_defer_tmo(void *arg) 1751 { 1752 struct pfsync_deferral *pd = arg; 1753 struct pfsync_softc *sc = pd->pd_sc; 1754 struct mbuf *m = pd->pd_m; 1755 struct pf_state *st = pd->pd_st; 1756 1757 PFSYNC_LOCK_ASSERT(sc); 1758 1759 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 1760 1761 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1762 sc->sc_deferred--; 1763 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! 
*/ 1764 if (pd->pd_refs == 0) 1765 free(pd, M_PFSYNC); 1766 PFSYNC_UNLOCK(sc); 1767 1768 ip_output(m, NULL, NULL, 0, NULL, NULL); 1769 1770 pf_release_state(st); 1771 1772 CURVNET_RESTORE(); 1773 } 1774 1775 static void 1776 pfsync_undefer_state(struct pf_state *st, int drop) 1777 { 1778 struct pfsync_softc *sc = V_pfsyncif; 1779 struct pfsync_deferral *pd; 1780 1781 PFSYNC_LOCK_ASSERT(sc); 1782 1783 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1784 if (pd->pd_st == st) { 1785 if (callout_stop(&pd->pd_tmo) > 0) 1786 pfsync_undefer(pd, drop); 1787 return; 1788 } 1789 } 1790 1791 panic("%s: unable to find deferred state", __func__); 1792 } 1793 1794 static void 1795 pfsync_update_state(struct pf_state *st) 1796 { 1797 struct pfsync_softc *sc = V_pfsyncif; 1798 bool sync = false, ref = true; 1799 1800 PF_STATE_LOCK_ASSERT(st); 1801 PFSYNC_LOCK(sc); 1802 1803 if (st->state_flags & PFSTATE_ACK) 1804 pfsync_undefer_state(st, 0); 1805 if (st->state_flags & PFSTATE_NOSYNC) { 1806 if (st->sync_state != PFSYNC_S_NONE) 1807 pfsync_q_del(st, true); 1808 PFSYNC_UNLOCK(sc); 1809 return; 1810 } 1811 1812 if (sc->sc_len == PFSYNC_MINPKT) 1813 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1814 1815 switch (st->sync_state) { 1816 case PFSYNC_S_UPD_C: 1817 case PFSYNC_S_UPD: 1818 case PFSYNC_S_INS: 1819 /* we're already handling it */ 1820 1821 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1822 st->sync_updates++; 1823 if (st->sync_updates >= sc->sc_maxupdates) 1824 sync = true; 1825 } 1826 break; 1827 1828 case PFSYNC_S_IACK: 1829 pfsync_q_del(st, false); 1830 ref = false; 1831 /* FALLTHROUGH */ 1832 1833 case PFSYNC_S_NONE: 1834 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 1835 st->sync_updates = 0; 1836 break; 1837 1838 default: 1839 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1840 } 1841 1842 if (sync || (time_uptime - st->pfsync_time) < 2) 1843 pfsync_push(sc); 1844 1845 PFSYNC_UNLOCK(sc); 1846 } 1847 1848 static void 1849 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1850 { 1851 struct pfsync_softc *sc = V_pfsyncif; 1852 struct pfsync_upd_req_item *item; 1853 size_t nlen = sizeof(struct pfsync_upd_req); 1854 1855 PFSYNC_LOCK_ASSERT(sc); 1856 1857 /* 1858 * This code does a bit to prevent multiple update requests for the 1859 * same state being generated. It searches current subheader queue, 1860 * but it doesn't lookup into queue of already packed datagrams. 
1861 */ 1862 TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) 1863 if (item->ur_msg.id == id && 1864 item->ur_msg.creatorid == creatorid) 1865 return; 1866 1867 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1868 if (item == NULL) 1869 return; /* XXX stats */ 1870 1871 item->ur_msg.id = id; 1872 item->ur_msg.creatorid = creatorid; 1873 1874 if (TAILQ_EMPTY(&sc->sc_upd_req_list)) 1875 nlen += sizeof(struct pfsync_subheader); 1876 1877 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 1878 pfsync_sendout(1); 1879 1880 nlen = sizeof(struct pfsync_subheader) + 1881 sizeof(struct pfsync_upd_req); 1882 } 1883 1884 TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); 1885 sc->sc_len += nlen; 1886 } 1887 1888 static void 1889 pfsync_update_state_req(struct pf_state *st) 1890 { 1891 struct pfsync_softc *sc = V_pfsyncif; 1892 bool ref = true; 1893 1894 PF_STATE_LOCK_ASSERT(st); 1895 PFSYNC_LOCK(sc); 1896 1897 if (st->state_flags & PFSTATE_NOSYNC) { 1898 if (st->sync_state != PFSYNC_S_NONE) 1899 pfsync_q_del(st, true); 1900 PFSYNC_UNLOCK(sc); 1901 return; 1902 } 1903 1904 switch (st->sync_state) { 1905 case PFSYNC_S_UPD_C: 1906 case PFSYNC_S_IACK: 1907 pfsync_q_del(st, false); 1908 ref = false; 1909 /* FALLTHROUGH */ 1910 1911 case PFSYNC_S_NONE: 1912 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 1913 pfsync_push(sc); 1914 break; 1915 1916 case PFSYNC_S_INS: 1917 case PFSYNC_S_UPD: 1918 case PFSYNC_S_DEL: 1919 /* we're already handling it */ 1920 break; 1921 1922 default: 1923 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1924 } 1925 1926 PFSYNC_UNLOCK(sc); 1927 } 1928 1929 static void 1930 pfsync_delete_state(struct pf_state *st) 1931 { 1932 struct pfsync_softc *sc = V_pfsyncif; 1933 bool ref = true; 1934 1935 PFSYNC_LOCK(sc); 1936 if (st->state_flags & PFSTATE_ACK) 1937 pfsync_undefer_state(st, 1); 1938 if (st->state_flags & PFSTATE_NOSYNC) { 1939 if (st->sync_state != PFSYNC_S_NONE) 1940 pfsync_q_del(st, true); 1941 PFSYNC_UNLOCK(sc); 1942 return; 1943 } 1944 1945 if (sc->sc_len == PFSYNC_MINPKT) 1946 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1947 1948 switch (st->sync_state) { 1949 case PFSYNC_S_INS: 1950 /* We never got to tell the world so just forget about it. 
*/ 1951 pfsync_q_del(st, true); 1952 break; 1953 1954 case PFSYNC_S_UPD_C: 1955 case PFSYNC_S_UPD: 1956 case PFSYNC_S_IACK: 1957 pfsync_q_del(st, false); 1958 ref = false; 1959 /* FALLTHROUGH */ 1960 1961 case PFSYNC_S_NONE: 1962 pfsync_q_ins(st, PFSYNC_S_DEL, ref); 1963 break; 1964 1965 default: 1966 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1967 } 1968 1969 PFSYNC_UNLOCK(sc); 1970 } 1971 1972 static void 1973 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 1974 { 1975 struct pfsync_softc *sc = V_pfsyncif; 1976 struct { 1977 struct pfsync_subheader subh; 1978 struct pfsync_clr clr; 1979 } __packed r; 1980 1981 bzero(&r, sizeof(r)); 1982 1983 r.subh.action = PFSYNC_ACT_CLR; 1984 r.subh.count = htons(1); 1985 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 1986 1987 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 1988 r.clr.creatorid = creatorid; 1989 1990 PFSYNC_LOCK(sc); 1991 pfsync_send_plus(&r, sizeof(r)); 1992 PFSYNC_UNLOCK(sc); 1993 } 1994 1995 static void 1996 pfsync_q_ins(struct pf_state *st, int q, bool ref) 1997 { 1998 struct pfsync_softc *sc = V_pfsyncif; 1999 size_t nlen = pfsync_qs[q].len; 2000 2001 PFSYNC_LOCK_ASSERT(sc); 2002 2003 KASSERT(st->sync_state == PFSYNC_S_NONE, 2004 ("%s: st->sync_state %u", __func__, st->sync_state)); 2005 KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2006 sc->sc_len)); 2007 2008 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2009 nlen += sizeof(struct pfsync_subheader); 2010 2011 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 2012 pfsync_sendout(1); 2013 2014 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2015 } 2016 2017 sc->sc_len += nlen; 2018 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); 2019 st->sync_state = q; 2020 if (ref) 2021 pf_ref_state(st); 2022 } 2023 2024 static void 2025 pfsync_q_del(struct pf_state *st, bool unref) 2026 { 2027 struct pfsync_softc *sc = V_pfsyncif; 2028 int q = st->sync_state; 2029 2030 PFSYNC_LOCK_ASSERT(sc); 2031 KASSERT(st->sync_state != PFSYNC_S_NONE, 2032 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2033 2034 sc->sc_len -= pfsync_qs[q].len; 2035 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 2036 st->sync_state = PFSYNC_S_NONE; 2037 if (unref) 2038 pf_release_state(st); 2039 2040 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2041 sc->sc_len -= sizeof(struct pfsync_subheader); 2042 } 2043 2044 static void 2045 pfsync_bulk_start(void) 2046 { 2047 struct pfsync_softc *sc = V_pfsyncif; 2048 2049 if (V_pf_status.debug >= PF_DEBUG_MISC) 2050 printf("pfsync: received bulk update request\n"); 2051 2052 PFSYNC_BLOCK(sc); 2053 2054 sc->sc_ureq_received = time_uptime; 2055 sc->sc_bulk_hashid = 0; 2056 sc->sc_bulk_stateid = 0; 2057 pfsync_bulk_status(PFSYNC_BUS_START); 2058 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2059 PFSYNC_BUNLOCK(sc); 2060 } 2061 2062 static void 2063 pfsync_bulk_update(void *arg) 2064 { 2065 struct pfsync_softc *sc = arg; 2066 struct pf_state *s; 2067 int i, sent = 0; 2068 2069 PFSYNC_BLOCK_ASSERT(sc); 2070 CURVNET_SET(sc->sc_ifp->if_vnet); 2071 2072 /* 2073 * Start with last state from previous invocation. 2074 * It may had gone, in this case start from the 2075 * hash slot. 
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		for (; s; s = LIST_NEXT(s, entry)) {

			if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
			    sizeof(struct pfsync_state)) {
				/* We've filled a packet. */
				sc->sc_bulk_hashid = i;
				sc->sc_bulk_stateid = s->id;
				sc->sc_bulk_creatorid = s->creatorid;
				PF_HASHROW_UNLOCK(ih);
				callout_reset(&sc->sc_bulk_tmo, 1,
				    pfsync_bulk_update, sc);
				goto full;
			}

			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				pfsync_update_state_req(s);
				sent++;
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);

full:
	CURVNET_RESTORE();
}

static void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = V_pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;

	r.bus.creatorid = V_pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	PFSYNC_LOCK(sc);
	pfsync_send_plus(&r, sizeof(r));
	PFSYNC_UNLOCK(sc);
}

static void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again. */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_LOCK(sc);
		pfsync_request_update(0, 0);
		PFSYNC_UNLOCK(sc);
	} else {
		/* Pretend the transfer was OK. */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}

/*
 * Append an additional chunk to the pending packet and send it out
 * immediately.
 */
static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = V_pfsyncif;

	PFSYNC_LOCK_ASSERT(sc);

	if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1);

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout(1);
}

static void
pfsync_timeout(void *arg)
{
	struct pfsync_softc *sc = arg;

	CURVNET_SET(sc->sc_ifp->if_vnet);
	PFSYNC_LOCK(sc);
	pfsync_push(sc);
	PFSYNC_UNLOCK(sc);
	CURVNET_RESTORE();
}

/*
 * Ask the software interrupt handler to transmit the pending packet.
 */
static void
pfsync_push(struct pfsync_softc *sc)
{

	PFSYNC_LOCK_ASSERT(sc);

	sc->sc_flags |= PFSYNCF_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}

/*
 * Software interrupt handler: send the pending pfsync packet, then
 * transmit whatever has been queued on the interface send queue.
 */
static void
pfsyncintr(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct mbuf *m, *n;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_LOCK(sc);
	if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) {
		pfsync_sendout(0);
		sc->sc_flags &= ~PFSYNCF_PUSH;
	}
	_IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
	PFSYNC_UNLOCK(sc);

	for (; m != NULL; m = n) {

		n = m->m_nextpkt;
		m->m_nextpkt = NULL;

		/*
		 * We distinguish between a deferral packet and our
		 * own pfsync packet based on the M_SKIP_FIREWALL
		 * flag. This is XXX.
		 */
		if (m->m_flags & M_SKIP_FIREWALL)
			ip_output(m, NULL, NULL, 0, NULL, NULL);
		else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
		    NULL) == 0)
			V_pfsyncstats.pfsyncs_opackets++;
		else
			V_pfsyncstats.pfsyncs_oerrors++;
	}
	CURVNET_RESTORE();
}

/*
 * Join the pfsync group on the sync interface and set up multicast
 * options for our output.
 */
static int
pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
{
	struct ip_moptions *imo = &sc->sc_imo;
	int error;

	if (!(ifp->if_flags & IFF_MULTICAST))
		return (EADDRNOTAVAIL);

	imo->imo_membership = (struct in_multi **)mship;
	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
	imo->imo_multicast_vif = -1;

	if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
	    &imo->imo_membership[0])) != 0) {
		imo->imo_membership = NULL;
		return (error);
	}
	imo->imo_num_memberships++;
	imo->imo_multicast_ifp = ifp;
	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
	imo->imo_multicast_loop = 0;

	return (0);
}

static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;

	in_leavegroup(imo->imo_membership[0], NULL);
	free(imo->imo_membership, M_PFSYNC);
	imo->imo_membership = NULL;
	imo->imo_multicast_ifp = NULL;
}

#ifdef INET
extern struct domain inetdomain;
static struct protosw in_pfsync_protosw = {
	.pr_type =	SOCK_RAW,
	.pr_domain =	&inetdomain,
	.pr_protocol =	IPPROTO_PFSYNC,
	.pr_flags =	PR_ATOMIC|PR_ADDR,
	.pr_input =	pfsync_input,
	.pr_output =	rip_output,
	.pr_ctloutput =	rip_ctloutput,
	.pr_usrreqs =	&rip_usrreqs
};
#endif

/*
 * Hook pfsync into pf.
 */
static void
pfsync_pointers_init(void)
{

	PF_RULES_WLOCK();
	pfsync_state_import_ptr = pfsync_state_import;
	pfsync_insert_state_ptr = pfsync_insert_state;
	pfsync_update_state_ptr = pfsync_update_state;
	pfsync_delete_state_ptr = pfsync_delete_state;
	pfsync_clear_states_ptr = pfsync_clear_states;
	pfsync_defer_ptr = pfsync_defer;
	PF_RULES_WUNLOCK();
}

/*
 * Unhook pfsync from pf.
 */
static void
pfsync_pointers_uninit(void)
{

	PF_RULES_WLOCK();
	pfsync_state_import_ptr = NULL;
	pfsync_insert_state_ptr = NULL;
	pfsync_update_state_ptr = NULL;
	pfsync_delete_state_ptr = NULL;
	pfsync_clear_states_ptr = NULL;
	pfsync_defer_ptr = NULL;
	PF_RULES_WUNLOCK();
}

static void
vnet_pfsync_init(const void *unused __unused)
{
	int error;

	V_pfsync_cloner = if_clone_simple(pfsyncname,
	    pfsync_clone_create, pfsync_clone_destroy, 1);
	error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif,
	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
	if (error) {
		if_clone_detach(V_pfsync_cloner);
		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
	}
}
VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pfsync_init, NULL);

static void
vnet_pfsync_uninit(const void *unused __unused)
{

	if_clone_detach(V_pfsync_cloner);
	swi_remove(V_pfsync_swi_cookie);
}

/*
 * Detach after pf is gone; otherwise we might touch pfsync memory
 * from within pf after freeing pfsync.
 */
VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
    vnet_pfsync_uninit, NULL);

static int
pfsync_init(void)
{
#ifdef INET
	int error;

	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
	if (error)
		return (error);
	error = ipproto_register(IPPROTO_PFSYNC);
	if (error) {
		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
		return (error);
	}
#endif
	pfsync_pointers_init();

	return (0);
}

static void
pfsync_uninit(void)
{

	pfsync_pointers_uninit();

#ifdef INET
	ipproto_unregister(IPPROTO_PFSYNC);
	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
#endif
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_QUIESCE:
		/*
		 * The module should not be unloaded due to race conditions.
		 */
		error = EBUSY;
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static moduledata_t pfsync_mod = {
	pfsyncname,
	pfsync_modevent,
	0
};

#define	PFSYNC_MODVER	1

/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);