/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND MIT
 *
 * Copyright (c) 2002 Michael Shalayeff
 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
 *
 * Revisions picked from OpenBSD after revision 1.110 import:
 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
 * 1.120, 1.175 - use monotonic time_uptime
 * 1.122 - reduce number of updates for non-TCP sessions
 * 1.125, 1.127 - rewrite merge or stale processing
 * 1.128 - cleanups
 * 1.146 - bzero() mbuf before sparsely filling it with data
 * 1.170 - SIOCSIFMTU checks
 * 1.126, 1.142 - deferred packets processing
 * 1.173 - correct expire time processing
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_pf.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/vnet.h>
#include <net/pfvar.h>
#include <net/if_pfsync.h>

#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader) )

struct pfsync_pkt {
	struct ip *ip;
	struct in_addr src;
	u_int8_t flags;
};

static int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
		    struct pfsync_state_peer *);
static int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);

static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
	pfsync_in_ins,			/* PFSYNC_ACT_INS */
	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_del,			/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
	pfsync_in_eof			/* PFSYNC_ACT_EOF */
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
static void	pfsync_out_state(struct pf_state *, void *);
static void	pfsync_out_iack(struct pf_state *, void *);
static void	pfsync_out_upd_c(struct pf_state *, void *);
static void	pfsync_out_del(struct pf_state *, void *);

static struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
};

static void	pfsync_q_ins(struct pf_state *, int, bool);
static void	pfsync_q_del(struct pf_state *, bool);

static void	pfsync_update_state(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};

struct pfsync_deferral {
	struct pfsync_softc		*pd_sc;
	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
	u_int				pd_refs;
	struct callout			pd_tmo;

	struct pf_state			*pd_st;
	struct mbuf			*pd_m;
};

struct pfsync_softc {
	/* Configuration */
	struct ifnet		*sc_ifp;
	struct ifnet		*sc_sync_if;
	struct ip_moptions	sc_imo;
	struct in_addr		sc_sync_peer;
	uint32_t		sc_flags;
#define	PFSYNCF_OK		0x00000001
#define	PFSYNCF_DEFER		0x00000002
#define	PFSYNCF_PUSH		0x00000004
	uint8_t			sc_maxupdates;
	struct ip		sc_template;
	struct callout		sc_tmo;
	struct mtx		sc_mtx;

	/* Queued data */
	size_t			sc_len;
	TAILQ_HEAD(, pf_state)			sc_qs[PFSYNC_S_COUNT];
	TAILQ_HEAD(, pfsync_upd_req_item)	sc_upd_req_list;
	TAILQ_HEAD(, pfsync_deferral)		sc_deferrals;
	u_int			sc_deferred;
	void			*sc_plus;
	size_t			sc_pluslen;

	/* Bulk update info */
	struct mtx		sc_bulk_mtx;
	uint32_t		sc_ureq_sent;
	int			sc_bulk_tries;
	uint32_t		sc_ureq_received;
	int			sc_bulk_hashid;
	uint64_t		sc_bulk_stateid;
	uint32_t		sc_bulk_creatorid;
	struct callout		sc_bulk_tmo;
	struct callout		sc_bulkfail_tmo;
};

#define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
#define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
#define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)

#define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)

static const char pfsyncname[] = "pfsync";
static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL;
#define	V_pfsyncif		VNET(pfsyncif)
static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
#define	V_pfsyncstats		VNET(pfsyncstats)
static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
#define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)

static void	pfsync_timeout(void *);
static void	pfsync_push(struct pfsync_softc *);
static void	pfsyncintr(void *);
static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
		    void *);
static void	pfsync_multicast_cleanup(struct pfsync_softc *);
static void	pfsync_pointers_init(void);
static void	pfsync_pointers_uninit(void);
static int	pfsync_init(void);
static void	pfsync_uninit(void);

SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsyncstats), pfsyncstats,
    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
    &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");

static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
static void	pfsync_clone_destroy(struct ifnet *);
static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
		    struct pf_state_peer *);
static int	pfsyncoutput(struct ifnet *, struct mbuf *,
		    const struct sockaddr *, struct route *);
static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);

static int	pfsync_defer(struct pf_state *, struct mbuf *);
static void	pfsync_undefer(struct pfsync_deferral *, int);
static void	pfsync_undefer_state(struct pf_state *, int);
static void	pfsync_defer_tmo(void *);

static void	pfsync_request_update(u_int32_t, u_int64_t);
static void	pfsync_update_state_req(struct pf_state *);

static void	pfsync_drop(struct pfsync_softc *);
static void	pfsync_sendout(int);
static void	pfsync_send_plus(void *, size_t);

static void	pfsync_bulk_start(void);
static void	pfsync_bulk_status(u_int8_t);
static void	pfsync_bulk_update(void *);
static void	pfsync_bulk_fail(void *);

#ifdef IPSEC
static void	pfsync_update_net_tdb(struct pfsync_tdb *);
#endif

#define PFSYNC_MAX_BULKTRIES	12

VNET_DEFINE(struct if_clone *, pfsync_cloner);
#define	V_pfsync_cloner	VNET(pfsync_cloner)

static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
	sc->sc_flags |= PFSYNCF_OK;

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
	if (ifp == NULL) {
		free(sc, M_PFSYNC);
		return (ENOSPC);
	}
	if_initname(ifp, pfsyncname, unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
	callout_init(&sc->sc_tmo, 1);
	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);

	if_attach(ifp);

	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);

	V_pfsyncif = sc;

	return (0);
}

static void
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

	/*
	 * At this stage, everything should have already been
	 * cleared by pfsync_uninit(), and we have only to
	 * drain callouts.
	 */
	while (sc->sc_deferred > 0) {
		struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);

		TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
		sc->sc_deferred--;
		if (callout_stop(&pd->pd_tmo) > 0) {
			pf_release_state(pd->pd_st);
			m_freem(pd->pd_m);
			free(pd, M_PFSYNC);
		} else {
			pd->pd_refs++;
			callout_drain(&pd->pd_tmo);
			free(pd, M_PFSYNC);
		}
	}

	callout_drain(&sc->sc_tmo);
	callout_drain(&sc->sc_bulkfail_tmo);
	callout_drain(&sc->sc_bulk_tmo);

	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
	bpfdetach(ifp);
	if_detach(ifp);

	pfsync_drop(sc);

	if_free(ifp);
	if (sc->sc_imo.imo_membership)
		pfsync_multicast_cleanup(sc);
	mtx_destroy(&sc->sc_mtx);
	mtx_destroy(&sc->sc_bulk_mtx);
	free(sc, M_PFSYNC);

	V_pfsyncif = NULL;
}

static int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

static int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pfsync_softc *sc = V_pfsyncif;
#ifndef	__NO_STRICT_ALIGNMENT
	struct pfsync_state_key key[2];
#endif
	struct pfsync_state_key *kw, *ks;
	struct pf_state	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif	*kif;
	int error;

	PF_RULES_RASSERT();

	if (sp->creatorid == 0) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: invalid creator id: %08x\n", __func__,
			    ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: unknown interface: %s\n", __func__,
			    sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &V_pf_default_rule;

	if ((r->max_states &&
	    counter_u64_fetch(r->states_cur) >= r->max_states))
		goto cleanup;

	/*
	 * XXXGL: consider M_WAITOK in ioctl path after.
	 */
	if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
		goto cleanup;

	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
		goto cleanup;

#ifndef	__NO_STRICT_ALIGNMENT
	bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2);
	kw = &key[PF_SK_WIRE];
	ks = &key[PF_SK_STACK];
#else
	kw = &sp->key[PF_SK_WIRE];
	ks = &sp->key[PF_SK_STACK];
#endif

	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) ||
	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) ||
	    kw->port[0] != ks->port[0] ||
	    kw->port[1] != ks->port[1]) {
		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
		if (sks == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* Copy to state key(s). */
	skw->addr[0] = kw->addr[0];
	skw->addr[1] = kw->addr[1];
	skw->port[0] = kw->port[0];
	skw->port[1] = kw->port[1];
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = ks->addr[0];
		sks->addr[1] = ks->addr[1];
		sks->port[0] = ks->port[0];
		sks->port[1] = ks->port[1];
		sks->proto = sp->proto;
		sks->af = sp->af;
	}

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (sp->expire) {
		uint32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = V_pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	if (!(flags & PFSYNC_SI_IOCTL))
		st->state_flags |= PFSTATE_NOSYNC;

	if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
		goto cleanup_state;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	counter_u64_add(r->states_cur, 1);
	counter_u64_add(r->states_tot, 1);

	if (!(flags & PFSYNC_SI_IOCTL)) {
		st->state_flags &= ~PFSTATE_NOSYNC;
		if (st->state_flags & PFSTATE_ACK) {
			pfsync_q_ins(st, PFSYNC_S_IACK, true);
			pfsync_push(sc);
		}
	}
	st->state_flags &= ~PFSTATE_ACK;
	PF_STATE_UNLOCK(st);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		uma_zfree(V_pf_state_key_z, skw);
	if (sks != NULL)
		uma_zfree(V_pf_state_key_z, sks);

cleanup_state:	/* pf_state_insert() frees the state keys. */
	if (st) {
		if (st->dst.scrub)
			uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
		if (st->src.scrub)
			uma_zfree(V_pf_state_scrub_z, st->src.scrub);
		uma_zfree(V_pf_state_z, st);
	}
	return (error);
}

static int
pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_pkt pkt;
	struct mbuf *m = *mp;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len;
	int rv;
	uint16_t count;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	pkt.ip = ip;
	pkt.src = ip->ip_src;
	pkt.flags = 0;

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in interface name tree, require holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		pkt.flags |= PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
		if (rv == -1) {
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}

static int
pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_clr *clr;
	struct mbuf *mp;
	int len = sizeof(*clr) * count;
	int i, offp;
	u_int32_t creatorid;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	clr = (struct pfsync_clr *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		creatorid = clr[i].creatorid;

		if (clr[i].ifname[0] != '\0' &&
		    pfi_kif_find(clr[i].ifname) == NULL)
			continue;

		for (int i = 0; i <= pf_hashmask; i++) {
			struct pf_idhash *ih = &V_pf_idhash[i];
			struct pf_state *s;
relock:
			PF_HASHROW_LOCK(ih);
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->creatorid == creatorid) {
					s->state_flags |= PFSTATE_NOSYNC;
					pf_unlink_state(s, PF_ENTER_LOCKED);
					goto relock;
				}
			}
			PF_HASHROW_UNLOCK(ih);
		}
	}

	return (len);
}

static int
pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	int len = sizeof(*sp) * count;
	int i, offp;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* Check for invalid values. */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("%s: invalid value\n", __func__);
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
			/* Drop out, but process the rest of the actions. */
			break;
	}

	return (len);
}

static int
pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_state *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		st = pf_find_state_byid(ia->id, ia->creatorid);
		if (st == NULL)
			continue;

		if (st->state_flags & PFSTATE_ACK) {
			PFSYNC_LOCK(V_pfsyncif);
			pfsync_undefer_state(st, 0);
			PFSYNC_UNLOCK(V_pfsyncif);
		}
		PF_STATE_UNLOCK(st);
	}
	/*
	 * XXX this is not yet implemented, but we know the size of the
	 * message so we can skip it.
	 */

	return (count * sizeof(struct pfsync_ins_ack));
}

static int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	PF_STATE_LOCK_ASSERT(st);

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

static int
pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_state *sa, *sp;
	struct pf_state *st;
	int sync;

	struct mbuf *mp;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			PFSYNC_LOCK(sc);
			pfsync_undefer_state(st, 1);
			PFSYNC_UNLOCK(sc);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);
			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			PFSYNC_LOCK(sc);
			pfsync_push(sc);
			PFSYNC_UNLOCK(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}

static int
pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_upd_c *ua, *up;
	struct pf_state *st;
	int len = count * sizeof(*up);
	int sync;
	struct mbuf *mp;
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(up->id, up->creatorid);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			PFSYNC_LOCK(sc);
			pfsync_request_update(up->creatorid, up->id);
			PFSYNC_UNLOCK(sc);
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			PFSYNC_LOCK(sc);
			pfsync_undefer_state(st, 1);
			PFSYNC_UNLOCK(sc);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);
			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			PFSYNC_LOCK(sc);
			pfsync_push(sc);
			PFSYNC_UNLOCK(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}

static int
pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_state *st;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ur = &ura[i];

		if (ur->id == 0 && ur->creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(ur->id, ur->creatorid);
			if (st == NULL) {
				V_pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (st->state_flags & PFSTATE_NOSYNC) {
				PF_STATE_UNLOCK(st);
				continue;
			}

			pfsync_update_state_req(st);
			PF_STATE_UNLOCK(st);
		}
	}

	return (len);
}

static int
pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		st->state_flags |= PFSTATE_NOSYNC;
		pf_unlink_state(st, PF_ENTER_LOCKED);
	}

	return (len);
}

static int
pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_del_c *sa, *sp;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_del_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		st->state_flags |= PFSTATE_NOSYNC;
		pf_unlink_state(st, PF_ENTER_LOCKED);
	}

	return (len);
}

static int
pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bus *bus;
	struct mbuf *mp;
	int len = count * sizeof(*bus);
	int offp;

	PFSYNC_BLOCK(sc);

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0) {
		PFSYNC_BUNLOCK(sc);
		return (len);
	}

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		PFSYNC_BUNLOCK(sc);
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	bus = (struct pfsync_bus *)(mp->m_data + offp);

	switch (bus->status) {
	case PFSYNC_BUS_START:
		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
		    V_pf_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)),
		    pfsync_bulk_fail, sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: received bulk update start\n");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			callout_stop(&sc->sc_bulkfail_tmo);
			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
				    "pfsync bulk done");
			sc->sc_flags |= PFSYNCF_OK;
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received valid "
				    "bulk update end\n");
		} else {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received invalid "
				    "bulk update end: bad timestamp\n");
		}
		break;
	}
	PFSYNC_BUNLOCK(sc);

	return (len);
}

static int
pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	int len = count * sizeof(struct pfsync_tdb);

#if defined(IPSEC)
	struct pfsync_tdb *tp;
	struct mbuf *mp;
	int offp;
	int i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	tp = (struct pfsync_tdb *)(mp->m_data + offp);

	for (i = 0; i < count; i++)
		pfsync_update_net_tdb(&tp[i]);
#endif

	return (len);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
static void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;
	int s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	return;

bad:
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
		    "invalid value\n");
	V_pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif

static int
pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	/* check if we are at the right place in the packet */
	if (offset != m->m_pkthdr.len)
		V_pfsyncstats.pfsyncs_badlen++;

	/* we're done. free and let the caller return */
	m_freem(m);
	return (-1);
}

static int
pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	V_pfsyncstats.pfsyncs_badact++;

	m_freem(m);
	return (-1);
}

static int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    struct route *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
static int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct pfsyncreq pfsyncr;
	int error;

	switch (cmd) {
	case SIOCSIFFLAGS:
		PFSYNC_LOCK(sc);
		if (ifp->if_flags & IFF_UP) {
			ifp->if_drv_flags |= IFF_DRV_RUNNING;
			PFSYNC_UNLOCK(sc);
			pfsync_pointers_init();
		} else {
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			PFSYNC_UNLOCK(sc);
			pfsync_pointers_uninit();
		}
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		if (ifr->ifr_mtu < ifp->if_mtu) {
			PFSYNC_LOCK(sc);
			if (sc->sc_len > PFSYNC_MINPKT)
				pfsync_sendout(1);
			PFSYNC_UNLOCK(sc);
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		PFSYNC_LOCK(sc);
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER ==
		    (sc->sc_flags & PFSYNCF_DEFER));
		PFSYNC_UNLOCK(sc);
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
	    {
		struct ip_moptions *imo = &sc->sc_imo;
		struct ifnet *sifp;
		struct ip *ip;
		void *mship = NULL;

		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		if (pfsyncr.pfsyncr_maxupdates > 255)
			return (EINVAL);

		if (pfsyncr.pfsyncr_syncdev[0] == 0)
			sifp = NULL;
		else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

		if (sifp != NULL && (
		    pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
		    pfsyncr.pfsyncr_syncpeer.s_addr ==
		    htonl(INADDR_PFSYNC_GROUP)))
			mship = malloc((sizeof(struct in_multi *) *
			    IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);

		PFSYNC_LOCK(sc);
		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
		if (pfsyncr.pfsyncr_defer) {
			sc->sc_flags |= PFSYNCF_DEFER;
			pfsync_defer_ptr = pfsync_defer;
		} else {
			sc->sc_flags &= ~PFSYNCF_DEFER;
			pfsync_defer_ptr = NULL;
		}

		if (sifp == NULL) {
			if (sc->sc_sync_if)
				if_rele(sc->sc_sync_if);
			sc->sc_sync_if = NULL;
			if (imo->imo_membership)
				pfsync_multicast_cleanup(sc);
			PFSYNC_UNLOCK(sc);
			break;
		}

		if (sc->sc_len > PFSYNC_MINPKT &&
		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
			pfsync_sendout(1);

		if (imo->imo_membership)
			pfsync_multicast_cleanup(sc);

		if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
			error = pfsync_multicast_setup(sc, sifp, mship);
			if (error) {
				if_rele(sifp);
				free(mship, M_PFSYNC);
				return (error);
			}
		}
		if (sc->sc_sync_if)
			if_rele(sc->sc_sync_if);
		sc->sc_sync_if = sifp;

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later. */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		/* Request a full state table update. */
		if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(V_pfsync_carp_adj,
			    "pfsync bulk start");
		sc->sc_flags &= ~PFSYNCF_OK;
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: requesting bulk update\n");
		pfsync_request_update(0, 0);
		PFSYNC_UNLOCK(sc);
		PFSYNC_BLOCK(sc);
		sc->sc_ureq_sent = time_uptime;
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
		    sc);
		PFSYNC_BUNLOCK(sc);

		break;
	    }
	default:
		return (ENOTTY);
	}

	return (0);
}

static void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

static void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

static void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

static void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;
	st->state_flags |= PFSTATE_NOSYNC;
}

static void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st, *next;
	struct pfsync_upd_req_item *ur;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
			KASSERT(st->sync_state == q,
				("%s: st->sync_state == q",
					__func__));
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		free(ur, M_PFSYNC);
	}

	sc->sc_plus = NULL;
	sc->sc_len = PFSYNC_MINPKT;
}

static void
pfsync_sendout(int schedswi)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct ifnet *ifp = sc->sc_ifp;
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st, *st_next;
	struct pfsync_upd_req_item *ur;
	int offset;
	int q, count = 0;

	KASSERT(sc != NULL, ("%s: null sc", __func__));
	KASSERT(sc->sc_len > PFSYNC_MINPKT,
	    ("%s: sc_len %zu", __func__, sc->sc_len));
	PFSYNC_LOCK_ASSERT(sc);

	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
		pfsync_drop(sc);
		return;
	}

	m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
		V_pfsyncstats.pfsyncs_onomem++;
		return;
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = (struct ip *)m->m_data;
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip_fillid(ip);

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

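	/*
	 * The datagram being assembled follows the PFSYNC_MINPKT layout:
	 * an IP header, this pfsync_header, then one pfsync_subheader per
	 * action followed by its messages.  ph->len counts everything
	 * after the IP header, and pfcksum carries the local ruleset
	 * checksum so receivers can decide whether rule numbers in state
	 * messages may be trusted (PFSYNC_SI_CKSUM in pfsync_input()).
	 */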
	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) {
			KASSERT(st->sync_state == q,
				("%s: st->sync_state == q",
					__func__));
			/*
			 * XXXGL: some of write methods do unlocked reads
			 * of state data :(
			 */
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
	}

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);
			free(ur, M_PFSYNC);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	bzero(subh, sizeof(*subh));
	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;

	/* we're done, let's put it on the wire */
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		BPF_MTAP(ifp, m);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
	sc->sc_len = PFSYNC_MINPKT;

	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
	else {
		m_freem(m);
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
	}
	if (schedswi)
		swi_sched(V_pfsync_swi_cookie, 0);
}

static void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (st->state_flags & PFSTATE_NOSYNC)
		return;

	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		st->state_flags |= PFSTATE_NOSYNC;
		return;
	}

	KASSERT(st->sync_state == PFSYNC_S_NONE,
		("%s: st->sync_state %u", __func__, st->sync_state));

	PFSYNC_LOCK(sc);
	if (sc->sc_len == PFSYNC_MINPKT)
		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);

	pfsync_q_ins(st, PFSYNC_S_INS, true);
	PFSYNC_UNLOCK(sc);

	st->sync_updates = 0;
}

static int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_deferral *pd;

	if (m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	PFSYNC_LOCK(sc);

	if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) ||
	    !(sc->sc_flags & PFSYNCF_DEFER)) {
		PFSYNC_UNLOCK(sc);
		return (0);
	}

	if (sc->sc_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
	if (pd == NULL)
		return (0);
	sc->sc_deferred++;

	m->m_flags |= M_SKIP_FIREWALL;
	st->state_flags |= PFSTATE_ACK;

	pd->pd_sc = sc;
	pd->pd_refs = 0;
	pd->pd_st = st;
	pf_ref_state(st);
	pd->pd_m = m;

	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
	callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
	callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);

	pfsync_push(sc);

	return (1);
}

static void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_state *st = pd->pd_st;

	PFSYNC_LOCK_ASSERT(sc);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	free(pd, M_PFSYNC);
	pf_release_state(st);

	if (drop)
		m_freem(m);
	else {
		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
		pfsync_push(sc);
	}
}

static void
pfsync_defer_tmo(void *arg)
{
	struct pfsync_deferral *pd = arg;
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_state *st = pd->pd_st;

	PFSYNC_LOCK_ASSERT(sc);

	CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	if (pd->pd_refs == 0)
		free(pd, M_PFSYNC);
	PFSYNC_UNLOCK(sc);

	ip_output(m, NULL, NULL, 0, NULL, NULL);

	pf_release_state(st);

	CURVNET_RESTORE();
}

static void
pfsync_undefer_state(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_deferral *pd;

	PFSYNC_LOCK_ASSERT(sc);

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (callout_stop(&pd->pd_tmo) > 0)
				pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("%s: unable to find deferred state", __func__);
}

static void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	bool sync = false, ref = true;

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_LOCK(sc);

	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state(st, 0);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true);
		PFSYNC_UNLOCK(sc);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = true;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st, false);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C, ref);
		st->sync_updates = 0;
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		pfsync_push(sc);

	PFSYNC_UNLOCK(sc);
}

static void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	PFSYNC_LOCK_ASSERT(sc);

	/*
	 * This code does a bit of work to prevent multiple update requests
	 * for the same state from being generated.  It searches the current
	 * subheader queue, but it doesn't look into the queue of already
	 * packed datagrams.
	 */
	TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry)
		if (item->ur_msg.id == id &&
		    item->ur_msg.creatorid == creatorid)
			return;

	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
	if (item == NULL)
		return; /* XXX stats */

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
		pfsync_sendout(1);

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;
}

static void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	bool ref = true;

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_LOCK(sc);

	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true);
		PFSYNC_UNLOCK(sc);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD, ref);
		pfsync_push(sc);
		break;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	PFSYNC_UNLOCK(sc);
}

static void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	bool ref = true;

	PFSYNC_LOCK(sc);
	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state(st, 1);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true);
		PFSYNC_UNLOCK(sc);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* We never got to tell the world so just forget about it. */
		pfsync_q_del(st, true);
		break;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL, ref);
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	PFSYNC_UNLOCK(sc);
}

static void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	PFSYNC_LOCK(sc);
	pfsync_send_plus(&r, sizeof(r));
	PFSYNC_UNLOCK(sc);
}

static void
pfsync_q_ins(struct pf_state *st, int q, bool ref)
{
	struct pfsync_softc *sc = V_pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	PFSYNC_LOCK_ASSERT(sc);

	KASSERT(st->sync_state == PFSYNC_S_NONE,
		("%s: st->sync_state %u", __func__, st->sync_state));
	KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
	    sc->sc_len));

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
		pfsync_sendout(1);

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
	if (ref)
		pf_ref_state(st);
}

static void
pfsync_q_del(struct pf_state *st, bool unref)
{
	struct pfsync_softc *sc = V_pfsyncif;
	int q = st->sync_state;

	PFSYNC_LOCK_ASSERT(sc);
	KASSERT(st->sync_state != PFSYNC_S_NONE,
		("%s: st->sync_state != PFSYNC_S_NONE", __func__));

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	if (unref)
		pf_release_state(st);

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

static void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	PFSYNC_BLOCK(sc);

	sc->sc_ureq_received = time_uptime;
	sc->sc_bulk_hashid = 0;
	sc->sc_bulk_stateid = 0;
	pfsync_bulk_status(PFSYNC_BUS_START);
	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
	PFSYNC_BUNLOCK(sc);
}

static void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *s;
	int i, sent = 0;

	PFSYNC_BLOCK_ASSERT(sc);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	/*
	 * Start with the last state from the previous invocation.
	 * It may have gone away, in which case start from the
	 * hash slot.
	 */
2072 */ 2073 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2074 2075 if (s != NULL) 2076 i = PF_IDHASH(s); 2077 else 2078 i = sc->sc_bulk_hashid; 2079 2080 for (; i <= pf_hashmask; i++) { 2081 struct pf_idhash *ih = &V_pf_idhash[i]; 2082 2083 if (s != NULL) 2084 PF_HASHROW_ASSERT(ih); 2085 else { 2086 PF_HASHROW_LOCK(ih); 2087 s = LIST_FIRST(&ih->states); 2088 } 2089 2090 for (; s; s = LIST_NEXT(s, entry)) { 2091 2092 if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < 2093 sizeof(struct pfsync_state)) { 2094 /* We've filled a packet. */ 2095 sc->sc_bulk_hashid = i; 2096 sc->sc_bulk_stateid = s->id; 2097 sc->sc_bulk_creatorid = s->creatorid; 2098 PF_HASHROW_UNLOCK(ih); 2099 callout_reset(&sc->sc_bulk_tmo, 1, 2100 pfsync_bulk_update, sc); 2101 goto full; 2102 } 2103 2104 if (s->sync_state == PFSYNC_S_NONE && 2105 s->timeout < PFTM_MAX && 2106 s->pfsync_time <= sc->sc_ureq_received) { 2107 pfsync_update_state_req(s); 2108 sent++; 2109 } 2110 } 2111 PF_HASHROW_UNLOCK(ih); 2112 } 2113 2114 /* We're done. */ 2115 pfsync_bulk_status(PFSYNC_BUS_END); 2116 2117 full: 2118 CURVNET_RESTORE(); 2119 } 2120 2121 static void 2122 pfsync_bulk_status(u_int8_t status) 2123 { 2124 struct { 2125 struct pfsync_subheader subh; 2126 struct pfsync_bus bus; 2127 } __packed r; 2128 2129 struct pfsync_softc *sc = V_pfsyncif; 2130 2131 bzero(&r, sizeof(r)); 2132 2133 r.subh.action = PFSYNC_ACT_BUS; 2134 r.subh.count = htons(1); 2135 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2136 2137 r.bus.creatorid = V_pf_status.hostid; 2138 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2139 r.bus.status = status; 2140 2141 PFSYNC_LOCK(sc); 2142 pfsync_send_plus(&r, sizeof(r)); 2143 PFSYNC_UNLOCK(sc); 2144 } 2145 2146 static void 2147 pfsync_bulk_fail(void *arg) 2148 { 2149 struct pfsync_softc *sc = arg; 2150 2151 CURVNET_SET(sc->sc_ifp->if_vnet); 2152 2153 PFSYNC_BLOCK_ASSERT(sc); 2154 2155 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2156 /* Try again */ 2157 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2158 pfsync_bulk_fail, V_pfsyncif); 2159 PFSYNC_LOCK(sc); 2160 pfsync_request_update(0, 0); 2161 PFSYNC_UNLOCK(sc); 2162 } else { 2163 /* Pretend like the transfer was ok. 
static void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again. */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_LOCK(sc);
		pfsync_request_update(0, 0);
		PFSYNC_UNLOCK(sc);
	} else {
		/* Pretend the transfer was ok. */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}

static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = V_pfsyncif;

	PFSYNC_LOCK_ASSERT(sc);

	if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1);

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout(1);
}

static void
pfsync_timeout(void *arg)
{
	struct pfsync_softc *sc = arg;

	CURVNET_SET(sc->sc_ifp->if_vnet);
	PFSYNC_LOCK(sc);
	pfsync_push(sc);
	PFSYNC_UNLOCK(sc);
	CURVNET_RESTORE();
}

static void
pfsync_push(struct pfsync_softc *sc)
{

	PFSYNC_LOCK_ASSERT(sc);

	sc->sc_flags |= PFSYNCF_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}

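/*
 * Software interrupt handler: flush the pending pfsync packet if a push
 * was requested, then transmit everything queued on the interface send
 * queue, counting output packets and errors.
 */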
2242 */ 2243 if (m->m_flags & M_SKIP_FIREWALL) 2244 ip_output(m, NULL, NULL, 0, NULL, NULL); 2245 else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 2246 NULL) == 0) 2247 V_pfsyncstats.pfsyncs_opackets++; 2248 else 2249 V_pfsyncstats.pfsyncs_oerrors++; 2250 } 2251 CURVNET_RESTORE(); 2252 } 2253 2254 static int 2255 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) 2256 { 2257 struct ip_moptions *imo = &sc->sc_imo; 2258 int error; 2259 2260 if (!(ifp->if_flags & IFF_MULTICAST)) 2261 return (EADDRNOTAVAIL); 2262 2263 imo->imo_membership = (struct in_multi **)mship; 2264 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 2265 imo->imo_multicast_vif = -1; 2266 2267 if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, 2268 &imo->imo_membership[0])) != 0) { 2269 imo->imo_membership = NULL; 2270 return (error); 2271 } 2272 imo->imo_num_memberships++; 2273 imo->imo_multicast_ifp = ifp; 2274 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2275 imo->imo_multicast_loop = 0; 2276 2277 return (0); 2278 } 2279 2280 static void 2281 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2282 { 2283 struct ip_moptions *imo = &sc->sc_imo; 2284 2285 in_leavegroup(imo->imo_membership[0], NULL); 2286 free(imo->imo_membership, M_PFSYNC); 2287 imo->imo_membership = NULL; 2288 imo->imo_multicast_ifp = NULL; 2289 } 2290 2291 #ifdef INET 2292 extern struct domain inetdomain; 2293 static struct protosw in_pfsync_protosw = { 2294 .pr_type = SOCK_RAW, 2295 .pr_domain = &inetdomain, 2296 .pr_protocol = IPPROTO_PFSYNC, 2297 .pr_flags = PR_ATOMIC|PR_ADDR, 2298 .pr_input = pfsync_input, 2299 .pr_output = rip_output, 2300 .pr_ctloutput = rip_ctloutput, 2301 .pr_usrreqs = &rip_usrreqs 2302 }; 2303 #endif 2304 2305 static void 2306 pfsync_pointers_init() 2307 { 2308 2309 PF_RULES_WLOCK(); 2310 pfsync_state_import_ptr = pfsync_state_import; 2311 pfsync_insert_state_ptr = pfsync_insert_state; 2312 pfsync_update_state_ptr = pfsync_update_state; 2313 pfsync_delete_state_ptr = pfsync_delete_state; 2314 pfsync_clear_states_ptr = pfsync_clear_states; 2315 pfsync_defer_ptr = pfsync_defer; 2316 PF_RULES_WUNLOCK(); 2317 } 2318 2319 static void 2320 pfsync_pointers_uninit() 2321 { 2322 2323 PF_RULES_WLOCK(); 2324 pfsync_state_import_ptr = NULL; 2325 pfsync_insert_state_ptr = NULL; 2326 pfsync_update_state_ptr = NULL; 2327 pfsync_delete_state_ptr = NULL; 2328 pfsync_clear_states_ptr = NULL; 2329 pfsync_defer_ptr = NULL; 2330 PF_RULES_WUNLOCK(); 2331 } 2332 2333 static void 2334 vnet_pfsync_init(const void *unused __unused) 2335 { 2336 int error; 2337 2338 V_pfsync_cloner = if_clone_simple(pfsyncname, 2339 pfsync_clone_create, pfsync_clone_destroy, 1); 2340 error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, 2341 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 2342 if (error) { 2343 if_clone_detach(V_pfsync_cloner); 2344 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 2345 } 2346 } 2347 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 2348 vnet_pfsync_init, NULL); 2349 2350 static void 2351 vnet_pfsync_uninit(const void *unused __unused) 2352 { 2353 2354 if_clone_detach(V_pfsync_cloner); 2355 swi_remove(V_pfsync_swi_cookie); 2356 } 2357 /* 2358 * Detach after pf is gone; otherwise we might touch pfsync memory 2359 * from within pf after freeing pfsync. 
static void
vnet_pfsync_init(const void *unused __unused)
{
	int error;

	V_pfsync_cloner = if_clone_simple(pfsyncname,
	    pfsync_clone_create, pfsync_clone_destroy, 1);
	error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif,
	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
	if (error) {
		if_clone_detach(V_pfsync_cloner);
		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
	}
}
VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pfsync_init, NULL);

static void
vnet_pfsync_uninit(const void *unused __unused)
{

	if_clone_detach(V_pfsync_cloner);
	swi_remove(V_pfsync_swi_cookie);
}

/*
 * Detach after pf is gone; otherwise we might touch pfsync memory
 * from within pf after freeing pfsync.
 */
VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
    vnet_pfsync_uninit, NULL);

static int
pfsync_init(void)
{
#ifdef INET
	int error;

	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
	if (error)
		return (error);
	error = ipproto_register(IPPROTO_PFSYNC);
	if (error) {
		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
		return (error);
	}
#endif
	pfsync_pointers_init();

	return (0);
}

static void
pfsync_uninit(void)
{

	pfsync_pointers_uninit();

#ifdef INET
	ipproto_unregister(IPPROTO_PFSYNC);
	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
#endif
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_QUIESCE:
		/*
		 * Module should not be unloaded due to race conditions.
		 */
		error = EBUSY;
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static moduledata_t pfsync_mod = {
	pfsyncname,
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 1

/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);