1 /*- 2 * Copyright (c) 2002 Michael Shalayeff 3 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 25 * THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 /*- 29 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 30 * 31 * Permission to use, copy, modify, and distribute this software for any 32 * purpose with or without fee is hereby granted, provided that the above 33 * copyright notice and this permission notice appear in all copies. 34 * 35 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 36 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 37 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 38 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 39 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 40 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 41 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 42 */ 43 44 /* 45 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 46 * 47 * Revisions picked from OpenBSD after revision 1.110 import: 48 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 49 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 50 * 1.120, 1.175 - use monotonic time_uptime 51 * 1.122 - reduce number of updates for non-TCP sessions 52 * 1.125, 1.127 - rewrite merge or stale processing 53 * 1.128 - cleanups 54 * 1.146 - bzero() mbuf before sparsely filling it with data 55 * 1.170 - SIOCSIFMTU checks 56 * 1.126, 1.142 - deferred packets processing 57 * 1.173 - correct expire time processing 58 */ 59 60 #include <sys/cdefs.h> 61 __FBSDID("$FreeBSD$"); 62 63 #include "opt_inet.h" 64 #include "opt_inet6.h" 65 #include "opt_pf.h" 66 67 #include <sys/param.h> 68 #include <sys/bus.h> 69 #include <sys/endian.h> 70 #include <sys/interrupt.h> 71 #include <sys/kernel.h> 72 #include <sys/lock.h> 73 #include <sys/mbuf.h> 74 #include <sys/module.h> 75 #include <sys/mutex.h> 76 #include <sys/priv.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/sockio.h> 80 #include <sys/sysctl.h> 81 82 #include <net/bpf.h> 83 #include <net/if.h> 84 #include <net/if_var.h> 85 #include <net/if_clone.h> 86 #include <net/if_types.h> 87 #include <net/vnet.h> 88 #include <net/pfvar.h> 89 #include <net/if_pfsync.h> 90 91 #include <netinet/if_ether.h> 92 #include <netinet/in.h> 93 #include <netinet/in_var.h> 94 #include <netinet/ip.h> 95 #include <netinet/ip_carp.h> 96 #include <netinet/ip_var.h> 97 #include <netinet/tcp.h> 98 #include <netinet/tcp_fsm.h> 99 #include <netinet/tcp_seq.h> 100 101 #define PFSYNC_MINPKT ( \ 102 sizeof(struct ip) + \ 103 sizeof(struct pfsync_header) + \ 104 sizeof(struct pfsync_subheader) ) 105 106 struct pfsync_pkt { 107 struct ip *ip; 108 struct in_addr src; 109 u_int8_t flags; 110 }; 111 112 static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 113 struct pfsync_state_peer *); 114 static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); 115 static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); 116 static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); 117 static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); 118 static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); 119 static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); 120 static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); 121 static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); 122 static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); 123 static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); 124 static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); 125 static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); 126 127 static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { 128 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 129 pfsync_in_ins, /* PFSYNC_ACT_INS */ 130 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 131 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 132 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 133 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 134 pfsync_in_del, /* PFSYNC_ACT_DEL */ 135 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 136 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 137 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 138 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 139 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 140 pfsync_in_eof /* PFSYNC_ACT_EOF */ 141 }; 142 143 struct pfsync_q { 144 void (*write)(struct pf_state *, void *); 145 size_t len; 146 u_int8_t action; 147 }; 148 149 /* we have one of these for every PFSYNC_S_ */ 150 static void pfsync_out_state(struct pf_state *, void *); 151 static void pfsync_out_iack(struct pf_state *, void *); 152 static void pfsync_out_upd_c(struct pf_state *, void *); 153 static void pfsync_out_del(struct pf_state *, void *); 154 155 static struct pfsync_q pfsync_qs[] = { 156 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 157 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 158 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, 159 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 160 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 161 }; 162 163 static void pfsync_q_ins(struct pf_state *, int); 164 static void pfsync_q_del(struct pf_state *); 165 166 static void pfsync_update_state(struct pf_state *); 167 168 struct pfsync_upd_req_item { 169 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 170 struct pfsync_upd_req ur_msg; 171 }; 172 173 struct pfsync_deferral { 174 struct pfsync_softc *pd_sc; 175 TAILQ_ENTRY(pfsync_deferral) pd_entry; 176 u_int pd_refs; 177 struct callout pd_tmo; 178 179 struct pf_state *pd_st; 180 struct mbuf *pd_m; 181 }; 182 183 struct pfsync_softc { 184 /* Configuration */ 185 struct ifnet *sc_ifp; 186 struct ifnet *sc_sync_if; 187 struct ip_moptions sc_imo; 188 struct in_addr sc_sync_peer; 189 uint32_t sc_flags; 190 #define PFSYNCF_OK 0x00000001 191 #define PFSYNCF_DEFER 0x00000002 192 #define PFSYNCF_PUSH 0x00000004 193 uint8_t sc_maxupdates; 194 struct ip sc_template; 195 struct callout sc_tmo; 196 struct mtx sc_mtx; 197 198 /* Queued data */ 199 size_t sc_len; 200 TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; 201 TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; 202 TAILQ_HEAD(, pfsync_deferral) sc_deferrals; 203 u_int sc_deferred; 204 void *sc_plus; 205 size_t sc_pluslen; 206 207 /* Bulk update info */ 208 struct mtx sc_bulk_mtx; 209 uint32_t sc_ureq_sent; 210 int sc_bulk_tries; 211 uint32_t sc_ureq_received; 212 int sc_bulk_hashid; 213 uint64_t sc_bulk_stateid; 214 uint32_t sc_bulk_creatorid; 215 struct callout sc_bulk_tmo; 216 struct callout sc_bulkfail_tmo; 217 }; 218 219 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 220 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 221 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 222 223 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 224 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 225 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 226 227 static const char pfsyncname[] = "pfsync"; 228 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 229 static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; 230 #define V_pfsyncif VNET(pfsyncif) 231 static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; 232 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 233 static VNET_DEFINE(struct pfsyncstats, pfsyncstats); 234 #define V_pfsyncstats VNET(pfsyncstats) 235 static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; 236 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 237 238 static void pfsync_timeout(void *); 239 static void pfsync_push(struct pfsync_softc *); 240 static void pfsyncintr(void *); 241 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 242 void *); 243 static void pfsync_multicast_cleanup(struct pfsync_softc *); 244 static void pfsync_pointers_init(void); 245 static void pfsync_pointers_uninit(void); 246 static int pfsync_init(void); 247 static void pfsync_uninit(void); 248 249 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); 250 SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, 251 &VNET_NAME(pfsyncstats), pfsyncstats, 252 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 253 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, 254 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 255 256 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 257 static void pfsync_clone_destroy(struct ifnet *); 258 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 259 struct pf_state_peer *); 260 static int pfsyncoutput(struct ifnet *, struct mbuf *, 261 const struct sockaddr *, struct route *); 262 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 263 264 static int pfsync_defer(struct pf_state *, struct mbuf *); 265 static void pfsync_undefer(struct pfsync_deferral *, int); 266 static void pfsync_undefer_state(struct pf_state *, int); 267 static void pfsync_defer_tmo(void *); 268 269 static void pfsync_request_update(u_int32_t, u_int64_t); 270 static void pfsync_update_state_req(struct pf_state *); 271 272 static void pfsync_drop(struct pfsync_softc *); 273 static void pfsync_sendout(int); 274 static void pfsync_send_plus(void *, size_t); 275 276 static void pfsync_bulk_start(void); 277 static void pfsync_bulk_status(u_int8_t); 278 static void pfsync_bulk_update(void *); 279 static void pfsync_bulk_fail(void *); 280 281 #ifdef IPSEC 282 static void pfsync_update_net_tdb(struct pfsync_tdb *); 283 #endif 284 285 #define PFSYNC_MAX_BULKTRIES 12 286 287 VNET_DEFINE(struct if_clone *, pfsync_cloner); 288 #define V_pfsync_cloner VNET(pfsync_cloner) 289 290 static int 291 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 292 { 293 struct pfsync_softc *sc; 294 struct ifnet *ifp; 295 int q; 296 297 if (unit != 0) 298 return (EINVAL); 299 300 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 301 sc->sc_flags |= PFSYNCF_OK; 302 303 for (q = 0; q < PFSYNC_S_COUNT; q++) 304 TAILQ_INIT(&sc->sc_qs[q]); 305 306 TAILQ_INIT(&sc->sc_upd_req_list); 307 TAILQ_INIT(&sc->sc_deferrals); 308 309 sc->sc_len = PFSYNC_MINPKT; 310 sc->sc_maxupdates = 128; 311 312 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 313 if (ifp == NULL) { 314 free(sc, M_PFSYNC); 315 return (ENOSPC); 316 } 317 if_initname(ifp, pfsyncname, unit); 318 ifp->if_softc = sc; 319 ifp->if_ioctl = pfsyncioctl; 320 ifp->if_output = pfsyncoutput; 321 ifp->if_type = IFT_PFSYNC; 322 ifp->if_snd.ifq_maxlen = ifqmaxlen; 323 ifp->if_hdrlen = sizeof(struct pfsync_header); 324 ifp->if_mtu = ETHERMTU; 325 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 326 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 327 callout_init(&sc->sc_tmo, CALLOUT_MPSAFE); 328 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 329 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 330 331 if_attach(ifp); 332 333 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 334 335 V_pfsyncif = sc; 336 337 return (0); 338 } 339 340 static void 341 pfsync_clone_destroy(struct ifnet *ifp) 342 { 343 struct pfsync_softc *sc = ifp->if_softc; 344 345 /* 346 * At this stage, everything should have already been 347 * cleared by pfsync_uninit(), and we have only to 348 * drain callouts. 349 */ 350 while (sc->sc_deferred > 0) { 351 struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); 352 353 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 354 sc->sc_deferred--; 355 if (callout_stop(&pd->pd_tmo)) { 356 pf_release_state(pd->pd_st); 357 m_freem(pd->pd_m); 358 free(pd, M_PFSYNC); 359 } else { 360 pd->pd_refs++; 361 callout_drain(&pd->pd_tmo); 362 free(pd, M_PFSYNC); 363 } 364 } 365 366 callout_drain(&sc->sc_tmo); 367 callout_drain(&sc->sc_bulkfail_tmo); 368 callout_drain(&sc->sc_bulk_tmo); 369 370 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 371 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 372 bpfdetach(ifp); 373 if_detach(ifp); 374 375 pfsync_drop(sc); 376 377 if_free(ifp); 378 if (sc->sc_imo.imo_membership) 379 pfsync_multicast_cleanup(sc); 380 mtx_destroy(&sc->sc_mtx); 381 mtx_destroy(&sc->sc_bulk_mtx); 382 free(sc, M_PFSYNC); 383 384 V_pfsyncif = NULL; 385 } 386 387 static int 388 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 389 struct pf_state_peer *d) 390 { 391 if (s->scrub.scrub_flag && d->scrub == NULL) { 392 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 393 if (d->scrub == NULL) 394 return (ENOMEM); 395 } 396 397 return (0); 398 } 399 400 401 static int 402 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 403 { 404 struct pfsync_softc *sc = V_pfsyncif; 405 #ifndef __NO_STRICT_ALIGNMENT 406 struct pfsync_state_key key[2]; 407 #endif 408 struct pfsync_state_key *kw, *ks; 409 struct pf_state *st = NULL; 410 struct pf_state_key *skw = NULL, *sks = NULL; 411 struct pf_rule *r = NULL; 412 struct pfi_kif *kif; 413 int error; 414 415 PF_RULES_RASSERT(); 416 417 if (sp->creatorid == 0) { 418 if (V_pf_status.debug >= PF_DEBUG_MISC) 419 printf("%s: invalid creator id: %08x\n", __func__, 420 ntohl(sp->creatorid)); 421 return (EINVAL); 422 } 423 424 if ((kif = pfi_kif_find(sp->ifname)) == NULL) { 425 if (V_pf_status.debug >= PF_DEBUG_MISC) 426 printf("%s: unknown interface: %s\n", __func__, 427 sp->ifname); 428 if (flags & PFSYNC_SI_IOCTL) 429 return (EINVAL); 430 return (0); /* skip this state */ 431 } 432 433 /* 434 * If the ruleset checksums match or the state is coming from the ioctl, 435 * it's safe to associate the state with the rule of that number. 436 */ 437 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 438 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 439 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 440 r = pf_main_ruleset.rules[ 441 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 442 else 443 r = &V_pf_default_rule; 444 445 if ((r->max_states && 446 counter_u64_fetch(r->states_cur) >= r->max_states)) 447 goto cleanup; 448 449 /* 450 * XXXGL: consider M_WAITOK in ioctl path after. 451 */ 452 if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) 453 goto cleanup; 454 455 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 456 goto cleanup; 457 458 #ifndef __NO_STRICT_ALIGNMENT 459 bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); 460 kw = &key[PF_SK_WIRE]; 461 ks = &key[PF_SK_STACK]; 462 #else 463 kw = &sp->key[PF_SK_WIRE]; 464 ks = &sp->key[PF_SK_STACK]; 465 #endif 466 467 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || 468 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || 469 kw->port[0] != ks->port[0] || 470 kw->port[1] != ks->port[1]) { 471 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 472 if (sks == NULL) 473 goto cleanup; 474 } else 475 sks = skw; 476 477 /* allocate memory for scrub info */ 478 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 479 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 480 goto cleanup; 481 482 /* Copy to state key(s). */ 483 skw->addr[0] = kw->addr[0]; 484 skw->addr[1] = kw->addr[1]; 485 skw->port[0] = kw->port[0]; 486 skw->port[1] = kw->port[1]; 487 skw->proto = sp->proto; 488 skw->af = sp->af; 489 if (sks != skw) { 490 sks->addr[0] = ks->addr[0]; 491 sks->addr[1] = ks->addr[1]; 492 sks->port[0] = ks->port[0]; 493 sks->port[1] = ks->port[1]; 494 sks->proto = sp->proto; 495 sks->af = sp->af; 496 } 497 498 /* copy to state */ 499 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 500 st->creation = time_uptime - ntohl(sp->creation); 501 st->expire = time_uptime; 502 if (sp->expire) { 503 uint32_t timeout; 504 505 timeout = r->timeout[sp->timeout]; 506 if (!timeout) 507 timeout = V_pf_default_rule.timeout[sp->timeout]; 508 509 /* sp->expire may have been adaptively scaled by export. */ 510 st->expire -= timeout - ntohl(sp->expire); 511 } 512 513 st->direction = sp->direction; 514 st->log = sp->log; 515 st->timeout = sp->timeout; 516 st->state_flags = sp->state_flags; 517 518 st->id = sp->id; 519 st->creatorid = sp->creatorid; 520 pf_state_peer_ntoh(&sp->src, &st->src); 521 pf_state_peer_ntoh(&sp->dst, &st->dst); 522 523 st->rule.ptr = r; 524 st->nat_rule.ptr = NULL; 525 st->anchor.ptr = NULL; 526 st->rt_kif = NULL; 527 528 st->pfsync_time = time_uptime; 529 st->sync_state = PFSYNC_S_NONE; 530 531 if (!(flags & PFSYNC_SI_IOCTL)) 532 st->state_flags |= PFSTATE_NOSYNC; 533 534 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) 535 goto cleanup_state; 536 537 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 538 counter_u64_add(r->states_cur, 1); 539 counter_u64_add(r->states_tot, 1); 540 541 if (!(flags & PFSYNC_SI_IOCTL)) { 542 st->state_flags &= ~PFSTATE_NOSYNC; 543 if (st->state_flags & PFSTATE_ACK) { 544 pfsync_q_ins(st, PFSYNC_S_IACK); 545 pfsync_push(sc); 546 } 547 } 548 st->state_flags &= ~PFSTATE_ACK; 549 PF_STATE_UNLOCK(st); 550 551 return (0); 552 553 cleanup: 554 error = ENOMEM; 555 if (skw == sks) 556 sks = NULL; 557 if (skw != NULL) 558 uma_zfree(V_pf_state_key_z, skw); 559 if (sks != NULL) 560 uma_zfree(V_pf_state_key_z, sks); 561 562 cleanup_state: /* pf_state_insert() frees the state keys. */ 563 if (st) { 564 if (st->dst.scrub) 565 uma_zfree(V_pf_state_scrub_z, st->dst.scrub); 566 if (st->src.scrub) 567 uma_zfree(V_pf_state_scrub_z, st->src.scrub); 568 uma_zfree(V_pf_state_z, st); 569 } 570 return (error); 571 } 572 573 static void 574 pfsync_input(struct mbuf *m, __unused int off) 575 { 576 struct pfsync_softc *sc = V_pfsyncif; 577 struct pfsync_pkt pkt; 578 struct ip *ip = mtod(m, struct ip *); 579 struct pfsync_header *ph; 580 struct pfsync_subheader subh; 581 582 int offset, len; 583 int rv; 584 uint16_t count; 585 586 V_pfsyncstats.pfsyncs_ipackets++; 587 588 /* Verify that we have a sync interface configured. */ 589 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 590 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 591 goto done; 592 593 /* verify that the packet came in on the right interface */ 594 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 595 V_pfsyncstats.pfsyncs_badif++; 596 goto done; 597 } 598 599 sc->sc_ifp->if_ipackets++; 600 sc->sc_ifp->if_ibytes += m->m_pkthdr.len; 601 /* verify that the IP TTL is 255. */ 602 if (ip->ip_ttl != PFSYNC_DFLTTL) { 603 V_pfsyncstats.pfsyncs_badttl++; 604 goto done; 605 } 606 607 offset = ip->ip_hl << 2; 608 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 609 V_pfsyncstats.pfsyncs_hdrops++; 610 goto done; 611 } 612 613 if (offset + sizeof(*ph) > m->m_len) { 614 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 615 V_pfsyncstats.pfsyncs_hdrops++; 616 return; 617 } 618 ip = mtod(m, struct ip *); 619 } 620 ph = (struct pfsync_header *)((char *)ip + offset); 621 622 /* verify the version */ 623 if (ph->version != PFSYNC_VERSION) { 624 V_pfsyncstats.pfsyncs_badver++; 625 goto done; 626 } 627 628 len = ntohs(ph->len) + offset; 629 if (m->m_pkthdr.len < len) { 630 V_pfsyncstats.pfsyncs_badlen++; 631 goto done; 632 } 633 634 /* Cheaper to grab this now than having to mess with mbufs later */ 635 pkt.ip = ip; 636 pkt.src = ip->ip_src; 637 pkt.flags = 0; 638 639 /* 640 * Trusting pf_chksum during packet processing, as well as seeking 641 * in interface name tree, require holding PF_RULES_RLOCK(). 642 */ 643 PF_RULES_RLOCK(); 644 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 645 pkt.flags |= PFSYNC_SI_CKSUM; 646 647 offset += sizeof(*ph); 648 while (offset <= len - sizeof(subh)) { 649 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 650 offset += sizeof(subh); 651 652 if (subh.action >= PFSYNC_ACT_MAX) { 653 V_pfsyncstats.pfsyncs_badact++; 654 PF_RULES_RUNLOCK(); 655 goto done; 656 } 657 658 count = ntohs(subh.count); 659 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 660 rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); 661 if (rv == -1) { 662 PF_RULES_RUNLOCK(); 663 return; 664 } 665 666 offset += rv; 667 } 668 PF_RULES_RUNLOCK(); 669 670 done: 671 m_freem(m); 672 } 673 674 static int 675 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 676 { 677 struct pfsync_clr *clr; 678 struct mbuf *mp; 679 int len = sizeof(*clr) * count; 680 int i, offp; 681 u_int32_t creatorid; 682 683 mp = m_pulldown(m, offset, len, &offp); 684 if (mp == NULL) { 685 V_pfsyncstats.pfsyncs_badlen++; 686 return (-1); 687 } 688 clr = (struct pfsync_clr *)(mp->m_data + offp); 689 690 for (i = 0; i < count; i++) { 691 creatorid = clr[i].creatorid; 692 693 if (clr[i].ifname[0] != '\0' && 694 pfi_kif_find(clr[i].ifname) == NULL) 695 continue; 696 697 for (int i = 0; i <= pf_hashmask; i++) { 698 struct pf_idhash *ih = &V_pf_idhash[i]; 699 struct pf_state *s; 700 relock: 701 PF_HASHROW_LOCK(ih); 702 LIST_FOREACH(s, &ih->states, entry) { 703 if (s->creatorid == creatorid) { 704 s->state_flags |= PFSTATE_NOSYNC; 705 pf_unlink_state(s, PF_ENTER_LOCKED); 706 goto relock; 707 } 708 } 709 PF_HASHROW_UNLOCK(ih); 710 } 711 } 712 713 return (len); 714 } 715 716 static int 717 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 718 { 719 struct mbuf *mp; 720 struct pfsync_state *sa, *sp; 721 int len = sizeof(*sp) * count; 722 int i, offp; 723 724 mp = m_pulldown(m, offset, len, &offp); 725 if (mp == NULL) { 726 V_pfsyncstats.pfsyncs_badlen++; 727 return (-1); 728 } 729 sa = (struct pfsync_state *)(mp->m_data + offp); 730 731 for (i = 0; i < count; i++) { 732 sp = &sa[i]; 733 734 /* Check for invalid values. */ 735 if (sp->timeout >= PFTM_MAX || 736 sp->src.state > PF_TCPS_PROXY_DST || 737 sp->dst.state > PF_TCPS_PROXY_DST || 738 sp->direction > PF_OUT || 739 (sp->af != AF_INET && sp->af != AF_INET6)) { 740 if (V_pf_status.debug >= PF_DEBUG_MISC) 741 printf("%s: invalid value\n", __func__); 742 V_pfsyncstats.pfsyncs_badval++; 743 continue; 744 } 745 746 if (pfsync_state_import(sp, pkt->flags) == ENOMEM) 747 /* Drop out, but process the rest of the actions. */ 748 break; 749 } 750 751 return (len); 752 } 753 754 static int 755 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 756 { 757 struct pfsync_ins_ack *ia, *iaa; 758 struct pf_state *st; 759 760 struct mbuf *mp; 761 int len = count * sizeof(*ia); 762 int offp, i; 763 764 mp = m_pulldown(m, offset, len, &offp); 765 if (mp == NULL) { 766 V_pfsyncstats.pfsyncs_badlen++; 767 return (-1); 768 } 769 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 770 771 for (i = 0; i < count; i++) { 772 ia = &iaa[i]; 773 774 st = pf_find_state_byid(ia->id, ia->creatorid); 775 if (st == NULL) 776 continue; 777 778 if (st->state_flags & PFSTATE_ACK) { 779 PFSYNC_LOCK(V_pfsyncif); 780 pfsync_undefer_state(st, 0); 781 PFSYNC_UNLOCK(V_pfsyncif); 782 } 783 PF_STATE_UNLOCK(st); 784 } 785 /* 786 * XXX this is not yet implemented, but we know the size of the 787 * message so we can skip it. 788 */ 789 790 return (count * sizeof(struct pfsync_ins_ack)); 791 } 792 793 static int 794 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 795 struct pfsync_state_peer *dst) 796 { 797 int sync = 0; 798 799 PF_STATE_LOCK_ASSERT(st); 800 801 /* 802 * The state should never go backwards except 803 * for syn-proxy states. Neither should the 804 * sequence window slide backwards. 805 */ 806 if ((st->src.state > src->state && 807 (st->src.state < PF_TCPS_PROXY_SRC || 808 src->state >= PF_TCPS_PROXY_SRC)) || 809 810 (st->src.state == src->state && 811 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 812 sync++; 813 else 814 pf_state_peer_ntoh(src, &st->src); 815 816 if ((st->dst.state > dst->state) || 817 818 (st->dst.state >= TCPS_SYN_SENT && 819 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 820 sync++; 821 else 822 pf_state_peer_ntoh(dst, &st->dst); 823 824 return (sync); 825 } 826 827 static int 828 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 829 { 830 struct pfsync_softc *sc = V_pfsyncif; 831 struct pfsync_state *sa, *sp; 832 struct pf_state *st; 833 int sync; 834 835 struct mbuf *mp; 836 int len = count * sizeof(*sp); 837 int offp, i; 838 839 mp = m_pulldown(m, offset, len, &offp); 840 if (mp == NULL) { 841 V_pfsyncstats.pfsyncs_badlen++; 842 return (-1); 843 } 844 sa = (struct pfsync_state *)(mp->m_data + offp); 845 846 for (i = 0; i < count; i++) { 847 sp = &sa[i]; 848 849 /* check for invalid values */ 850 if (sp->timeout >= PFTM_MAX || 851 sp->src.state > PF_TCPS_PROXY_DST || 852 sp->dst.state > PF_TCPS_PROXY_DST) { 853 if (V_pf_status.debug >= PF_DEBUG_MISC) { 854 printf("pfsync_input: PFSYNC_ACT_UPD: " 855 "invalid value\n"); 856 } 857 V_pfsyncstats.pfsyncs_badval++; 858 continue; 859 } 860 861 st = pf_find_state_byid(sp->id, sp->creatorid); 862 if (st == NULL) { 863 /* insert the update */ 864 if (pfsync_state_import(sp, 0)) 865 V_pfsyncstats.pfsyncs_badstate++; 866 continue; 867 } 868 869 if (st->state_flags & PFSTATE_ACK) { 870 PFSYNC_LOCK(sc); 871 pfsync_undefer_state(st, 1); 872 PFSYNC_UNLOCK(sc); 873 } 874 875 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 876 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 877 else { 878 sync = 0; 879 880 /* 881 * Non-TCP protocol state machine always go 882 * forwards 883 */ 884 if (st->src.state > sp->src.state) 885 sync++; 886 else 887 pf_state_peer_ntoh(&sp->src, &st->src); 888 if (st->dst.state > sp->dst.state) 889 sync++; 890 else 891 pf_state_peer_ntoh(&sp->dst, &st->dst); 892 } 893 if (sync < 2) { 894 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 895 pf_state_peer_ntoh(&sp->dst, &st->dst); 896 st->expire = time_uptime; 897 st->timeout = sp->timeout; 898 } 899 st->pfsync_time = time_uptime; 900 901 if (sync) { 902 V_pfsyncstats.pfsyncs_stale++; 903 904 pfsync_update_state(st); 905 PF_STATE_UNLOCK(st); 906 PFSYNC_LOCK(sc); 907 pfsync_push(sc); 908 PFSYNC_UNLOCK(sc); 909 continue; 910 } 911 PF_STATE_UNLOCK(st); 912 } 913 914 return (len); 915 } 916 917 static int 918 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 919 { 920 struct pfsync_softc *sc = V_pfsyncif; 921 struct pfsync_upd_c *ua, *up; 922 struct pf_state *st; 923 int len = count * sizeof(*up); 924 int sync; 925 struct mbuf *mp; 926 int offp, i; 927 928 mp = m_pulldown(m, offset, len, &offp); 929 if (mp == NULL) { 930 V_pfsyncstats.pfsyncs_badlen++; 931 return (-1); 932 } 933 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 934 935 for (i = 0; i < count; i++) { 936 up = &ua[i]; 937 938 /* check for invalid values */ 939 if (up->timeout >= PFTM_MAX || 940 up->src.state > PF_TCPS_PROXY_DST || 941 up->dst.state > PF_TCPS_PROXY_DST) { 942 if (V_pf_status.debug >= PF_DEBUG_MISC) { 943 printf("pfsync_input: " 944 "PFSYNC_ACT_UPD_C: " 945 "invalid value\n"); 946 } 947 V_pfsyncstats.pfsyncs_badval++; 948 continue; 949 } 950 951 st = pf_find_state_byid(up->id, up->creatorid); 952 if (st == NULL) { 953 /* We don't have this state. Ask for it. */ 954 PFSYNC_LOCK(sc); 955 pfsync_request_update(up->creatorid, up->id); 956 PFSYNC_UNLOCK(sc); 957 continue; 958 } 959 960 if (st->state_flags & PFSTATE_ACK) { 961 PFSYNC_LOCK(sc); 962 pfsync_undefer_state(st, 1); 963 PFSYNC_UNLOCK(sc); 964 } 965 966 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 967 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 968 else { 969 sync = 0; 970 971 /* 972 * Non-TCP protocol state machine always go 973 * forwards 974 */ 975 if (st->src.state > up->src.state) 976 sync++; 977 else 978 pf_state_peer_ntoh(&up->src, &st->src); 979 if (st->dst.state > up->dst.state) 980 sync++; 981 else 982 pf_state_peer_ntoh(&up->dst, &st->dst); 983 } 984 if (sync < 2) { 985 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 986 pf_state_peer_ntoh(&up->dst, &st->dst); 987 st->expire = time_uptime; 988 st->timeout = up->timeout; 989 } 990 st->pfsync_time = time_uptime; 991 992 if (sync) { 993 V_pfsyncstats.pfsyncs_stale++; 994 995 pfsync_update_state(st); 996 PF_STATE_UNLOCK(st); 997 PFSYNC_LOCK(sc); 998 pfsync_push(sc); 999 PFSYNC_UNLOCK(sc); 1000 continue; 1001 } 1002 PF_STATE_UNLOCK(st); 1003 } 1004 1005 return (len); 1006 } 1007 1008 static int 1009 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1010 { 1011 struct pfsync_upd_req *ur, *ura; 1012 struct mbuf *mp; 1013 int len = count * sizeof(*ur); 1014 int i, offp; 1015 1016 struct pf_state *st; 1017 1018 mp = m_pulldown(m, offset, len, &offp); 1019 if (mp == NULL) { 1020 V_pfsyncstats.pfsyncs_badlen++; 1021 return (-1); 1022 } 1023 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1024 1025 for (i = 0; i < count; i++) { 1026 ur = &ura[i]; 1027 1028 if (ur->id == 0 && ur->creatorid == 0) 1029 pfsync_bulk_start(); 1030 else { 1031 st = pf_find_state_byid(ur->id, ur->creatorid); 1032 if (st == NULL) { 1033 V_pfsyncstats.pfsyncs_badstate++; 1034 continue; 1035 } 1036 if (st->state_flags & PFSTATE_NOSYNC) { 1037 PF_STATE_UNLOCK(st); 1038 continue; 1039 } 1040 1041 pfsync_update_state_req(st); 1042 PF_STATE_UNLOCK(st); 1043 } 1044 } 1045 1046 return (len); 1047 } 1048 1049 static int 1050 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1051 { 1052 struct mbuf *mp; 1053 struct pfsync_state *sa, *sp; 1054 struct pf_state *st; 1055 int len = count * sizeof(*sp); 1056 int offp, i; 1057 1058 mp = m_pulldown(m, offset, len, &offp); 1059 if (mp == NULL) { 1060 V_pfsyncstats.pfsyncs_badlen++; 1061 return (-1); 1062 } 1063 sa = (struct pfsync_state *)(mp->m_data + offp); 1064 1065 for (i = 0; i < count; i++) { 1066 sp = &sa[i]; 1067 1068 st = pf_find_state_byid(sp->id, sp->creatorid); 1069 if (st == NULL) { 1070 V_pfsyncstats.pfsyncs_badstate++; 1071 continue; 1072 } 1073 st->state_flags |= PFSTATE_NOSYNC; 1074 pf_unlink_state(st, PF_ENTER_LOCKED); 1075 } 1076 1077 return (len); 1078 } 1079 1080 static int 1081 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1082 { 1083 struct mbuf *mp; 1084 struct pfsync_del_c *sa, *sp; 1085 struct pf_state *st; 1086 int len = count * sizeof(*sp); 1087 int offp, i; 1088 1089 mp = m_pulldown(m, offset, len, &offp); 1090 if (mp == NULL) { 1091 V_pfsyncstats.pfsyncs_badlen++; 1092 return (-1); 1093 } 1094 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1095 1096 for (i = 0; i < count; i++) { 1097 sp = &sa[i]; 1098 1099 st = pf_find_state_byid(sp->id, sp->creatorid); 1100 if (st == NULL) { 1101 V_pfsyncstats.pfsyncs_badstate++; 1102 continue; 1103 } 1104 1105 st->state_flags |= PFSTATE_NOSYNC; 1106 pf_unlink_state(st, PF_ENTER_LOCKED); 1107 } 1108 1109 return (len); 1110 } 1111 1112 static int 1113 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1114 { 1115 struct pfsync_softc *sc = V_pfsyncif; 1116 struct pfsync_bus *bus; 1117 struct mbuf *mp; 1118 int len = count * sizeof(*bus); 1119 int offp; 1120 1121 PFSYNC_BLOCK(sc); 1122 1123 /* If we're not waiting for a bulk update, who cares. */ 1124 if (sc->sc_ureq_sent == 0) { 1125 PFSYNC_BUNLOCK(sc); 1126 return (len); 1127 } 1128 1129 mp = m_pulldown(m, offset, len, &offp); 1130 if (mp == NULL) { 1131 PFSYNC_BUNLOCK(sc); 1132 V_pfsyncstats.pfsyncs_badlen++; 1133 return (-1); 1134 } 1135 bus = (struct pfsync_bus *)(mp->m_data + offp); 1136 1137 switch (bus->status) { 1138 case PFSYNC_BUS_START: 1139 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1140 V_pf_limits[PF_LIMIT_STATES].limit / 1141 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1142 sizeof(struct pfsync_state)), 1143 pfsync_bulk_fail, sc); 1144 if (V_pf_status.debug >= PF_DEBUG_MISC) 1145 printf("pfsync: received bulk update start\n"); 1146 break; 1147 1148 case PFSYNC_BUS_END: 1149 if (time_uptime - ntohl(bus->endtime) >= 1150 sc->sc_ureq_sent) { 1151 /* that's it, we're happy */ 1152 sc->sc_ureq_sent = 0; 1153 sc->sc_bulk_tries = 0; 1154 callout_stop(&sc->sc_bulkfail_tmo); 1155 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1156 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1157 "pfsync bulk done"); 1158 sc->sc_flags |= PFSYNCF_OK; 1159 if (V_pf_status.debug >= PF_DEBUG_MISC) 1160 printf("pfsync: received valid " 1161 "bulk update end\n"); 1162 } else { 1163 if (V_pf_status.debug >= PF_DEBUG_MISC) 1164 printf("pfsync: received invalid " 1165 "bulk update end: bad timestamp\n"); 1166 } 1167 break; 1168 } 1169 PFSYNC_BUNLOCK(sc); 1170 1171 return (len); 1172 } 1173 1174 static int 1175 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1176 { 1177 int len = count * sizeof(struct pfsync_tdb); 1178 1179 #if defined(IPSEC) 1180 struct pfsync_tdb *tp; 1181 struct mbuf *mp; 1182 int offp; 1183 int i; 1184 int s; 1185 1186 mp = m_pulldown(m, offset, len, &offp); 1187 if (mp == NULL) { 1188 V_pfsyncstats.pfsyncs_badlen++; 1189 return (-1); 1190 } 1191 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1192 1193 for (i = 0; i < count; i++) 1194 pfsync_update_net_tdb(&tp[i]); 1195 #endif 1196 1197 return (len); 1198 } 1199 1200 #if defined(IPSEC) 1201 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1202 static void 1203 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1204 { 1205 struct tdb *tdb; 1206 int s; 1207 1208 /* check for invalid values */ 1209 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1210 (pt->dst.sa.sa_family != AF_INET && 1211 pt->dst.sa.sa_family != AF_INET6)) 1212 goto bad; 1213 1214 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1215 if (tdb) { 1216 pt->rpl = ntohl(pt->rpl); 1217 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1218 1219 /* Neither replay nor byte counter should ever decrease. */ 1220 if (pt->rpl < tdb->tdb_rpl || 1221 pt->cur_bytes < tdb->tdb_cur_bytes) { 1222 goto bad; 1223 } 1224 1225 tdb->tdb_rpl = pt->rpl; 1226 tdb->tdb_cur_bytes = pt->cur_bytes; 1227 } 1228 return; 1229 1230 bad: 1231 if (V_pf_status.debug >= PF_DEBUG_MISC) 1232 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1233 "invalid value\n"); 1234 V_pfsyncstats.pfsyncs_badstate++; 1235 return; 1236 } 1237 #endif 1238 1239 1240 static int 1241 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1242 { 1243 /* check if we are at the right place in the packet */ 1244 if (offset != m->m_pkthdr.len) 1245 V_pfsyncstats.pfsyncs_badlen++; 1246 1247 /* we're done. free and let the caller return */ 1248 m_freem(m); 1249 return (-1); 1250 } 1251 1252 static int 1253 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1254 { 1255 V_pfsyncstats.pfsyncs_badact++; 1256 1257 m_freem(m); 1258 return (-1); 1259 } 1260 1261 static int 1262 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1263 struct route *rt) 1264 { 1265 m_freem(m); 1266 return (0); 1267 } 1268 1269 /* ARGSUSED */ 1270 static int 1271 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1272 { 1273 struct pfsync_softc *sc = ifp->if_softc; 1274 struct ifreq *ifr = (struct ifreq *)data; 1275 struct pfsyncreq pfsyncr; 1276 int error; 1277 1278 switch (cmd) { 1279 case SIOCSIFFLAGS: 1280 PFSYNC_LOCK(sc); 1281 if (ifp->if_flags & IFF_UP) { 1282 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1283 PFSYNC_UNLOCK(sc); 1284 pfsync_pointers_init(); 1285 } else { 1286 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1287 PFSYNC_UNLOCK(sc); 1288 pfsync_pointers_uninit(); 1289 } 1290 break; 1291 case SIOCSIFMTU: 1292 if (!sc->sc_sync_if || 1293 ifr->ifr_mtu <= PFSYNC_MINPKT || 1294 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1295 return (EINVAL); 1296 if (ifr->ifr_mtu < ifp->if_mtu) { 1297 PFSYNC_LOCK(sc); 1298 if (sc->sc_len > PFSYNC_MINPKT) 1299 pfsync_sendout(1); 1300 PFSYNC_UNLOCK(sc); 1301 } 1302 ifp->if_mtu = ifr->ifr_mtu; 1303 break; 1304 case SIOCGETPFSYNC: 1305 bzero(&pfsyncr, sizeof(pfsyncr)); 1306 PFSYNC_LOCK(sc); 1307 if (sc->sc_sync_if) { 1308 strlcpy(pfsyncr.pfsyncr_syncdev, 1309 sc->sc_sync_if->if_xname, IFNAMSIZ); 1310 } 1311 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1312 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1313 pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == 1314 (sc->sc_flags & PFSYNCF_DEFER)); 1315 PFSYNC_UNLOCK(sc); 1316 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1317 1318 case SIOCSETPFSYNC: 1319 { 1320 struct ip_moptions *imo = &sc->sc_imo; 1321 struct ifnet *sifp; 1322 struct ip *ip; 1323 void *mship = NULL; 1324 1325 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1326 return (error); 1327 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 1328 return (error); 1329 1330 if (pfsyncr.pfsyncr_maxupdates > 255) 1331 return (EINVAL); 1332 1333 if (pfsyncr.pfsyncr_syncdev[0] == 0) 1334 sifp = NULL; 1335 else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) 1336 return (EINVAL); 1337 1338 if (sifp != NULL && ( 1339 pfsyncr.pfsyncr_syncpeer.s_addr == 0 || 1340 pfsyncr.pfsyncr_syncpeer.s_addr == 1341 htonl(INADDR_PFSYNC_GROUP))) 1342 mship = malloc((sizeof(struct in_multi *) * 1343 IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); 1344 1345 PFSYNC_LOCK(sc); 1346 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1347 sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); 1348 else 1349 sc->sc_sync_peer.s_addr = 1350 pfsyncr.pfsyncr_syncpeer.s_addr; 1351 1352 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1353 if (pfsyncr.pfsyncr_defer) { 1354 sc->sc_flags |= PFSYNCF_DEFER; 1355 pfsync_defer_ptr = pfsync_defer; 1356 } else { 1357 sc->sc_flags &= ~PFSYNCF_DEFER; 1358 pfsync_defer_ptr = NULL; 1359 } 1360 1361 if (sifp == NULL) { 1362 if (sc->sc_sync_if) 1363 if_rele(sc->sc_sync_if); 1364 sc->sc_sync_if = NULL; 1365 if (imo->imo_membership) 1366 pfsync_multicast_cleanup(sc); 1367 PFSYNC_UNLOCK(sc); 1368 break; 1369 } 1370 1371 if (sc->sc_len > PFSYNC_MINPKT && 1372 (sifp->if_mtu < sc->sc_ifp->if_mtu || 1373 (sc->sc_sync_if != NULL && 1374 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 1375 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 1376 pfsync_sendout(1); 1377 1378 if (imo->imo_membership) 1379 pfsync_multicast_cleanup(sc); 1380 1381 if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 1382 error = pfsync_multicast_setup(sc, sifp, mship); 1383 if (error) { 1384 if_rele(sifp); 1385 free(mship, M_PFSYNC); 1386 return (error); 1387 } 1388 } 1389 if (sc->sc_sync_if) 1390 if_rele(sc->sc_sync_if); 1391 sc->sc_sync_if = sifp; 1392 1393 ip = &sc->sc_template; 1394 bzero(ip, sizeof(*ip)); 1395 ip->ip_v = IPVERSION; 1396 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1397 ip->ip_tos = IPTOS_LOWDELAY; 1398 /* len and id are set later. */ 1399 ip->ip_off = htons(IP_DF); 1400 ip->ip_ttl = PFSYNC_DFLTTL; 1401 ip->ip_p = IPPROTO_PFSYNC; 1402 ip->ip_src.s_addr = INADDR_ANY; 1403 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1404 1405 /* Request a full state table update. */ 1406 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1407 (*carp_demote_adj_p)(V_pfsync_carp_adj, 1408 "pfsync bulk start"); 1409 sc->sc_flags &= ~PFSYNCF_OK; 1410 if (V_pf_status.debug >= PF_DEBUG_MISC) 1411 printf("pfsync: requesting bulk update\n"); 1412 pfsync_request_update(0, 0); 1413 PFSYNC_UNLOCK(sc); 1414 PFSYNC_BLOCK(sc); 1415 sc->sc_ureq_sent = time_uptime; 1416 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, 1417 sc); 1418 PFSYNC_BUNLOCK(sc); 1419 1420 break; 1421 } 1422 default: 1423 return (ENOTTY); 1424 } 1425 1426 return (0); 1427 } 1428 1429 static void 1430 pfsync_out_state(struct pf_state *st, void *buf) 1431 { 1432 struct pfsync_state *sp = buf; 1433 1434 pfsync_state_export(sp, st); 1435 } 1436 1437 static void 1438 pfsync_out_iack(struct pf_state *st, void *buf) 1439 { 1440 struct pfsync_ins_ack *iack = buf; 1441 1442 iack->id = st->id; 1443 iack->creatorid = st->creatorid; 1444 } 1445 1446 static void 1447 pfsync_out_upd_c(struct pf_state *st, void *buf) 1448 { 1449 struct pfsync_upd_c *up = buf; 1450 1451 bzero(up, sizeof(*up)); 1452 up->id = st->id; 1453 pf_state_peer_hton(&st->src, &up->src); 1454 pf_state_peer_hton(&st->dst, &up->dst); 1455 up->creatorid = st->creatorid; 1456 up->timeout = st->timeout; 1457 } 1458 1459 static void 1460 pfsync_out_del(struct pf_state *st, void *buf) 1461 { 1462 struct pfsync_del_c *dp = buf; 1463 1464 dp->id = st->id; 1465 dp->creatorid = st->creatorid; 1466 st->state_flags |= PFSTATE_NOSYNC; 1467 } 1468 1469 static void 1470 pfsync_drop(struct pfsync_softc *sc) 1471 { 1472 struct pf_state *st, *next; 1473 struct pfsync_upd_req_item *ur; 1474 int q; 1475 1476 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1477 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1478 continue; 1479 1480 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { 1481 KASSERT(st->sync_state == q, 1482 ("%s: st->sync_state == q", 1483 __func__)); 1484 st->sync_state = PFSYNC_S_NONE; 1485 pf_release_state(st); 1486 } 1487 TAILQ_INIT(&sc->sc_qs[q]); 1488 } 1489 1490 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1491 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1492 free(ur, M_PFSYNC); 1493 } 1494 1495 sc->sc_plus = NULL; 1496 sc->sc_len = PFSYNC_MINPKT; 1497 } 1498 1499 static void 1500 pfsync_sendout(int schedswi) 1501 { 1502 struct pfsync_softc *sc = V_pfsyncif; 1503 struct ifnet *ifp = sc->sc_ifp; 1504 struct mbuf *m; 1505 struct ip *ip; 1506 struct pfsync_header *ph; 1507 struct pfsync_subheader *subh; 1508 struct pf_state *st; 1509 struct pfsync_upd_req_item *ur; 1510 int offset; 1511 int q, count = 0; 1512 1513 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1514 KASSERT(sc->sc_len > PFSYNC_MINPKT, 1515 ("%s: sc_len %zu", __func__, sc->sc_len)); 1516 PFSYNC_LOCK_ASSERT(sc); 1517 1518 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1519 pfsync_drop(sc); 1520 return; 1521 } 1522 1523 m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1524 if (m == NULL) { 1525 sc->sc_ifp->if_oerrors++; 1526 V_pfsyncstats.pfsyncs_onomem++; 1527 return; 1528 } 1529 m->m_data += max_linkhdr; 1530 m->m_len = m->m_pkthdr.len = sc->sc_len; 1531 1532 /* build the ip header */ 1533 ip = (struct ip *)m->m_data; 1534 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1535 offset = sizeof(*ip); 1536 1537 ip->ip_len = htons(m->m_pkthdr.len); 1538 ip->ip_id = htons(ip_randomid()); 1539 1540 /* build the pfsync header */ 1541 ph = (struct pfsync_header *)(m->m_data + offset); 1542 bzero(ph, sizeof(*ph)); 1543 offset += sizeof(*ph); 1544 1545 ph->version = PFSYNC_VERSION; 1546 ph->len = htons(sc->sc_len - sizeof(*ip)); 1547 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1548 1549 /* walk the queues */ 1550 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1551 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1552 continue; 1553 1554 subh = (struct pfsync_subheader *)(m->m_data + offset); 1555 offset += sizeof(*subh); 1556 1557 count = 0; 1558 TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { 1559 KASSERT(st->sync_state == q, 1560 ("%s: st->sync_state == q", 1561 __func__)); 1562 /* 1563 * XXXGL: some of write methods do unlocked reads 1564 * of state data :( 1565 */ 1566 pfsync_qs[q].write(st, m->m_data + offset); 1567 offset += pfsync_qs[q].len; 1568 st->sync_state = PFSYNC_S_NONE; 1569 pf_release_state(st); 1570 count++; 1571 } 1572 TAILQ_INIT(&sc->sc_qs[q]); 1573 1574 bzero(subh, sizeof(*subh)); 1575 subh->action = pfsync_qs[q].action; 1576 subh->count = htons(count); 1577 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1578 } 1579 1580 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { 1581 subh = (struct pfsync_subheader *)(m->m_data + offset); 1582 offset += sizeof(*subh); 1583 1584 count = 0; 1585 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1586 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1587 1588 bcopy(&ur->ur_msg, m->m_data + offset, 1589 sizeof(ur->ur_msg)); 1590 offset += sizeof(ur->ur_msg); 1591 free(ur, M_PFSYNC); 1592 count++; 1593 } 1594 1595 bzero(subh, sizeof(*subh)); 1596 subh->action = PFSYNC_ACT_UPD_REQ; 1597 subh->count = htons(count); 1598 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1599 } 1600 1601 /* has someone built a custom region for us to add? */ 1602 if (sc->sc_plus != NULL) { 1603 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); 1604 offset += sc->sc_pluslen; 1605 1606 sc->sc_plus = NULL; 1607 } 1608 1609 subh = (struct pfsync_subheader *)(m->m_data + offset); 1610 offset += sizeof(*subh); 1611 1612 bzero(subh, sizeof(*subh)); 1613 subh->action = PFSYNC_ACT_EOF; 1614 subh->count = htons(1); 1615 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1616 1617 /* we're done, let's put it on the wire */ 1618 if (ifp->if_bpf) { 1619 m->m_data += sizeof(*ip); 1620 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); 1621 BPF_MTAP(ifp, m); 1622 m->m_data -= sizeof(*ip); 1623 m->m_len = m->m_pkthdr.len = sc->sc_len; 1624 } 1625 1626 if (sc->sc_sync_if == NULL) { 1627 sc->sc_len = PFSYNC_MINPKT; 1628 m_freem(m); 1629 return; 1630 } 1631 1632 sc->sc_ifp->if_opackets++; 1633 sc->sc_ifp->if_obytes += m->m_pkthdr.len; 1634 sc->sc_len = PFSYNC_MINPKT; 1635 1636 if (!_IF_QFULL(&sc->sc_ifp->if_snd)) 1637 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1638 else { 1639 m_freem(m); 1640 sc->sc_ifp->if_snd.ifq_drops++; 1641 } 1642 if (schedswi) 1643 swi_sched(V_pfsync_swi_cookie, 0); 1644 } 1645 1646 static void 1647 pfsync_insert_state(struct pf_state *st) 1648 { 1649 struct pfsync_softc *sc = V_pfsyncif; 1650 1651 if (st->state_flags & PFSTATE_NOSYNC) 1652 return; 1653 1654 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1655 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1656 st->state_flags |= PFSTATE_NOSYNC; 1657 return; 1658 } 1659 1660 KASSERT(st->sync_state == PFSYNC_S_NONE, 1661 ("%s: st->sync_state %u", __func__, st->sync_state)); 1662 1663 PFSYNC_LOCK(sc); 1664 if (sc->sc_len == PFSYNC_MINPKT) 1665 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1666 1667 pfsync_q_ins(st, PFSYNC_S_INS); 1668 PFSYNC_UNLOCK(sc); 1669 1670 st->sync_updates = 0; 1671 } 1672 1673 static int 1674 pfsync_defer(struct pf_state *st, struct mbuf *m) 1675 { 1676 struct pfsync_softc *sc = V_pfsyncif; 1677 struct pfsync_deferral *pd; 1678 1679 if (m->m_flags & (M_BCAST|M_MCAST)) 1680 return (0); 1681 1682 PFSYNC_LOCK(sc); 1683 1684 if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || 1685 !(sc->sc_flags & PFSYNCF_DEFER)) { 1686 PFSYNC_UNLOCK(sc); 1687 return (0); 1688 } 1689 1690 if (sc->sc_deferred >= 128) 1691 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); 1692 1693 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1694 if (pd == NULL) 1695 return (0); 1696 sc->sc_deferred++; 1697 1698 m->m_flags |= M_SKIP_FIREWALL; 1699 st->state_flags |= PFSTATE_ACK; 1700 1701 pd->pd_sc = sc; 1702 pd->pd_refs = 0; 1703 pd->pd_st = st; 1704 pf_ref_state(st); 1705 pd->pd_m = m; 1706 1707 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1708 callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1709 callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); 1710 1711 pfsync_push(sc); 1712 1713 return (1); 1714 } 1715 1716 static void 1717 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1718 { 1719 struct pfsync_softc *sc = pd->pd_sc; 1720 struct mbuf *m = pd->pd_m; 1721 struct pf_state *st = pd->pd_st; 1722 1723 PFSYNC_LOCK_ASSERT(sc); 1724 1725 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1726 sc->sc_deferred--; 1727 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1728 free(pd, M_PFSYNC); 1729 pf_release_state(st); 1730 1731 if (drop) 1732 m_freem(m); 1733 else { 1734 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1735 pfsync_push(sc); 1736 } 1737 } 1738 1739 static void 1740 pfsync_defer_tmo(void *arg) 1741 { 1742 struct pfsync_deferral *pd = arg; 1743 struct pfsync_softc *sc = pd->pd_sc; 1744 struct mbuf *m = pd->pd_m; 1745 struct pf_state *st = pd->pd_st; 1746 1747 PFSYNC_LOCK_ASSERT(sc); 1748 1749 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 1750 1751 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1752 sc->sc_deferred--; 1753 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1754 if (pd->pd_refs == 0) 1755 free(pd, M_PFSYNC); 1756 PFSYNC_UNLOCK(sc); 1757 1758 ip_output(m, NULL, NULL, 0, NULL, NULL); 1759 1760 pf_release_state(st); 1761 1762 CURVNET_RESTORE(); 1763 } 1764 1765 static void 1766 pfsync_undefer_state(struct pf_state *st, int drop) 1767 { 1768 struct pfsync_softc *sc = V_pfsyncif; 1769 struct pfsync_deferral *pd; 1770 1771 PFSYNC_LOCK_ASSERT(sc); 1772 1773 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1774 if (pd->pd_st == st) { 1775 if (callout_stop(&pd->pd_tmo)) 1776 pfsync_undefer(pd, drop); 1777 return; 1778 } 1779 } 1780 1781 panic("%s: unable to find deferred state", __func__); 1782 } 1783 1784 static void 1785 pfsync_update_state(struct pf_state *st) 1786 { 1787 struct pfsync_softc *sc = V_pfsyncif; 1788 int sync = 0; 1789 1790 PF_STATE_LOCK_ASSERT(st); 1791 PFSYNC_LOCK(sc); 1792 1793 if (st->state_flags & PFSTATE_ACK) 1794 pfsync_undefer_state(st, 0); 1795 if (st->state_flags & PFSTATE_NOSYNC) { 1796 if (st->sync_state != PFSYNC_S_NONE) 1797 pfsync_q_del(st); 1798 PFSYNC_UNLOCK(sc); 1799 return; 1800 } 1801 1802 if (sc->sc_len == PFSYNC_MINPKT) 1803 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1804 1805 switch (st->sync_state) { 1806 case PFSYNC_S_UPD_C: 1807 case PFSYNC_S_UPD: 1808 case PFSYNC_S_INS: 1809 /* we're already handling it */ 1810 1811 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1812 st->sync_updates++; 1813 if (st->sync_updates >= sc->sc_maxupdates) 1814 sync = 1; 1815 } 1816 break; 1817 1818 case PFSYNC_S_IACK: 1819 pfsync_q_del(st); 1820 case PFSYNC_S_NONE: 1821 pfsync_q_ins(st, PFSYNC_S_UPD_C); 1822 st->sync_updates = 0; 1823 break; 1824 1825 default: 1826 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1827 } 1828 1829 if (sync || (time_uptime - st->pfsync_time) < 2) 1830 pfsync_push(sc); 1831 1832 PFSYNC_UNLOCK(sc); 1833 } 1834 1835 static void 1836 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1837 { 1838 struct pfsync_softc *sc = V_pfsyncif; 1839 struct pfsync_upd_req_item *item; 1840 size_t nlen = sizeof(struct pfsync_upd_req); 1841 1842 PFSYNC_LOCK_ASSERT(sc); 1843 1844 /* 1845 * This code does a bit to prevent multiple update requests for the 1846 * same state being generated. It searches current subheader queue, 1847 * but it doesn't lookup into queue of already packed datagrams. 1848 */ 1849 TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) 1850 if (item->ur_msg.id == id && 1851 item->ur_msg.creatorid == creatorid) 1852 return; 1853 1854 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1855 if (item == NULL) 1856 return; /* XXX stats */ 1857 1858 item->ur_msg.id = id; 1859 item->ur_msg.creatorid = creatorid; 1860 1861 if (TAILQ_EMPTY(&sc->sc_upd_req_list)) 1862 nlen += sizeof(struct pfsync_subheader); 1863 1864 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 1865 pfsync_sendout(1); 1866 1867 nlen = sizeof(struct pfsync_subheader) + 1868 sizeof(struct pfsync_upd_req); 1869 } 1870 1871 TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); 1872 sc->sc_len += nlen; 1873 } 1874 1875 static void 1876 pfsync_update_state_req(struct pf_state *st) 1877 { 1878 struct pfsync_softc *sc = V_pfsyncif; 1879 1880 PF_STATE_LOCK_ASSERT(st); 1881 PFSYNC_LOCK(sc); 1882 1883 if (st->state_flags & PFSTATE_NOSYNC) { 1884 if (st->sync_state != PFSYNC_S_NONE) 1885 pfsync_q_del(st); 1886 PFSYNC_UNLOCK(sc); 1887 return; 1888 } 1889 1890 switch (st->sync_state) { 1891 case PFSYNC_S_UPD_C: 1892 case PFSYNC_S_IACK: 1893 pfsync_q_del(st); 1894 case PFSYNC_S_NONE: 1895 pfsync_q_ins(st, PFSYNC_S_UPD); 1896 pfsync_push(sc); 1897 break; 1898 1899 case PFSYNC_S_INS: 1900 case PFSYNC_S_UPD: 1901 case PFSYNC_S_DEL: 1902 /* we're already handling it */ 1903 break; 1904 1905 default: 1906 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1907 } 1908 1909 PFSYNC_UNLOCK(sc); 1910 } 1911 1912 static void 1913 pfsync_delete_state(struct pf_state *st) 1914 { 1915 struct pfsync_softc *sc = V_pfsyncif; 1916 1917 PFSYNC_LOCK(sc); 1918 if (st->state_flags & PFSTATE_ACK) 1919 pfsync_undefer_state(st, 1); 1920 if (st->state_flags & PFSTATE_NOSYNC) { 1921 if (st->sync_state != PFSYNC_S_NONE) 1922 pfsync_q_del(st); 1923 PFSYNC_UNLOCK(sc); 1924 return; 1925 } 1926 1927 if (sc->sc_len == PFSYNC_MINPKT) 1928 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1929 1930 switch (st->sync_state) { 1931 case PFSYNC_S_INS: 1932 /* We never got to tell the world so just forget about it. */ 1933 pfsync_q_del(st); 1934 break; 1935 1936 case PFSYNC_S_UPD_C: 1937 case PFSYNC_S_UPD: 1938 case PFSYNC_S_IACK: 1939 pfsync_q_del(st); 1940 /* FALLTHROUGH to putting it on the del list */ 1941 1942 case PFSYNC_S_NONE: 1943 pfsync_q_ins(st, PFSYNC_S_DEL); 1944 break; 1945 1946 default: 1947 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1948 } 1949 PFSYNC_UNLOCK(sc); 1950 } 1951 1952 static void 1953 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 1954 { 1955 struct pfsync_softc *sc = V_pfsyncif; 1956 struct { 1957 struct pfsync_subheader subh; 1958 struct pfsync_clr clr; 1959 } __packed r; 1960 1961 bzero(&r, sizeof(r)); 1962 1963 r.subh.action = PFSYNC_ACT_CLR; 1964 r.subh.count = htons(1); 1965 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 1966 1967 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 1968 r.clr.creatorid = creatorid; 1969 1970 PFSYNC_LOCK(sc); 1971 pfsync_send_plus(&r, sizeof(r)); 1972 PFSYNC_UNLOCK(sc); 1973 } 1974 1975 static void 1976 pfsync_q_ins(struct pf_state *st, int q) 1977 { 1978 struct pfsync_softc *sc = V_pfsyncif; 1979 size_t nlen = pfsync_qs[q].len; 1980 1981 PFSYNC_LOCK_ASSERT(sc); 1982 1983 KASSERT(st->sync_state == PFSYNC_S_NONE, 1984 ("%s: st->sync_state %u", __func__, st->sync_state)); 1985 KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 1986 sc->sc_len)); 1987 1988 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1989 nlen += sizeof(struct pfsync_subheader); 1990 1991 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 1992 pfsync_sendout(1); 1993 1994 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 1995 } 1996 1997 sc->sc_len += nlen; 1998 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); 1999 st->sync_state = q; 2000 pf_ref_state(st); 2001 } 2002 2003 static void 2004 pfsync_q_del(struct pf_state *st) 2005 { 2006 struct pfsync_softc *sc = V_pfsyncif; 2007 int q = st->sync_state; 2008 2009 PFSYNC_LOCK_ASSERT(sc); 2010 KASSERT(st->sync_state != PFSYNC_S_NONE, 2011 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2012 2013 sc->sc_len -= pfsync_qs[q].len; 2014 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 2015 st->sync_state = PFSYNC_S_NONE; 2016 pf_release_state(st); 2017 2018 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2019 sc->sc_len -= sizeof(struct pfsync_subheader); 2020 } 2021 2022 static void 2023 pfsync_bulk_start(void) 2024 { 2025 struct pfsync_softc *sc = V_pfsyncif; 2026 2027 if (V_pf_status.debug >= PF_DEBUG_MISC) 2028 printf("pfsync: received bulk update request\n"); 2029 2030 PFSYNC_BLOCK(sc); 2031 2032 sc->sc_ureq_received = time_uptime; 2033 sc->sc_bulk_hashid = 0; 2034 sc->sc_bulk_stateid = 0; 2035 pfsync_bulk_status(PFSYNC_BUS_START); 2036 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2037 PFSYNC_BUNLOCK(sc); 2038 } 2039 2040 static void 2041 pfsync_bulk_update(void *arg) 2042 { 2043 struct pfsync_softc *sc = arg; 2044 struct pf_state *s; 2045 int i, sent = 0; 2046 2047 PFSYNC_BLOCK_ASSERT(sc); 2048 CURVNET_SET(sc->sc_ifp->if_vnet); 2049 2050 /* 2051 * Start with last state from previous invocation. 2052 * It may had gone, in this case start from the 2053 * hash slot. 2054 */ 2055 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2056 2057 if (s != NULL) 2058 i = PF_IDHASH(s); 2059 else 2060 i = sc->sc_bulk_hashid; 2061 2062 for (; i <= pf_hashmask; i++) { 2063 struct pf_idhash *ih = &V_pf_idhash[i]; 2064 2065 if (s != NULL) 2066 PF_HASHROW_ASSERT(ih); 2067 else { 2068 PF_HASHROW_LOCK(ih); 2069 s = LIST_FIRST(&ih->states); 2070 } 2071 2072 for (; s; s = LIST_NEXT(s, entry)) { 2073 2074 if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < 2075 sizeof(struct pfsync_state)) { 2076 /* We've filled a packet. */ 2077 sc->sc_bulk_hashid = i; 2078 sc->sc_bulk_stateid = s->id; 2079 sc->sc_bulk_creatorid = s->creatorid; 2080 PF_HASHROW_UNLOCK(ih); 2081 callout_reset(&sc->sc_bulk_tmo, 1, 2082 pfsync_bulk_update, sc); 2083 goto full; 2084 } 2085 2086 if (s->sync_state == PFSYNC_S_NONE && 2087 s->timeout < PFTM_MAX && 2088 s->pfsync_time <= sc->sc_ureq_received) { 2089 pfsync_update_state_req(s); 2090 sent++; 2091 } 2092 } 2093 PF_HASHROW_UNLOCK(ih); 2094 } 2095 2096 /* We're done. */ 2097 pfsync_bulk_status(PFSYNC_BUS_END); 2098 2099 full: 2100 CURVNET_RESTORE(); 2101 } 2102 2103 static void 2104 pfsync_bulk_status(u_int8_t status) 2105 { 2106 struct { 2107 struct pfsync_subheader subh; 2108 struct pfsync_bus bus; 2109 } __packed r; 2110 2111 struct pfsync_softc *sc = V_pfsyncif; 2112 2113 bzero(&r, sizeof(r)); 2114 2115 r.subh.action = PFSYNC_ACT_BUS; 2116 r.subh.count = htons(1); 2117 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2118 2119 r.bus.creatorid = V_pf_status.hostid; 2120 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2121 r.bus.status = status; 2122 2123 PFSYNC_LOCK(sc); 2124 pfsync_send_plus(&r, sizeof(r)); 2125 PFSYNC_UNLOCK(sc); 2126 } 2127 2128 static void 2129 pfsync_bulk_fail(void *arg) 2130 { 2131 struct pfsync_softc *sc = arg; 2132 2133 CURVNET_SET(sc->sc_ifp->if_vnet); 2134 2135 PFSYNC_BLOCK_ASSERT(sc); 2136 2137 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2138 /* Try again */ 2139 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2140 pfsync_bulk_fail, V_pfsyncif); 2141 PFSYNC_LOCK(sc); 2142 pfsync_request_update(0, 0); 2143 PFSYNC_UNLOCK(sc); 2144 } else { 2145 /* Pretend like the transfer was ok. */ 2146 sc->sc_ureq_sent = 0; 2147 sc->sc_bulk_tries = 0; 2148 PFSYNC_LOCK(sc); 2149 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2150 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2151 "pfsync bulk fail"); 2152 sc->sc_flags |= PFSYNCF_OK; 2153 PFSYNC_UNLOCK(sc); 2154 if (V_pf_status.debug >= PF_DEBUG_MISC) 2155 printf("pfsync: failed to receive bulk update\n"); 2156 } 2157 2158 CURVNET_RESTORE(); 2159 } 2160 2161 static void 2162 pfsync_send_plus(void *plus, size_t pluslen) 2163 { 2164 struct pfsync_softc *sc = V_pfsyncif; 2165 2166 PFSYNC_LOCK_ASSERT(sc); 2167 2168 if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) 2169 pfsync_sendout(1); 2170 2171 sc->sc_plus = plus; 2172 sc->sc_len += (sc->sc_pluslen = pluslen); 2173 2174 pfsync_sendout(1); 2175 } 2176 2177 static void 2178 pfsync_timeout(void *arg) 2179 { 2180 struct pfsync_softc *sc = arg; 2181 2182 CURVNET_SET(sc->sc_ifp->if_vnet); 2183 PFSYNC_LOCK(sc); 2184 pfsync_push(sc); 2185 PFSYNC_UNLOCK(sc); 2186 CURVNET_RESTORE(); 2187 } 2188 2189 static void 2190 pfsync_push(struct pfsync_softc *sc) 2191 { 2192 2193 PFSYNC_LOCK_ASSERT(sc); 2194 2195 sc->sc_flags |= PFSYNCF_PUSH; 2196 swi_sched(V_pfsync_swi_cookie, 0); 2197 } 2198 2199 static void 2200 pfsyncintr(void *arg) 2201 { 2202 struct pfsync_softc *sc = arg; 2203 struct mbuf *m, *n; 2204 2205 CURVNET_SET(sc->sc_ifp->if_vnet); 2206 2207 PFSYNC_LOCK(sc); 2208 if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { 2209 pfsync_sendout(0); 2210 sc->sc_flags &= ~PFSYNCF_PUSH; 2211 } 2212 _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); 2213 PFSYNC_UNLOCK(sc); 2214 2215 for (; m != NULL; m = n) { 2216 2217 n = m->m_nextpkt; 2218 m->m_nextpkt = NULL; 2219 2220 /* 2221 * We distinguish between a deferral packet and our 2222 * own pfsync packet based on M_SKIP_FIREWALL 2223 * flag. This is XXX. 2224 */ 2225 if (m->m_flags & M_SKIP_FIREWALL) 2226 ip_output(m, NULL, NULL, 0, NULL, NULL); 2227 else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 2228 NULL) == 0) 2229 V_pfsyncstats.pfsyncs_opackets++; 2230 else 2231 V_pfsyncstats.pfsyncs_oerrors++; 2232 } 2233 CURVNET_RESTORE(); 2234 } 2235 2236 static int 2237 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) 2238 { 2239 struct ip_moptions *imo = &sc->sc_imo; 2240 int error; 2241 2242 if (!(ifp->if_flags & IFF_MULTICAST)) 2243 return (EADDRNOTAVAIL); 2244 2245 imo->imo_membership = (struct in_multi **)mship; 2246 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 2247 imo->imo_multicast_vif = -1; 2248 2249 if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, 2250 &imo->imo_membership[0])) != 0) { 2251 imo->imo_membership = NULL; 2252 return (error); 2253 } 2254 imo->imo_num_memberships++; 2255 imo->imo_multicast_ifp = ifp; 2256 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2257 imo->imo_multicast_loop = 0; 2258 2259 return (0); 2260 } 2261 2262 static void 2263 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2264 { 2265 struct ip_moptions *imo = &sc->sc_imo; 2266 2267 in_leavegroup(imo->imo_membership[0], NULL); 2268 free(imo->imo_membership, M_PFSYNC); 2269 imo->imo_membership = NULL; 2270 imo->imo_multicast_ifp = NULL; 2271 } 2272 2273 #ifdef INET 2274 extern struct domain inetdomain; 2275 static struct protosw in_pfsync_protosw = { 2276 .pr_type = SOCK_RAW, 2277 .pr_domain = &inetdomain, 2278 .pr_protocol = IPPROTO_PFSYNC, 2279 .pr_flags = PR_ATOMIC|PR_ADDR, 2280 .pr_input = pfsync_input, 2281 .pr_output = (pr_output_t *)rip_output, 2282 .pr_ctloutput = rip_ctloutput, 2283 .pr_usrreqs = &rip_usrreqs 2284 }; 2285 #endif 2286 2287 static void 2288 pfsync_pointers_init() 2289 { 2290 2291 PF_RULES_WLOCK(); 2292 pfsync_state_import_ptr = pfsync_state_import; 2293 pfsync_insert_state_ptr = pfsync_insert_state; 2294 pfsync_update_state_ptr = pfsync_update_state; 2295 pfsync_delete_state_ptr = pfsync_delete_state; 2296 pfsync_clear_states_ptr = pfsync_clear_states; 2297 pfsync_defer_ptr = pfsync_defer; 2298 PF_RULES_WUNLOCK(); 2299 } 2300 2301 static void 2302 pfsync_pointers_uninit() 2303 { 2304 2305 PF_RULES_WLOCK(); 2306 pfsync_state_import_ptr = NULL; 2307 pfsync_insert_state_ptr = NULL; 2308 pfsync_update_state_ptr = NULL; 2309 pfsync_delete_state_ptr = NULL; 2310 pfsync_clear_states_ptr = NULL; 2311 pfsync_defer_ptr = NULL; 2312 PF_RULES_WUNLOCK(); 2313 } 2314 2315 static int 2316 pfsync_init() 2317 { 2318 VNET_ITERATOR_DECL(vnet_iter); 2319 int error = 0; 2320 2321 VNET_LIST_RLOCK(); 2322 VNET_FOREACH(vnet_iter) { 2323 CURVNET_SET(vnet_iter); 2324 V_pfsync_cloner = if_clone_simple(pfsyncname, 2325 pfsync_clone_create, pfsync_clone_destroy, 1); 2326 error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, 2327 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 2328 CURVNET_RESTORE(); 2329 if (error) 2330 goto fail_locked; 2331 } 2332 VNET_LIST_RUNLOCK(); 2333 #ifdef INET 2334 error = pf_proto_register(PF_INET, &in_pfsync_protosw); 2335 if (error) 2336 goto fail; 2337 error = ipproto_register(IPPROTO_PFSYNC); 2338 if (error) { 2339 pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); 2340 goto fail; 2341 } 2342 #endif 2343 pfsync_pointers_init(); 2344 2345 return (0); 2346 2347 fail: 2348 VNET_LIST_RLOCK(); 2349 fail_locked: 2350 VNET_FOREACH(vnet_iter) { 2351 CURVNET_SET(vnet_iter); 2352 if (V_pfsync_swi_cookie) { 2353 swi_remove(V_pfsync_swi_cookie); 2354 if_clone_detach(V_pfsync_cloner); 2355 } 2356 CURVNET_RESTORE(); 2357 } 2358 VNET_LIST_RUNLOCK(); 2359 2360 return (error); 2361 } 2362 2363 static void 2364 pfsync_uninit() 2365 { 2366 VNET_ITERATOR_DECL(vnet_iter); 2367 2368 pfsync_pointers_uninit(); 2369 2370 ipproto_unregister(IPPROTO_PFSYNC); 2371 pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); 2372 VNET_LIST_RLOCK(); 2373 VNET_FOREACH(vnet_iter) { 2374 CURVNET_SET(vnet_iter); 2375 if_clone_detach(V_pfsync_cloner); 2376 swi_remove(V_pfsync_swi_cookie); 2377 CURVNET_RESTORE(); 2378 } 2379 VNET_LIST_RUNLOCK(); 2380 } 2381 2382 static int 2383 pfsync_modevent(module_t mod, int type, void *data) 2384 { 2385 int error = 0; 2386 2387 switch (type) { 2388 case MOD_LOAD: 2389 error = pfsync_init(); 2390 break; 2391 case MOD_QUIESCE: 2392 /* 2393 * Module should not be unloaded due to race conditions. 2394 */ 2395 error = EBUSY; 2396 break; 2397 case MOD_UNLOAD: 2398 pfsync_uninit(); 2399 break; 2400 default: 2401 error = EINVAL; 2402 break; 2403 } 2404 2405 return (error); 2406 } 2407 2408 static moduledata_t pfsync_mod = { 2409 pfsyncname, 2410 pfsync_modevent, 2411 0 2412 }; 2413 2414 #define PFSYNC_MODVER 1 2415 2416 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2417 MODULE_VERSION(pfsync, PFSYNC_MODVER); 2418 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 2419