1 /* $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 31 * 32 * Permission to use, copy, modify, and distribute this software for any 33 * purpose with or without fee is hereby granted, provided that the above 34 * copyright notice and this permission notice appear in all copies. 35 * 36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 43 */ 44 45 /* 46 * Revisions picked from OpenBSD after revision 1.110 import: 47 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 48 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 49 * 1.120, 1.175 - use monotonic time_uptime 50 * 1.122 - reduce number of updates for non-TCP sessions 51 * 1.125, 1.127 - rewrite merge or stale processing 52 * 1.128 - cleanups 53 * 1.146 - bzero() mbuf before sparsely filling it with data 54 * 1.170 - SIOCSIFMTU checks 55 * 1.126, 1.142 - deferred packets processing 56 * 1.173 - correct expire time processing 57 */ 58 59 #include <sys/cdefs.h> 60 __FBSDID("$FreeBSD$"); 61 62 #include "opt_inet.h" 63 #include "opt_inet6.h" 64 #include "opt_pf.h" 65 66 #include <sys/param.h> 67 #include <sys/bus.h> 68 #include <sys/endian.h> 69 #include <sys/interrupt.h> 70 #include <sys/kernel.h> 71 #include <sys/lock.h> 72 #include <sys/mbuf.h> 73 #include <sys/module.h> 74 #include <sys/mutex.h> 75 #include <sys/priv.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/sockio.h> 79 #include <sys/sysctl.h> 80 81 #include <net/bpf.h> 82 #include <net/if.h> 83 #include <net/if_clone.h> 84 #include <net/if_types.h> 85 #include <net/pfvar.h> 86 #include <net/if_pfsync.h> 87 88 #include <netinet/if_ether.h> 89 #include <netinet/in.h> 90 #include <netinet/in_var.h> 91 #include <netinet/ip.h> 92 #include <netinet/ip_carp.h> 93 #include <netinet/ip_var.h> 94 #include <netinet/tcp.h> 95 #include <netinet/tcp_fsm.h> 96 #include <netinet/tcp_seq.h> 97 98 #define PFSYNC_MINPKT ( \ 99 sizeof(struct ip) + \ 100 sizeof(struct pfsync_header) + \ 101 sizeof(struct pfsync_subheader) + \ 102 sizeof(struct pfsync_eof)) 103 104 struct pfsync_pkt { 105 struct ip *ip; 106 struct in_addr src; 107 u_int8_t flags; 108 }; 109 110 static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 111 struct pfsync_state_peer *); 112 static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); 113 static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); 114 static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); 115 static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); 116 static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); 117 static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); 118 static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); 119 static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); 120 static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); 121 static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); 122 static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); 123 static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); 124 125 static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { 126 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 127 pfsync_in_ins, /* PFSYNC_ACT_INS */ 128 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 129 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 130 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 131 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 132 pfsync_in_del, /* PFSYNC_ACT_DEL */ 133 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 134 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 135 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 136 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 137 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 138 pfsync_in_eof /* PFSYNC_ACT_EOF */ 139 }; 140 141 struct pfsync_q { 142 void (*write)(struct pf_state *, void *); 143 size_t len; 144 u_int8_t action; 145 }; 146 147 /* we have one of these for every PFSYNC_S_ */ 148 static void pfsync_out_state(struct pf_state *, void *); 149 static void pfsync_out_iack(struct pf_state *, void *); 150 static void pfsync_out_upd_c(struct pf_state *, void *); 151 static void pfsync_out_del(struct pf_state *, void *); 152 153 static struct pfsync_q pfsync_qs[] = { 154 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 155 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 156 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, 157 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 158 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 159 }; 160 161 static void pfsync_q_ins(struct pf_state *, int); 162 static void pfsync_q_del(struct pf_state *); 163 164 static void pfsync_update_state(struct pf_state *); 165 166 struct pfsync_upd_req_item { 167 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 168 struct pfsync_upd_req ur_msg; 169 }; 170 171 struct pfsync_deferral { 172 struct pfsync_softc *pd_sc; 173 TAILQ_ENTRY(pfsync_deferral) pd_entry; 174 u_int pd_refs; 175 struct callout pd_tmo; 176 177 struct pf_state *pd_st; 178 struct mbuf *pd_m; 179 }; 180 181 struct pfsync_softc { 182 /* Configuration */ 183 struct ifnet *sc_ifp; 184 struct ifnet *sc_sync_if; 185 struct ip_moptions sc_imo; 186 struct in_addr sc_sync_peer; 187 uint32_t sc_flags; 188 #define PFSYNCF_OK 0x00000001 189 #define PFSYNCF_DEFER 0x00000002 190 #define PFSYNCF_PUSH 0x00000004 191 uint8_t sc_maxupdates; 192 struct ip sc_template; 193 struct callout sc_tmo; 194 struct mtx sc_mtx; 195 196 /* Queued data */ 197 size_t sc_len; 198 TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; 199 TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; 200 TAILQ_HEAD(, pfsync_deferral) sc_deferrals; 201 u_int sc_deferred; 202 void *sc_plus; 203 size_t sc_pluslen; 204 205 /* Bulk update info */ 206 struct mtx sc_bulk_mtx; 207 uint32_t sc_ureq_sent; 208 int sc_bulk_tries; 209 uint32_t sc_ureq_received; 210 int sc_bulk_hashid; 211 uint64_t sc_bulk_stateid; 212 uint32_t sc_bulk_creatorid; 213 struct callout sc_bulk_tmo; 214 struct callout sc_bulkfail_tmo; 215 }; 216 217 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 218 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 219 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 220 221 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 222 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 223 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 224 225 static const char pfsyncname[] = "pfsync"; 226 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 227 static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; 228 #define V_pfsyncif VNET(pfsyncif) 229 static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; 230 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 231 static VNET_DEFINE(struct pfsyncstats, pfsyncstats); 232 #define V_pfsyncstats VNET(pfsyncstats) 233 static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; 234 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 235 236 static void pfsync_timeout(void *); 237 static void pfsync_push(struct pfsync_softc *); 238 static void pfsyncintr(void *); 239 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 240 void *); 241 static void pfsync_multicast_cleanup(struct pfsync_softc *); 242 static void pfsync_pointers_init(void); 243 static void pfsync_pointers_uninit(void); 244 static int pfsync_init(void); 245 static void pfsync_uninit(void); 246 247 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); 248 SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, 249 &VNET_NAME(pfsyncstats), pfsyncstats, 250 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 251 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, 252 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 253 254 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 255 static void pfsync_clone_destroy(struct ifnet *); 256 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 257 struct pf_state_peer *); 258 static int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 259 struct route *); 260 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 261 262 static int pfsync_defer(struct pf_state *, struct mbuf *); 263 static void pfsync_undefer(struct pfsync_deferral *, int); 264 static void pfsync_undefer_state(struct pf_state *, int); 265 static void pfsync_defer_tmo(void *); 266 267 static void pfsync_request_update(u_int32_t, u_int64_t); 268 static void pfsync_update_state_req(struct pf_state *); 269 270 static void pfsync_drop(struct pfsync_softc *); 271 static void pfsync_sendout(int); 272 static void pfsync_send_plus(void *, size_t); 273 274 static void pfsync_bulk_start(void); 275 static void pfsync_bulk_status(u_int8_t); 276 static void pfsync_bulk_update(void *); 277 static void pfsync_bulk_fail(void *); 278 279 #ifdef IPSEC 280 static void pfsync_update_net_tdb(struct pfsync_tdb *); 281 #endif 282 283 #define PFSYNC_MAX_BULKTRIES 12 284 285 VNET_DEFINE(struct if_clone *, pfsync_cloner); 286 #define V_pfsync_cloner VNET(pfsync_cloner) 287 288 static int 289 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 290 { 291 struct pfsync_softc *sc; 292 struct ifnet *ifp; 293 int q; 294 295 if (unit != 0) 296 return (EINVAL); 297 298 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 299 sc->sc_flags |= PFSYNCF_OK; 300 301 for (q = 0; q < PFSYNC_S_COUNT; q++) 302 TAILQ_INIT(&sc->sc_qs[q]); 303 304 TAILQ_INIT(&sc->sc_upd_req_list); 305 TAILQ_INIT(&sc->sc_deferrals); 306 307 sc->sc_len = PFSYNC_MINPKT; 308 sc->sc_maxupdates = 128; 309 310 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 311 if (ifp == NULL) { 312 free(sc, M_PFSYNC); 313 return (ENOSPC); 314 } 315 if_initname(ifp, pfsyncname, unit); 316 ifp->if_softc = sc; 317 ifp->if_ioctl = pfsyncioctl; 318 ifp->if_output = pfsyncoutput; 319 ifp->if_type = IFT_PFSYNC; 320 ifp->if_snd.ifq_maxlen = ifqmaxlen; 321 ifp->if_hdrlen = sizeof(struct pfsync_header); 322 ifp->if_mtu = ETHERMTU; 323 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 324 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 325 callout_init(&sc->sc_tmo, CALLOUT_MPSAFE); 326 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 327 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 328 329 if_attach(ifp); 330 331 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 332 333 V_pfsyncif = sc; 334 335 return (0); 336 } 337 338 static void 339 pfsync_clone_destroy(struct ifnet *ifp) 340 { 341 struct pfsync_softc *sc = ifp->if_softc; 342 343 /* 344 * At this stage, everything should have already been 345 * cleared by pfsync_uninit(), and we have only to 346 * drain callouts. 347 */ 348 while (sc->sc_deferred > 0) { 349 struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); 350 351 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 352 sc->sc_deferred--; 353 if (callout_stop(&pd->pd_tmo)) { 354 pf_release_state(pd->pd_st); 355 m_freem(pd->pd_m); 356 free(pd, M_PFSYNC); 357 } else { 358 pd->pd_refs++; 359 callout_drain(&pd->pd_tmo); 360 free(pd, M_PFSYNC); 361 } 362 } 363 364 callout_drain(&sc->sc_tmo); 365 callout_drain(&sc->sc_bulkfail_tmo); 366 callout_drain(&sc->sc_bulk_tmo); 367 368 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 369 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 370 bpfdetach(ifp); 371 if_detach(ifp); 372 373 pfsync_drop(sc); 374 375 if_free(ifp); 376 if (sc->sc_imo.imo_membership) 377 pfsync_multicast_cleanup(sc); 378 mtx_destroy(&sc->sc_mtx); 379 mtx_destroy(&sc->sc_bulk_mtx); 380 free(sc, M_PFSYNC); 381 382 V_pfsyncif = NULL; 383 } 384 385 static int 386 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 387 struct pf_state_peer *d) 388 { 389 if (s->scrub.scrub_flag && d->scrub == NULL) { 390 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 391 if (d->scrub == NULL) 392 return (ENOMEM); 393 } 394 395 return (0); 396 } 397 398 399 static int 400 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 401 { 402 struct pfsync_softc *sc = V_pfsyncif; 403 struct pf_state *st = NULL; 404 struct pf_state_key *skw = NULL, *sks = NULL; 405 struct pf_rule *r = NULL; 406 struct pfi_kif *kif; 407 int error; 408 409 PF_RULES_RASSERT(); 410 411 if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) { 412 printf("%s: invalid creator id: %08x\n", __func__, 413 ntohl(sp->creatorid)); 414 return (EINVAL); 415 } 416 417 if ((kif = pfi_kif_find(sp->ifname)) == NULL) { 418 if (V_pf_status.debug >= PF_DEBUG_MISC) 419 printf("%s: unknown interface: %s\n", __func__, 420 sp->ifname); 421 if (flags & PFSYNC_SI_IOCTL) 422 return (EINVAL); 423 return (0); /* skip this state */ 424 } 425 426 /* 427 * If the ruleset checksums match or the state is coming from the ioctl, 428 * it's safe to associate the state with the rule of that number. 429 */ 430 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 431 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 432 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 433 r = pf_main_ruleset.rules[ 434 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 435 else 436 r = &V_pf_default_rule; 437 438 if ((r->max_states && r->states_cur >= r->max_states)) 439 goto cleanup; 440 441 /* 442 * XXXGL: consider M_WAITOK in ioctl path after. 443 */ 444 if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) 445 goto cleanup; 446 447 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 448 goto cleanup; 449 450 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 451 &sp->key[PF_SK_STACK].addr[0], sp->af) || 452 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 453 &sp->key[PF_SK_STACK].addr[1], sp->af) || 454 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 455 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 456 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 457 if (sks == NULL) 458 goto cleanup; 459 } else 460 sks = skw; 461 462 /* allocate memory for scrub info */ 463 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 464 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 465 goto cleanup; 466 467 /* copy to state key(s) */ 468 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 469 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 470 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 471 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 472 skw->proto = sp->proto; 473 skw->af = sp->af; 474 if (sks != skw) { 475 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 476 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 477 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 478 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 479 sks->proto = sp->proto; 480 sks->af = sp->af; 481 } 482 483 /* copy to state */ 484 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 485 st->creation = time_uptime - ntohl(sp->creation); 486 st->expire = time_uptime; 487 if (sp->expire) { 488 uint32_t timeout; 489 490 timeout = r->timeout[sp->timeout]; 491 if (!timeout) 492 timeout = V_pf_default_rule.timeout[sp->timeout]; 493 494 /* sp->expire may have been adaptively scaled by export. */ 495 st->expire -= timeout - ntohl(sp->expire); 496 } 497 498 st->direction = sp->direction; 499 st->log = sp->log; 500 st->timeout = sp->timeout; 501 st->state_flags = sp->state_flags; 502 503 st->id = sp->id; 504 st->creatorid = sp->creatorid; 505 pf_state_peer_ntoh(&sp->src, &st->src); 506 pf_state_peer_ntoh(&sp->dst, &st->dst); 507 508 st->rule.ptr = r; 509 st->nat_rule.ptr = NULL; 510 st->anchor.ptr = NULL; 511 st->rt_kif = NULL; 512 513 st->pfsync_time = time_uptime; 514 st->sync_state = PFSYNC_S_NONE; 515 516 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 517 r->states_cur++; 518 r->states_tot++; 519 520 if (!(flags & PFSYNC_SI_IOCTL)) 521 st->state_flags |= PFSTATE_NOSYNC; 522 523 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 524 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 525 r->states_cur--; 526 goto cleanup_state; 527 } 528 529 if (!(flags & PFSYNC_SI_IOCTL)) { 530 st->state_flags &= ~PFSTATE_NOSYNC; 531 if (st->state_flags & PFSTATE_ACK) { 532 pfsync_q_ins(st, PFSYNC_S_IACK); 533 pfsync_push(sc); 534 } 535 } 536 st->state_flags &= ~PFSTATE_ACK; 537 PF_STATE_UNLOCK(st); 538 539 return (0); 540 541 cleanup: 542 error = ENOMEM; 543 if (skw == sks) 544 sks = NULL; 545 if (skw != NULL) 546 uma_zfree(V_pf_state_key_z, skw); 547 if (sks != NULL) 548 uma_zfree(V_pf_state_key_z, sks); 549 550 cleanup_state: /* pf_state_insert() frees the state keys. */ 551 if (st) { 552 if (st->dst.scrub) 553 uma_zfree(V_pf_state_scrub_z, st->dst.scrub); 554 if (st->src.scrub) 555 uma_zfree(V_pf_state_scrub_z, st->src.scrub); 556 uma_zfree(V_pf_state_z, st); 557 } 558 return (error); 559 } 560 561 static void 562 pfsync_input(struct mbuf *m, __unused int off) 563 { 564 struct pfsync_softc *sc = V_pfsyncif; 565 struct pfsync_pkt pkt; 566 struct ip *ip = mtod(m, struct ip *); 567 struct pfsync_header *ph; 568 struct pfsync_subheader subh; 569 570 int offset, len; 571 int rv; 572 uint16_t count; 573 574 V_pfsyncstats.pfsyncs_ipackets++; 575 576 /* Verify that we have a sync interface configured. */ 577 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 578 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 579 goto done; 580 581 /* verify that the packet came in on the right interface */ 582 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 583 V_pfsyncstats.pfsyncs_badif++; 584 goto done; 585 } 586 587 sc->sc_ifp->if_ipackets++; 588 sc->sc_ifp->if_ibytes += m->m_pkthdr.len; 589 /* verify that the IP TTL is 255. */ 590 if (ip->ip_ttl != PFSYNC_DFLTTL) { 591 V_pfsyncstats.pfsyncs_badttl++; 592 goto done; 593 } 594 595 offset = ip->ip_hl << 2; 596 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 597 V_pfsyncstats.pfsyncs_hdrops++; 598 goto done; 599 } 600 601 if (offset + sizeof(*ph) > m->m_len) { 602 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 603 V_pfsyncstats.pfsyncs_hdrops++; 604 return; 605 } 606 ip = mtod(m, struct ip *); 607 } 608 ph = (struct pfsync_header *)((char *)ip + offset); 609 610 /* verify the version */ 611 if (ph->version != PFSYNC_VERSION) { 612 V_pfsyncstats.pfsyncs_badver++; 613 goto done; 614 } 615 616 len = ntohs(ph->len) + offset; 617 if (m->m_pkthdr.len < len) { 618 V_pfsyncstats.pfsyncs_badlen++; 619 goto done; 620 } 621 622 /* Cheaper to grab this now than having to mess with mbufs later */ 623 pkt.ip = ip; 624 pkt.src = ip->ip_src; 625 pkt.flags = 0; 626 627 /* 628 * Trusting pf_chksum during packet processing, as well as seeking 629 * in interface name tree, require holding PF_RULES_RLOCK(). 630 */ 631 PF_RULES_RLOCK(); 632 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 633 pkt.flags |= PFSYNC_SI_CKSUM; 634 635 offset += sizeof(*ph); 636 while (offset <= len - sizeof(subh)) { 637 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 638 offset += sizeof(subh); 639 640 if (subh.action >= PFSYNC_ACT_MAX) { 641 V_pfsyncstats.pfsyncs_badact++; 642 PF_RULES_RUNLOCK(); 643 goto done; 644 } 645 646 count = ntohs(subh.count); 647 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 648 rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); 649 if (rv == -1) { 650 PF_RULES_RUNLOCK(); 651 return; 652 } 653 654 offset += rv; 655 } 656 PF_RULES_RUNLOCK(); 657 658 done: 659 m_freem(m); 660 } 661 662 static int 663 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 664 { 665 struct pfsync_clr *clr; 666 struct mbuf *mp; 667 int len = sizeof(*clr) * count; 668 int i, offp; 669 u_int32_t creatorid; 670 671 mp = m_pulldown(m, offset, len, &offp); 672 if (mp == NULL) { 673 V_pfsyncstats.pfsyncs_badlen++; 674 return (-1); 675 } 676 clr = (struct pfsync_clr *)(mp->m_data + offp); 677 678 for (i = 0; i < count; i++) { 679 creatorid = clr[i].creatorid; 680 681 if (clr[i].ifname[0] != '\0' && 682 pfi_kif_find(clr[i].ifname) == NULL) 683 continue; 684 685 for (int i = 0; i <= V_pf_hashmask; i++) { 686 struct pf_idhash *ih = &V_pf_idhash[i]; 687 struct pf_state *s; 688 relock: 689 PF_HASHROW_LOCK(ih); 690 LIST_FOREACH(s, &ih->states, entry) { 691 if (s->creatorid == creatorid) { 692 s->state_flags |= PFSTATE_NOSYNC; 693 pf_unlink_state(s, PF_ENTER_LOCKED); 694 goto relock; 695 } 696 } 697 PF_HASHROW_UNLOCK(ih); 698 } 699 } 700 701 return (len); 702 } 703 704 static int 705 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 706 { 707 struct mbuf *mp; 708 struct pfsync_state *sa, *sp; 709 int len = sizeof(*sp) * count; 710 int i, offp; 711 712 mp = m_pulldown(m, offset, len, &offp); 713 if (mp == NULL) { 714 V_pfsyncstats.pfsyncs_badlen++; 715 return (-1); 716 } 717 sa = (struct pfsync_state *)(mp->m_data + offp); 718 719 for (i = 0; i < count; i++) { 720 sp = &sa[i]; 721 722 /* Check for invalid values. */ 723 if (sp->timeout >= PFTM_MAX || 724 sp->src.state > PF_TCPS_PROXY_DST || 725 sp->dst.state > PF_TCPS_PROXY_DST || 726 sp->direction > PF_OUT || 727 (sp->af != AF_INET && sp->af != AF_INET6)) { 728 if (V_pf_status.debug >= PF_DEBUG_MISC) 729 printf("%s: invalid value\n", __func__); 730 V_pfsyncstats.pfsyncs_badval++; 731 continue; 732 } 733 734 if (pfsync_state_import(sp, pkt->flags) == ENOMEM) 735 /* Drop out, but process the rest of the actions. */ 736 break; 737 } 738 739 return (len); 740 } 741 742 static int 743 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 744 { 745 struct pfsync_ins_ack *ia, *iaa; 746 struct pf_state *st; 747 748 struct mbuf *mp; 749 int len = count * sizeof(*ia); 750 int offp, i; 751 752 mp = m_pulldown(m, offset, len, &offp); 753 if (mp == NULL) { 754 V_pfsyncstats.pfsyncs_badlen++; 755 return (-1); 756 } 757 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 758 759 for (i = 0; i < count; i++) { 760 ia = &iaa[i]; 761 762 st = pf_find_state_byid(ia->id, ia->creatorid); 763 if (st == NULL) 764 continue; 765 766 if (st->state_flags & PFSTATE_ACK) { 767 PFSYNC_LOCK(V_pfsyncif); 768 pfsync_undefer_state(st, 0); 769 PFSYNC_UNLOCK(V_pfsyncif); 770 } 771 PF_STATE_UNLOCK(st); 772 } 773 /* 774 * XXX this is not yet implemented, but we know the size of the 775 * message so we can skip it. 776 */ 777 778 return (count * sizeof(struct pfsync_ins_ack)); 779 } 780 781 static int 782 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 783 struct pfsync_state_peer *dst) 784 { 785 int sync = 0; 786 787 PF_STATE_LOCK_ASSERT(st); 788 789 /* 790 * The state should never go backwards except 791 * for syn-proxy states. Neither should the 792 * sequence window slide backwards. 793 */ 794 if ((st->src.state > src->state && 795 (st->src.state < PF_TCPS_PROXY_SRC || 796 src->state >= PF_TCPS_PROXY_SRC)) || 797 798 (st->src.state == src->state && 799 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 800 sync++; 801 else 802 pf_state_peer_ntoh(src, &st->src); 803 804 if ((st->dst.state > dst->state) || 805 806 (st->dst.state >= TCPS_SYN_SENT && 807 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 808 sync++; 809 else 810 pf_state_peer_ntoh(dst, &st->dst); 811 812 return (sync); 813 } 814 815 static int 816 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 817 { 818 struct pfsync_softc *sc = V_pfsyncif; 819 struct pfsync_state *sa, *sp; 820 struct pf_state *st; 821 int sync; 822 823 struct mbuf *mp; 824 int len = count * sizeof(*sp); 825 int offp, i; 826 827 mp = m_pulldown(m, offset, len, &offp); 828 if (mp == NULL) { 829 V_pfsyncstats.pfsyncs_badlen++; 830 return (-1); 831 } 832 sa = (struct pfsync_state *)(mp->m_data + offp); 833 834 for (i = 0; i < count; i++) { 835 sp = &sa[i]; 836 837 /* check for invalid values */ 838 if (sp->timeout >= PFTM_MAX || 839 sp->src.state > PF_TCPS_PROXY_DST || 840 sp->dst.state > PF_TCPS_PROXY_DST) { 841 if (V_pf_status.debug >= PF_DEBUG_MISC) { 842 printf("pfsync_input: PFSYNC_ACT_UPD: " 843 "invalid value\n"); 844 } 845 V_pfsyncstats.pfsyncs_badval++; 846 continue; 847 } 848 849 st = pf_find_state_byid(sp->id, sp->creatorid); 850 if (st == NULL) { 851 /* insert the update */ 852 if (pfsync_state_import(sp, 0)) 853 V_pfsyncstats.pfsyncs_badstate++; 854 continue; 855 } 856 857 if (st->state_flags & PFSTATE_ACK) { 858 PFSYNC_LOCK(sc); 859 pfsync_undefer_state(st, 1); 860 PFSYNC_UNLOCK(sc); 861 } 862 863 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 864 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 865 else { 866 sync = 0; 867 868 /* 869 * Non-TCP protocol state machine always go 870 * forwards 871 */ 872 if (st->src.state > sp->src.state) 873 sync++; 874 else 875 pf_state_peer_ntoh(&sp->src, &st->src); 876 if (st->dst.state > sp->dst.state) 877 sync++; 878 else 879 pf_state_peer_ntoh(&sp->dst, &st->dst); 880 } 881 if (sync < 2) { 882 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 883 pf_state_peer_ntoh(&sp->dst, &st->dst); 884 st->expire = time_uptime; 885 st->timeout = sp->timeout; 886 } 887 st->pfsync_time = time_uptime; 888 889 if (sync) { 890 V_pfsyncstats.pfsyncs_stale++; 891 892 pfsync_update_state(st); 893 PF_STATE_UNLOCK(st); 894 PFSYNC_LOCK(sc); 895 pfsync_push(sc); 896 PFSYNC_UNLOCK(sc); 897 continue; 898 } 899 PF_STATE_UNLOCK(st); 900 } 901 902 return (len); 903 } 904 905 static int 906 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 907 { 908 struct pfsync_softc *sc = V_pfsyncif; 909 struct pfsync_upd_c *ua, *up; 910 struct pf_state *st; 911 int len = count * sizeof(*up); 912 int sync; 913 struct mbuf *mp; 914 int offp, i; 915 916 mp = m_pulldown(m, offset, len, &offp); 917 if (mp == NULL) { 918 V_pfsyncstats.pfsyncs_badlen++; 919 return (-1); 920 } 921 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 922 923 for (i = 0; i < count; i++) { 924 up = &ua[i]; 925 926 /* check for invalid values */ 927 if (up->timeout >= PFTM_MAX || 928 up->src.state > PF_TCPS_PROXY_DST || 929 up->dst.state > PF_TCPS_PROXY_DST) { 930 if (V_pf_status.debug >= PF_DEBUG_MISC) { 931 printf("pfsync_input: " 932 "PFSYNC_ACT_UPD_C: " 933 "invalid value\n"); 934 } 935 V_pfsyncstats.pfsyncs_badval++; 936 continue; 937 } 938 939 st = pf_find_state_byid(up->id, up->creatorid); 940 if (st == NULL) { 941 /* We don't have this state. Ask for it. */ 942 PFSYNC_LOCK(sc); 943 pfsync_request_update(up->creatorid, up->id); 944 PFSYNC_UNLOCK(sc); 945 continue; 946 } 947 948 if (st->state_flags & PFSTATE_ACK) { 949 PFSYNC_LOCK(sc); 950 pfsync_undefer_state(st, 1); 951 PFSYNC_UNLOCK(sc); 952 } 953 954 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 955 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 956 else { 957 sync = 0; 958 959 /* 960 * Non-TCP protocol state machine always go 961 * forwards 962 */ 963 if (st->src.state > up->src.state) 964 sync++; 965 else 966 pf_state_peer_ntoh(&up->src, &st->src); 967 if (st->dst.state > up->dst.state) 968 sync++; 969 else 970 pf_state_peer_ntoh(&up->dst, &st->dst); 971 } 972 if (sync < 2) { 973 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 974 pf_state_peer_ntoh(&up->dst, &st->dst); 975 st->expire = time_uptime; 976 st->timeout = up->timeout; 977 } 978 st->pfsync_time = time_uptime; 979 980 if (sync) { 981 V_pfsyncstats.pfsyncs_stale++; 982 983 pfsync_update_state(st); 984 PF_STATE_UNLOCK(st); 985 PFSYNC_LOCK(sc); 986 pfsync_push(sc); 987 PFSYNC_UNLOCK(sc); 988 continue; 989 } 990 PF_STATE_UNLOCK(st); 991 } 992 993 return (len); 994 } 995 996 static int 997 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 998 { 999 struct pfsync_upd_req *ur, *ura; 1000 struct mbuf *mp; 1001 int len = count * sizeof(*ur); 1002 int i, offp; 1003 1004 struct pf_state *st; 1005 1006 mp = m_pulldown(m, offset, len, &offp); 1007 if (mp == NULL) { 1008 V_pfsyncstats.pfsyncs_badlen++; 1009 return (-1); 1010 } 1011 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1012 1013 for (i = 0; i < count; i++) { 1014 ur = &ura[i]; 1015 1016 if (ur->id == 0 && ur->creatorid == 0) 1017 pfsync_bulk_start(); 1018 else { 1019 st = pf_find_state_byid(ur->id, ur->creatorid); 1020 if (st == NULL) { 1021 V_pfsyncstats.pfsyncs_badstate++; 1022 continue; 1023 } 1024 if (st->state_flags & PFSTATE_NOSYNC) { 1025 PF_STATE_UNLOCK(st); 1026 continue; 1027 } 1028 1029 pfsync_update_state_req(st); 1030 PF_STATE_UNLOCK(st); 1031 } 1032 } 1033 1034 return (len); 1035 } 1036 1037 static int 1038 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1039 { 1040 struct mbuf *mp; 1041 struct pfsync_state *sa, *sp; 1042 struct pf_state *st; 1043 int len = count * sizeof(*sp); 1044 int offp, i; 1045 1046 mp = m_pulldown(m, offset, len, &offp); 1047 if (mp == NULL) { 1048 V_pfsyncstats.pfsyncs_badlen++; 1049 return (-1); 1050 } 1051 sa = (struct pfsync_state *)(mp->m_data + offp); 1052 1053 for (i = 0; i < count; i++) { 1054 sp = &sa[i]; 1055 1056 st = pf_find_state_byid(sp->id, sp->creatorid); 1057 if (st == NULL) { 1058 V_pfsyncstats.pfsyncs_badstate++; 1059 continue; 1060 } 1061 st->state_flags |= PFSTATE_NOSYNC; 1062 pf_unlink_state(st, PF_ENTER_LOCKED); 1063 } 1064 1065 return (len); 1066 } 1067 1068 static int 1069 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1070 { 1071 struct mbuf *mp; 1072 struct pfsync_del_c *sa, *sp; 1073 struct pf_state *st; 1074 int len = count * sizeof(*sp); 1075 int offp, i; 1076 1077 mp = m_pulldown(m, offset, len, &offp); 1078 if (mp == NULL) { 1079 V_pfsyncstats.pfsyncs_badlen++; 1080 return (-1); 1081 } 1082 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1083 1084 for (i = 0; i < count; i++) { 1085 sp = &sa[i]; 1086 1087 st = pf_find_state_byid(sp->id, sp->creatorid); 1088 if (st == NULL) { 1089 V_pfsyncstats.pfsyncs_badstate++; 1090 continue; 1091 } 1092 1093 st->state_flags |= PFSTATE_NOSYNC; 1094 pf_unlink_state(st, PF_ENTER_LOCKED); 1095 } 1096 1097 return (len); 1098 } 1099 1100 static int 1101 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1102 { 1103 struct pfsync_softc *sc = V_pfsyncif; 1104 struct pfsync_bus *bus; 1105 struct mbuf *mp; 1106 int len = count * sizeof(*bus); 1107 int offp; 1108 1109 PFSYNC_BLOCK(sc); 1110 1111 /* If we're not waiting for a bulk update, who cares. */ 1112 if (sc->sc_ureq_sent == 0) { 1113 PFSYNC_BUNLOCK(sc); 1114 return (len); 1115 } 1116 1117 mp = m_pulldown(m, offset, len, &offp); 1118 if (mp == NULL) { 1119 PFSYNC_BUNLOCK(sc); 1120 V_pfsyncstats.pfsyncs_badlen++; 1121 return (-1); 1122 } 1123 bus = (struct pfsync_bus *)(mp->m_data + offp); 1124 1125 switch (bus->status) { 1126 case PFSYNC_BUS_START: 1127 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1128 V_pf_limits[PF_LIMIT_STATES].limit / 1129 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1130 sizeof(struct pfsync_state)), 1131 pfsync_bulk_fail, sc); 1132 if (V_pf_status.debug >= PF_DEBUG_MISC) 1133 printf("pfsync: received bulk update start\n"); 1134 break; 1135 1136 case PFSYNC_BUS_END: 1137 if (time_uptime - ntohl(bus->endtime) >= 1138 sc->sc_ureq_sent) { 1139 /* that's it, we're happy */ 1140 sc->sc_ureq_sent = 0; 1141 sc->sc_bulk_tries = 0; 1142 callout_stop(&sc->sc_bulkfail_tmo); 1143 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1144 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1145 "pfsync bulk done"); 1146 sc->sc_flags |= PFSYNCF_OK; 1147 if (V_pf_status.debug >= PF_DEBUG_MISC) 1148 printf("pfsync: received valid " 1149 "bulk update end\n"); 1150 } else { 1151 if (V_pf_status.debug >= PF_DEBUG_MISC) 1152 printf("pfsync: received invalid " 1153 "bulk update end: bad timestamp\n"); 1154 } 1155 break; 1156 } 1157 PFSYNC_BUNLOCK(sc); 1158 1159 return (len); 1160 } 1161 1162 static int 1163 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1164 { 1165 int len = count * sizeof(struct pfsync_tdb); 1166 1167 #if defined(IPSEC) 1168 struct pfsync_tdb *tp; 1169 struct mbuf *mp; 1170 int offp; 1171 int i; 1172 int s; 1173 1174 mp = m_pulldown(m, offset, len, &offp); 1175 if (mp == NULL) { 1176 V_pfsyncstats.pfsyncs_badlen++; 1177 return (-1); 1178 } 1179 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1180 1181 for (i = 0; i < count; i++) 1182 pfsync_update_net_tdb(&tp[i]); 1183 #endif 1184 1185 return (len); 1186 } 1187 1188 #if defined(IPSEC) 1189 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1190 static void 1191 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1192 { 1193 struct tdb *tdb; 1194 int s; 1195 1196 /* check for invalid values */ 1197 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1198 (pt->dst.sa.sa_family != AF_INET && 1199 pt->dst.sa.sa_family != AF_INET6)) 1200 goto bad; 1201 1202 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1203 if (tdb) { 1204 pt->rpl = ntohl(pt->rpl); 1205 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1206 1207 /* Neither replay nor byte counter should ever decrease. */ 1208 if (pt->rpl < tdb->tdb_rpl || 1209 pt->cur_bytes < tdb->tdb_cur_bytes) { 1210 goto bad; 1211 } 1212 1213 tdb->tdb_rpl = pt->rpl; 1214 tdb->tdb_cur_bytes = pt->cur_bytes; 1215 } 1216 return; 1217 1218 bad: 1219 if (V_pf_status.debug >= PF_DEBUG_MISC) 1220 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1221 "invalid value\n"); 1222 V_pfsyncstats.pfsyncs_badstate++; 1223 return; 1224 } 1225 #endif 1226 1227 1228 static int 1229 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1230 { 1231 /* check if we are at the right place in the packet */ 1232 if (offset != m->m_pkthdr.len) 1233 V_pfsyncstats.pfsyncs_badlen++; 1234 1235 /* we're done. free and let the caller return */ 1236 m_freem(m); 1237 return (-1); 1238 } 1239 1240 static int 1241 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1242 { 1243 V_pfsyncstats.pfsyncs_badact++; 1244 1245 m_freem(m); 1246 return (-1); 1247 } 1248 1249 static int 1250 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1251 struct route *rt) 1252 { 1253 m_freem(m); 1254 return (0); 1255 } 1256 1257 /* ARGSUSED */ 1258 static int 1259 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1260 { 1261 struct pfsync_softc *sc = ifp->if_softc; 1262 struct ifreq *ifr = (struct ifreq *)data; 1263 struct pfsyncreq pfsyncr; 1264 int error; 1265 1266 switch (cmd) { 1267 case SIOCSIFFLAGS: 1268 PFSYNC_LOCK(sc); 1269 if (ifp->if_flags & IFF_UP) { 1270 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1271 PFSYNC_UNLOCK(sc); 1272 pfsync_pointers_init(); 1273 } else { 1274 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1275 PFSYNC_UNLOCK(sc); 1276 pfsync_pointers_uninit(); 1277 } 1278 break; 1279 case SIOCSIFMTU: 1280 if (!sc->sc_sync_if || 1281 ifr->ifr_mtu <= PFSYNC_MINPKT || 1282 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1283 return (EINVAL); 1284 if (ifr->ifr_mtu < ifp->if_mtu) { 1285 PFSYNC_LOCK(sc); 1286 if (sc->sc_len > PFSYNC_MINPKT) 1287 pfsync_sendout(1); 1288 PFSYNC_UNLOCK(sc); 1289 } 1290 ifp->if_mtu = ifr->ifr_mtu; 1291 break; 1292 case SIOCGETPFSYNC: 1293 bzero(&pfsyncr, sizeof(pfsyncr)); 1294 PFSYNC_LOCK(sc); 1295 if (sc->sc_sync_if) { 1296 strlcpy(pfsyncr.pfsyncr_syncdev, 1297 sc->sc_sync_if->if_xname, IFNAMSIZ); 1298 } 1299 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1300 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1301 pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == 1302 (sc->sc_flags & PFSYNCF_DEFER)); 1303 PFSYNC_UNLOCK(sc); 1304 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1305 1306 case SIOCSETPFSYNC: 1307 { 1308 struct ip_moptions *imo = &sc->sc_imo; 1309 struct ifnet *sifp; 1310 struct ip *ip; 1311 void *mship = NULL; 1312 1313 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1314 return (error); 1315 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 1316 return (error); 1317 1318 if (pfsyncr.pfsyncr_maxupdates > 255) 1319 return (EINVAL); 1320 1321 if (pfsyncr.pfsyncr_syncdev[0] == 0) 1322 sifp = NULL; 1323 else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) 1324 return (EINVAL); 1325 1326 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0 && sifp != NULL) 1327 mship = malloc((sizeof(struct in_multi *) * 1328 IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); 1329 1330 PFSYNC_LOCK(sc); 1331 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1332 sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); 1333 else 1334 sc->sc_sync_peer.s_addr = 1335 pfsyncr.pfsyncr_syncpeer.s_addr; 1336 1337 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1338 if (pfsyncr.pfsyncr_defer) { 1339 sc->sc_flags |= PFSYNCF_DEFER; 1340 pfsync_defer_ptr = pfsync_defer; 1341 } else { 1342 sc->sc_flags &= ~PFSYNCF_DEFER; 1343 pfsync_defer_ptr = NULL; 1344 } 1345 1346 if (sifp == NULL) { 1347 if (sc->sc_sync_if) 1348 if_rele(sc->sc_sync_if); 1349 sc->sc_sync_if = NULL; 1350 if (imo->imo_membership) 1351 pfsync_multicast_cleanup(sc); 1352 PFSYNC_UNLOCK(sc); 1353 break; 1354 } 1355 1356 if (sc->sc_len > PFSYNC_MINPKT && 1357 (sifp->if_mtu < sc->sc_ifp->if_mtu || 1358 (sc->sc_sync_if != NULL && 1359 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 1360 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 1361 pfsync_sendout(1); 1362 1363 if (imo->imo_membership) 1364 pfsync_multicast_cleanup(sc); 1365 1366 if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 1367 error = pfsync_multicast_setup(sc, sifp, mship); 1368 if (error) { 1369 if_rele(sifp); 1370 free(mship, M_PFSYNC); 1371 return (error); 1372 } 1373 } 1374 if (sc->sc_sync_if) 1375 if_rele(sc->sc_sync_if); 1376 sc->sc_sync_if = sifp; 1377 1378 ip = &sc->sc_template; 1379 bzero(ip, sizeof(*ip)); 1380 ip->ip_v = IPVERSION; 1381 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1382 ip->ip_tos = IPTOS_LOWDELAY; 1383 /* len and id are set later. */ 1384 ip->ip_off = htons(IP_DF); 1385 ip->ip_ttl = PFSYNC_DFLTTL; 1386 ip->ip_p = IPPROTO_PFSYNC; 1387 ip->ip_src.s_addr = INADDR_ANY; 1388 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1389 1390 /* Request a full state table update. */ 1391 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1392 (*carp_demote_adj_p)(V_pfsync_carp_adj, 1393 "pfsync bulk start"); 1394 sc->sc_flags &= ~PFSYNCF_OK; 1395 if (V_pf_status.debug >= PF_DEBUG_MISC) 1396 printf("pfsync: requesting bulk update\n"); 1397 pfsync_request_update(0, 0); 1398 PFSYNC_UNLOCK(sc); 1399 PFSYNC_BLOCK(sc); 1400 sc->sc_ureq_sent = time_uptime; 1401 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, 1402 sc); 1403 PFSYNC_BUNLOCK(sc); 1404 1405 break; 1406 } 1407 default: 1408 return (ENOTTY); 1409 } 1410 1411 return (0); 1412 } 1413 1414 static void 1415 pfsync_out_state(struct pf_state *st, void *buf) 1416 { 1417 struct pfsync_state *sp = buf; 1418 1419 pfsync_state_export(sp, st); 1420 } 1421 1422 static void 1423 pfsync_out_iack(struct pf_state *st, void *buf) 1424 { 1425 struct pfsync_ins_ack *iack = buf; 1426 1427 iack->id = st->id; 1428 iack->creatorid = st->creatorid; 1429 } 1430 1431 static void 1432 pfsync_out_upd_c(struct pf_state *st, void *buf) 1433 { 1434 struct pfsync_upd_c *up = buf; 1435 1436 bzero(up, sizeof(*up)); 1437 up->id = st->id; 1438 pf_state_peer_hton(&st->src, &up->src); 1439 pf_state_peer_hton(&st->dst, &up->dst); 1440 up->creatorid = st->creatorid; 1441 up->timeout = st->timeout; 1442 } 1443 1444 static void 1445 pfsync_out_del(struct pf_state *st, void *buf) 1446 { 1447 struct pfsync_del_c *dp = buf; 1448 1449 dp->id = st->id; 1450 dp->creatorid = st->creatorid; 1451 st->state_flags |= PFSTATE_NOSYNC; 1452 } 1453 1454 static void 1455 pfsync_drop(struct pfsync_softc *sc) 1456 { 1457 struct pf_state *st, *next; 1458 struct pfsync_upd_req_item *ur; 1459 int q; 1460 1461 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1462 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1463 continue; 1464 1465 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { 1466 KASSERT(st->sync_state == q, 1467 ("%s: st->sync_state == q", 1468 __func__)); 1469 st->sync_state = PFSYNC_S_NONE; 1470 pf_release_state(st); 1471 } 1472 TAILQ_INIT(&sc->sc_qs[q]); 1473 } 1474 1475 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1476 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1477 free(ur, M_PFSYNC); 1478 } 1479 1480 sc->sc_plus = NULL; 1481 sc->sc_len = PFSYNC_MINPKT; 1482 } 1483 1484 static void 1485 pfsync_sendout(int schedswi) 1486 { 1487 struct pfsync_softc *sc = V_pfsyncif; 1488 struct ifnet *ifp = sc->sc_ifp; 1489 struct mbuf *m; 1490 struct ip *ip; 1491 struct pfsync_header *ph; 1492 struct pfsync_subheader *subh; 1493 struct pf_state *st; 1494 struct pfsync_upd_req_item *ur; 1495 int offset; 1496 int q, count = 0; 1497 1498 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1499 KASSERT(sc->sc_len > PFSYNC_MINPKT, 1500 ("%s: sc_len %zu", __func__, sc->sc_len)); 1501 PFSYNC_LOCK_ASSERT(sc); 1502 1503 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1504 pfsync_drop(sc); 1505 return; 1506 } 1507 1508 m = m_get2(M_NOWAIT, MT_DATA, M_PKTHDR, max_linkhdr + sc->sc_len); 1509 if (m == NULL) { 1510 sc->sc_ifp->if_oerrors++; 1511 V_pfsyncstats.pfsyncs_onomem++; 1512 return; 1513 } 1514 m->m_data += max_linkhdr; 1515 m->m_len = m->m_pkthdr.len = sc->sc_len; 1516 1517 /* build the ip header */ 1518 ip = (struct ip *)m->m_data; 1519 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1520 offset = sizeof(*ip); 1521 1522 ip->ip_len = htons(m->m_pkthdr.len); 1523 ip->ip_id = htons(ip_randomid()); 1524 1525 /* build the pfsync header */ 1526 ph = (struct pfsync_header *)(m->m_data + offset); 1527 bzero(ph, sizeof(*ph)); 1528 offset += sizeof(*ph); 1529 1530 ph->version = PFSYNC_VERSION; 1531 ph->len = htons(sc->sc_len - sizeof(*ip)); 1532 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1533 1534 /* walk the queues */ 1535 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1536 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1537 continue; 1538 1539 subh = (struct pfsync_subheader *)(m->m_data + offset); 1540 offset += sizeof(*subh); 1541 1542 count = 0; 1543 TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) { 1544 KASSERT(st->sync_state == q, 1545 ("%s: st->sync_state == q", 1546 __func__)); 1547 /* 1548 * XXXGL: some of write methods do unlocked reads 1549 * of state data :( 1550 */ 1551 pfsync_qs[q].write(st, m->m_data + offset); 1552 offset += pfsync_qs[q].len; 1553 st->sync_state = PFSYNC_S_NONE; 1554 pf_release_state(st); 1555 count++; 1556 } 1557 TAILQ_INIT(&sc->sc_qs[q]); 1558 1559 bzero(subh, sizeof(*subh)); 1560 subh->action = pfsync_qs[q].action; 1561 subh->count = htons(count); 1562 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1563 } 1564 1565 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { 1566 subh = (struct pfsync_subheader *)(m->m_data + offset); 1567 offset += sizeof(*subh); 1568 1569 count = 0; 1570 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1571 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1572 1573 bcopy(&ur->ur_msg, m->m_data + offset, 1574 sizeof(ur->ur_msg)); 1575 offset += sizeof(ur->ur_msg); 1576 free(ur, M_PFSYNC); 1577 count++; 1578 } 1579 1580 bzero(subh, sizeof(*subh)); 1581 subh->action = PFSYNC_ACT_UPD_REQ; 1582 subh->count = htons(count); 1583 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1584 } 1585 1586 /* has someone built a custom region for us to add? */ 1587 if (sc->sc_plus != NULL) { 1588 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); 1589 offset += sc->sc_pluslen; 1590 1591 sc->sc_plus = NULL; 1592 } 1593 1594 subh = (struct pfsync_subheader *)(m->m_data + offset); 1595 offset += sizeof(*subh); 1596 1597 bzero(subh, sizeof(*subh)); 1598 subh->action = PFSYNC_ACT_EOF; 1599 subh->count = htons(1); 1600 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1601 1602 /* we're done, let's put it on the wire */ 1603 if (ifp->if_bpf) { 1604 m->m_data += sizeof(*ip); 1605 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); 1606 BPF_MTAP(ifp, m); 1607 m->m_data -= sizeof(*ip); 1608 m->m_len = m->m_pkthdr.len = sc->sc_len; 1609 } 1610 1611 if (sc->sc_sync_if == NULL) { 1612 sc->sc_len = PFSYNC_MINPKT; 1613 m_freem(m); 1614 return; 1615 } 1616 1617 sc->sc_ifp->if_opackets++; 1618 sc->sc_ifp->if_obytes += m->m_pkthdr.len; 1619 sc->sc_len = PFSYNC_MINPKT; 1620 1621 if (!_IF_QFULL(&sc->sc_ifp->if_snd)) 1622 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1623 else { 1624 m_freem(m); 1625 sc->sc_ifp->if_snd.ifq_drops++; 1626 } 1627 if (schedswi) 1628 swi_sched(V_pfsync_swi_cookie, 0); 1629 } 1630 1631 static void 1632 pfsync_insert_state(struct pf_state *st) 1633 { 1634 struct pfsync_softc *sc = V_pfsyncif; 1635 1636 if (st->state_flags & PFSTATE_NOSYNC) 1637 return; 1638 1639 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1640 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1641 st->state_flags |= PFSTATE_NOSYNC; 1642 return; 1643 } 1644 1645 KASSERT(st->sync_state == PFSYNC_S_NONE, 1646 ("%s: st->sync_state == PFSYNC_S_NONE", __func__)); 1647 1648 PFSYNC_LOCK(sc); 1649 if (sc->sc_len == PFSYNC_MINPKT) 1650 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1651 1652 pfsync_q_ins(st, PFSYNC_S_INS); 1653 PFSYNC_UNLOCK(sc); 1654 1655 st->sync_updates = 0; 1656 } 1657 1658 static int 1659 pfsync_defer(struct pf_state *st, struct mbuf *m) 1660 { 1661 struct pfsync_softc *sc = V_pfsyncif; 1662 struct pfsync_deferral *pd; 1663 1664 if (m->m_flags & (M_BCAST|M_MCAST)) 1665 return (0); 1666 1667 PFSYNC_LOCK(sc); 1668 1669 if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || 1670 !(sc->sc_flags & PFSYNCF_DEFER)) { 1671 PFSYNC_UNLOCK(sc); 1672 return (0); 1673 } 1674 1675 if (sc->sc_deferred >= 128) 1676 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); 1677 1678 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1679 if (pd == NULL) 1680 return (0); 1681 sc->sc_deferred++; 1682 1683 m->m_flags |= M_SKIP_FIREWALL; 1684 st->state_flags |= PFSTATE_ACK; 1685 1686 pd->pd_sc = sc; 1687 pd->pd_refs = 0; 1688 pd->pd_st = st; 1689 pf_ref_state(st); 1690 pd->pd_m = m; 1691 1692 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1693 callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1694 callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); 1695 1696 pfsync_push(sc); 1697 1698 return (1); 1699 } 1700 1701 static void 1702 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1703 { 1704 struct pfsync_softc *sc = pd->pd_sc; 1705 struct mbuf *m = pd->pd_m; 1706 struct pf_state *st = pd->pd_st; 1707 1708 PFSYNC_LOCK_ASSERT(sc); 1709 1710 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1711 sc->sc_deferred--; 1712 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1713 free(pd, M_PFSYNC); 1714 pf_release_state(st); 1715 1716 if (drop) 1717 m_freem(m); 1718 else { 1719 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1720 pfsync_push(sc); 1721 } 1722 } 1723 1724 static void 1725 pfsync_defer_tmo(void *arg) 1726 { 1727 struct pfsync_deferral *pd = arg; 1728 struct pfsync_softc *sc = pd->pd_sc; 1729 struct mbuf *m = pd->pd_m; 1730 struct pf_state *st = pd->pd_st; 1731 1732 PFSYNC_LOCK_ASSERT(sc); 1733 1734 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 1735 1736 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1737 sc->sc_deferred--; 1738 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1739 if (pd->pd_refs == 0) 1740 free(pd, M_PFSYNC); 1741 PFSYNC_UNLOCK(sc); 1742 1743 ip_output(m, NULL, NULL, 0, NULL, NULL); 1744 1745 pf_release_state(st); 1746 1747 CURVNET_RESTORE(); 1748 } 1749 1750 static void 1751 pfsync_undefer_state(struct pf_state *st, int drop) 1752 { 1753 struct pfsync_softc *sc = V_pfsyncif; 1754 struct pfsync_deferral *pd; 1755 1756 PFSYNC_LOCK_ASSERT(sc); 1757 1758 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1759 if (pd->pd_st == st) { 1760 if (callout_stop(&pd->pd_tmo)) 1761 pfsync_undefer(pd, drop); 1762 return; 1763 } 1764 } 1765 1766 panic("%s: unable to find deferred state", __func__); 1767 } 1768 1769 static void 1770 pfsync_update_state(struct pf_state *st) 1771 { 1772 struct pfsync_softc *sc = V_pfsyncif; 1773 int sync = 0; 1774 1775 PF_STATE_LOCK_ASSERT(st); 1776 PFSYNC_LOCK(sc); 1777 1778 if (st->state_flags & PFSTATE_ACK) 1779 pfsync_undefer_state(st, 0); 1780 if (st->state_flags & PFSTATE_NOSYNC) { 1781 if (st->sync_state != PFSYNC_S_NONE) 1782 pfsync_q_del(st); 1783 PFSYNC_UNLOCK(sc); 1784 return; 1785 } 1786 1787 if (sc->sc_len == PFSYNC_MINPKT) 1788 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1789 1790 switch (st->sync_state) { 1791 case PFSYNC_S_UPD_C: 1792 case PFSYNC_S_UPD: 1793 case PFSYNC_S_INS: 1794 /* we're already handling it */ 1795 1796 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1797 st->sync_updates++; 1798 if (st->sync_updates >= sc->sc_maxupdates) 1799 sync = 1; 1800 } 1801 break; 1802 1803 case PFSYNC_S_IACK: 1804 pfsync_q_del(st); 1805 case PFSYNC_S_NONE: 1806 pfsync_q_ins(st, PFSYNC_S_UPD_C); 1807 st->sync_updates = 0; 1808 break; 1809 1810 default: 1811 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1812 } 1813 1814 if (sync || (time_uptime - st->pfsync_time) < 2) 1815 pfsync_push(sc); 1816 1817 PFSYNC_UNLOCK(sc); 1818 } 1819 1820 static void 1821 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1822 { 1823 struct pfsync_softc *sc = V_pfsyncif; 1824 struct pfsync_upd_req_item *item; 1825 size_t nlen = sizeof(struct pfsync_upd_req); 1826 1827 PFSYNC_LOCK_ASSERT(sc); 1828 1829 /* 1830 * This code does a bit to prevent multiple update requests for the 1831 * same state being generated. It searches current subheader queue, 1832 * but it doesn't lookup into queue of already packed datagrams. 1833 */ 1834 TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) 1835 if (item->ur_msg.id == id && 1836 item->ur_msg.creatorid == creatorid) 1837 return; 1838 1839 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1840 if (item == NULL) 1841 return; /* XXX stats */ 1842 1843 item->ur_msg.id = id; 1844 item->ur_msg.creatorid = creatorid; 1845 1846 if (TAILQ_EMPTY(&sc->sc_upd_req_list)) 1847 nlen += sizeof(struct pfsync_subheader); 1848 1849 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 1850 pfsync_sendout(1); 1851 1852 nlen = sizeof(struct pfsync_subheader) + 1853 sizeof(struct pfsync_upd_req); 1854 } 1855 1856 TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); 1857 sc->sc_len += nlen; 1858 } 1859 1860 static void 1861 pfsync_update_state_req(struct pf_state *st) 1862 { 1863 struct pfsync_softc *sc = V_pfsyncif; 1864 1865 PF_STATE_LOCK_ASSERT(st); 1866 PFSYNC_LOCK(sc); 1867 1868 if (st->state_flags & PFSTATE_NOSYNC) { 1869 if (st->sync_state != PFSYNC_S_NONE) 1870 pfsync_q_del(st); 1871 PFSYNC_UNLOCK(sc); 1872 return; 1873 } 1874 1875 switch (st->sync_state) { 1876 case PFSYNC_S_UPD_C: 1877 case PFSYNC_S_IACK: 1878 pfsync_q_del(st); 1879 case PFSYNC_S_NONE: 1880 pfsync_q_ins(st, PFSYNC_S_UPD); 1881 pfsync_push(sc); 1882 break; 1883 1884 case PFSYNC_S_INS: 1885 case PFSYNC_S_UPD: 1886 case PFSYNC_S_DEL: 1887 /* we're already handling it */ 1888 break; 1889 1890 default: 1891 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1892 } 1893 1894 PFSYNC_UNLOCK(sc); 1895 } 1896 1897 static void 1898 pfsync_delete_state(struct pf_state *st) 1899 { 1900 struct pfsync_softc *sc = V_pfsyncif; 1901 1902 PFSYNC_LOCK(sc); 1903 if (st->state_flags & PFSTATE_ACK) 1904 pfsync_undefer_state(st, 1); 1905 if (st->state_flags & PFSTATE_NOSYNC) { 1906 if (st->sync_state != PFSYNC_S_NONE) 1907 pfsync_q_del(st); 1908 PFSYNC_UNLOCK(sc); 1909 return; 1910 } 1911 1912 if (sc->sc_len == PFSYNC_MINPKT) 1913 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1914 1915 switch (st->sync_state) { 1916 case PFSYNC_S_INS: 1917 /* We never got to tell the world so just forget about it. */ 1918 pfsync_q_del(st); 1919 break; 1920 1921 case PFSYNC_S_UPD_C: 1922 case PFSYNC_S_UPD: 1923 case PFSYNC_S_IACK: 1924 pfsync_q_del(st); 1925 /* FALLTHROUGH to putting it on the del list */ 1926 1927 case PFSYNC_S_NONE: 1928 pfsync_q_ins(st, PFSYNC_S_DEL); 1929 break; 1930 1931 default: 1932 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1933 } 1934 PFSYNC_UNLOCK(sc); 1935 } 1936 1937 static void 1938 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 1939 { 1940 struct pfsync_softc *sc = V_pfsyncif; 1941 struct { 1942 struct pfsync_subheader subh; 1943 struct pfsync_clr clr; 1944 } __packed r; 1945 1946 bzero(&r, sizeof(r)); 1947 1948 r.subh.action = PFSYNC_ACT_CLR; 1949 r.subh.count = htons(1); 1950 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 1951 1952 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 1953 r.clr.creatorid = creatorid; 1954 1955 PFSYNC_LOCK(sc); 1956 pfsync_send_plus(&r, sizeof(r)); 1957 PFSYNC_UNLOCK(sc); 1958 } 1959 1960 static void 1961 pfsync_q_ins(struct pf_state *st, int q) 1962 { 1963 struct pfsync_softc *sc = V_pfsyncif; 1964 size_t nlen = pfsync_qs[q].len; 1965 1966 PFSYNC_LOCK_ASSERT(sc); 1967 1968 KASSERT(st->sync_state == PFSYNC_S_NONE, 1969 ("%s: st->sync_state == PFSYNC_S_NONE", __func__)); 1970 KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 1971 sc->sc_len)); 1972 1973 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1974 nlen += sizeof(struct pfsync_subheader); 1975 1976 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 1977 pfsync_sendout(1); 1978 1979 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 1980 } 1981 1982 sc->sc_len += nlen; 1983 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); 1984 st->sync_state = q; 1985 pf_ref_state(st); 1986 } 1987 1988 static void 1989 pfsync_q_del(struct pf_state *st) 1990 { 1991 struct pfsync_softc *sc = V_pfsyncif; 1992 int q = st->sync_state; 1993 1994 PFSYNC_LOCK_ASSERT(sc); 1995 KASSERT(st->sync_state != PFSYNC_S_NONE, 1996 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 1997 1998 sc->sc_len -= pfsync_qs[q].len; 1999 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 2000 st->sync_state = PFSYNC_S_NONE; 2001 pf_release_state(st); 2002 2003 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2004 sc->sc_len -= sizeof(struct pfsync_subheader); 2005 } 2006 2007 static void 2008 pfsync_bulk_start(void) 2009 { 2010 struct pfsync_softc *sc = V_pfsyncif; 2011 2012 if (V_pf_status.debug >= PF_DEBUG_MISC) 2013 printf("pfsync: received bulk update request\n"); 2014 2015 PFSYNC_BLOCK(sc); 2016 2017 sc->sc_ureq_received = time_uptime; 2018 sc->sc_bulk_hashid = 0; 2019 sc->sc_bulk_stateid = 0; 2020 pfsync_bulk_status(PFSYNC_BUS_START); 2021 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2022 PFSYNC_BUNLOCK(sc); 2023 } 2024 2025 static void 2026 pfsync_bulk_update(void *arg) 2027 { 2028 struct pfsync_softc *sc = arg; 2029 struct pf_state *s; 2030 int i, sent = 0; 2031 2032 PFSYNC_BLOCK_ASSERT(sc); 2033 CURVNET_SET(sc->sc_ifp->if_vnet); 2034 2035 /* 2036 * Start with last state from previous invocation. 2037 * It may had gone, in this case start from the 2038 * hash slot. 2039 */ 2040 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2041 2042 if (s != NULL) 2043 i = PF_IDHASH(s); 2044 else 2045 i = sc->sc_bulk_hashid; 2046 2047 for (; i <= V_pf_hashmask; i++) { 2048 struct pf_idhash *ih = &V_pf_idhash[i]; 2049 2050 if (s != NULL) 2051 PF_HASHROW_ASSERT(ih); 2052 else { 2053 PF_HASHROW_LOCK(ih); 2054 s = LIST_FIRST(&ih->states); 2055 } 2056 2057 for (; s; s = LIST_NEXT(s, entry)) { 2058 2059 if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < 2060 sizeof(struct pfsync_state)) { 2061 /* We've filled a packet. */ 2062 sc->sc_bulk_hashid = i; 2063 sc->sc_bulk_stateid = s->id; 2064 sc->sc_bulk_creatorid = s->creatorid; 2065 PF_HASHROW_UNLOCK(ih); 2066 callout_reset(&sc->sc_bulk_tmo, 1, 2067 pfsync_bulk_update, sc); 2068 goto full; 2069 } 2070 2071 if (s->sync_state == PFSYNC_S_NONE && 2072 s->timeout < PFTM_MAX && 2073 s->pfsync_time <= sc->sc_ureq_received) { 2074 pfsync_update_state_req(s); 2075 sent++; 2076 } 2077 } 2078 PF_HASHROW_UNLOCK(ih); 2079 } 2080 2081 /* We're done. */ 2082 pfsync_bulk_status(PFSYNC_BUS_END); 2083 2084 full: 2085 CURVNET_RESTORE(); 2086 } 2087 2088 static void 2089 pfsync_bulk_status(u_int8_t status) 2090 { 2091 struct { 2092 struct pfsync_subheader subh; 2093 struct pfsync_bus bus; 2094 } __packed r; 2095 2096 struct pfsync_softc *sc = V_pfsyncif; 2097 2098 bzero(&r, sizeof(r)); 2099 2100 r.subh.action = PFSYNC_ACT_BUS; 2101 r.subh.count = htons(1); 2102 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2103 2104 r.bus.creatorid = V_pf_status.hostid; 2105 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2106 r.bus.status = status; 2107 2108 PFSYNC_LOCK(sc); 2109 pfsync_send_plus(&r, sizeof(r)); 2110 PFSYNC_UNLOCK(sc); 2111 } 2112 2113 static void 2114 pfsync_bulk_fail(void *arg) 2115 { 2116 struct pfsync_softc *sc = arg; 2117 2118 CURVNET_SET(sc->sc_ifp->if_vnet); 2119 2120 PFSYNC_BLOCK_ASSERT(sc); 2121 2122 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2123 /* Try again */ 2124 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2125 pfsync_bulk_fail, V_pfsyncif); 2126 PFSYNC_LOCK(sc); 2127 pfsync_request_update(0, 0); 2128 PFSYNC_UNLOCK(sc); 2129 } else { 2130 /* Pretend like the transfer was ok. */ 2131 sc->sc_ureq_sent = 0; 2132 sc->sc_bulk_tries = 0; 2133 PFSYNC_LOCK(sc); 2134 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2135 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2136 "pfsync bulk fail"); 2137 sc->sc_flags |= PFSYNCF_OK; 2138 PFSYNC_UNLOCK(sc); 2139 if (V_pf_status.debug >= PF_DEBUG_MISC) 2140 printf("pfsync: failed to receive bulk update\n"); 2141 } 2142 2143 CURVNET_RESTORE(); 2144 } 2145 2146 static void 2147 pfsync_send_plus(void *plus, size_t pluslen) 2148 { 2149 struct pfsync_softc *sc = V_pfsyncif; 2150 2151 PFSYNC_LOCK_ASSERT(sc); 2152 2153 if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) 2154 pfsync_sendout(1); 2155 2156 sc->sc_plus = plus; 2157 sc->sc_len += (sc->sc_pluslen = pluslen); 2158 2159 pfsync_sendout(1); 2160 } 2161 2162 static void 2163 pfsync_timeout(void *arg) 2164 { 2165 struct pfsync_softc *sc = arg; 2166 2167 CURVNET_SET(sc->sc_ifp->if_vnet); 2168 PFSYNC_LOCK(sc); 2169 pfsync_push(sc); 2170 PFSYNC_UNLOCK(sc); 2171 CURVNET_RESTORE(); 2172 } 2173 2174 static void 2175 pfsync_push(struct pfsync_softc *sc) 2176 { 2177 2178 PFSYNC_LOCK_ASSERT(sc); 2179 2180 sc->sc_flags |= PFSYNCF_PUSH; 2181 swi_sched(V_pfsync_swi_cookie, 0); 2182 } 2183 2184 static void 2185 pfsyncintr(void *arg) 2186 { 2187 struct pfsync_softc *sc = arg; 2188 struct mbuf *m, *n; 2189 2190 CURVNET_SET(sc->sc_ifp->if_vnet); 2191 2192 PFSYNC_LOCK(sc); 2193 if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { 2194 pfsync_sendout(0); 2195 sc->sc_flags &= ~PFSYNCF_PUSH; 2196 } 2197 _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); 2198 PFSYNC_UNLOCK(sc); 2199 2200 for (; m != NULL; m = n) { 2201 2202 n = m->m_nextpkt; 2203 m->m_nextpkt = NULL; 2204 2205 /* 2206 * We distinguish between a deferral packet and our 2207 * own pfsync packet based on M_SKIP_FIREWALL 2208 * flag. This is XXX. 2209 */ 2210 if (m->m_flags & M_SKIP_FIREWALL) 2211 ip_output(m, NULL, NULL, 0, NULL, NULL); 2212 else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 2213 NULL) == 0) 2214 V_pfsyncstats.pfsyncs_opackets++; 2215 else 2216 V_pfsyncstats.pfsyncs_oerrors++; 2217 } 2218 CURVNET_RESTORE(); 2219 } 2220 2221 static int 2222 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) 2223 { 2224 struct ip_moptions *imo = &sc->sc_imo; 2225 int error; 2226 2227 if (!(ifp->if_flags & IFF_MULTICAST)) 2228 return (EADDRNOTAVAIL); 2229 2230 imo->imo_membership = (struct in_multi **)mship; 2231 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 2232 imo->imo_multicast_vif = -1; 2233 2234 if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, 2235 &imo->imo_membership[0])) != 0) { 2236 imo->imo_membership = NULL; 2237 return (error); 2238 } 2239 imo->imo_num_memberships++; 2240 imo->imo_multicast_ifp = ifp; 2241 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2242 imo->imo_multicast_loop = 0; 2243 2244 return (0); 2245 } 2246 2247 static void 2248 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2249 { 2250 struct ip_moptions *imo = &sc->sc_imo; 2251 2252 in_leavegroup(imo->imo_membership[0], NULL); 2253 free(imo->imo_membership, M_PFSYNC); 2254 imo->imo_membership = NULL; 2255 imo->imo_multicast_ifp = NULL; 2256 } 2257 2258 #ifdef INET 2259 extern struct domain inetdomain; 2260 static struct protosw in_pfsync_protosw = { 2261 .pr_type = SOCK_RAW, 2262 .pr_domain = &inetdomain, 2263 .pr_protocol = IPPROTO_PFSYNC, 2264 .pr_flags = PR_ATOMIC|PR_ADDR, 2265 .pr_input = pfsync_input, 2266 .pr_output = (pr_output_t *)rip_output, 2267 .pr_ctloutput = rip_ctloutput, 2268 .pr_usrreqs = &rip_usrreqs 2269 }; 2270 #endif 2271 2272 static void 2273 pfsync_pointers_init() 2274 { 2275 2276 PF_RULES_WLOCK(); 2277 pfsync_state_import_ptr = pfsync_state_import; 2278 pfsync_insert_state_ptr = pfsync_insert_state; 2279 pfsync_update_state_ptr = pfsync_update_state; 2280 pfsync_delete_state_ptr = pfsync_delete_state; 2281 pfsync_clear_states_ptr = pfsync_clear_states; 2282 pfsync_defer_ptr = pfsync_defer; 2283 PF_RULES_WUNLOCK(); 2284 } 2285 2286 static void 2287 pfsync_pointers_uninit() 2288 { 2289 2290 PF_RULES_WLOCK(); 2291 pfsync_state_import_ptr = NULL; 2292 pfsync_insert_state_ptr = NULL; 2293 pfsync_update_state_ptr = NULL; 2294 pfsync_delete_state_ptr = NULL; 2295 pfsync_clear_states_ptr = NULL; 2296 pfsync_defer_ptr = NULL; 2297 PF_RULES_WUNLOCK(); 2298 } 2299 2300 static int 2301 pfsync_init() 2302 { 2303 VNET_ITERATOR_DECL(vnet_iter); 2304 int error = 0; 2305 2306 VNET_LIST_RLOCK(); 2307 VNET_FOREACH(vnet_iter) { 2308 CURVNET_SET(vnet_iter); 2309 V_pfsync_cloner = if_clone_simple(pfsyncname, 2310 pfsync_clone_create, pfsync_clone_destroy, 1); 2311 error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, 2312 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 2313 CURVNET_RESTORE(); 2314 if (error) 2315 goto fail_locked; 2316 } 2317 VNET_LIST_RUNLOCK(); 2318 #ifdef INET 2319 error = pf_proto_register(PF_INET, &in_pfsync_protosw); 2320 if (error) 2321 goto fail; 2322 error = ipproto_register(IPPROTO_PFSYNC); 2323 if (error) { 2324 pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); 2325 goto fail; 2326 } 2327 #endif 2328 pfsync_pointers_init(); 2329 2330 return (0); 2331 2332 fail: 2333 VNET_LIST_RLOCK(); 2334 fail_locked: 2335 VNET_FOREACH(vnet_iter) { 2336 CURVNET_SET(vnet_iter); 2337 if (V_pfsync_swi_cookie) { 2338 swi_remove(V_pfsync_swi_cookie); 2339 if_clone_detach(V_pfsync_cloner); 2340 } 2341 CURVNET_RESTORE(); 2342 } 2343 VNET_LIST_RUNLOCK(); 2344 2345 return (error); 2346 } 2347 2348 static void 2349 pfsync_uninit() 2350 { 2351 VNET_ITERATOR_DECL(vnet_iter); 2352 2353 pfsync_pointers_uninit(); 2354 2355 ipproto_unregister(IPPROTO_PFSYNC); 2356 pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); 2357 VNET_LIST_RLOCK(); 2358 VNET_FOREACH(vnet_iter) { 2359 CURVNET_SET(vnet_iter); 2360 if_clone_detach(V_pfsync_cloner); 2361 swi_remove(V_pfsync_swi_cookie); 2362 CURVNET_RESTORE(); 2363 } 2364 VNET_LIST_RUNLOCK(); 2365 } 2366 2367 static int 2368 pfsync_modevent(module_t mod, int type, void *data) 2369 { 2370 int error = 0; 2371 2372 switch (type) { 2373 case MOD_LOAD: 2374 error = pfsync_init(); 2375 break; 2376 case MOD_QUIESCE: 2377 /* 2378 * Module should not be unloaded due to race conditions. 2379 */ 2380 error = EBUSY; 2381 break; 2382 case MOD_UNLOAD: 2383 pfsync_uninit(); 2384 break; 2385 default: 2386 error = EINVAL; 2387 break; 2388 } 2389 2390 return (error); 2391 } 2392 2393 static moduledata_t pfsync_mod = { 2394 pfsyncname, 2395 pfsync_modevent, 2396 0 2397 }; 2398 2399 #define PFSYNC_MODVER 1 2400 2401 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); 2402 MODULE_VERSION(pfsync, PFSYNC_MODVER); 2403 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 2404