xref: /freebsd/sys/netpfil/pf/if_pfsync.c (revision b5a3a89c50671a1ad29e7c43fe15e7b16feac239)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause AND ISC)
3  *
4  * Copyright (c) 2002 Michael Shalayeff
5  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*-
31  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
32  *
33  * Permission to use, copy, modify, and distribute this software for any
34  * purpose with or without fee is hereby granted, provided that the above
35  * copyright notice and this permission notice appear in all copies.
36  *
37  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
38  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
39  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
40  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
41  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
42  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
43  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44  */
45 
46 /*
47  * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
48  *
49  * Revisions picked from OpenBSD after revision 1.110 import:
50  * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
51  * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
52  * 1.120, 1.175 - use monotonic time_uptime
53  * 1.122 - reduce number of updates for non-TCP sessions
54  * 1.125, 1.127 - rewrite merge or stale processing
55  * 1.128 - cleanups
56  * 1.146 - bzero() mbuf before sparsely filling it with data
57  * 1.170 - SIOCSIFMTU checks
58  * 1.126, 1.142 - deferred packets processing
59  * 1.173 - correct expire time processing
60  */
61 
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
64 
65 #include "opt_inet.h"
66 #include "opt_inet6.h"
67 #include "opt_pf.h"
68 
69 #include <sys/param.h>
70 #include <sys/bus.h>
71 #include <sys/endian.h>
72 #include <sys/interrupt.h>
73 #include <sys/kernel.h>
74 #include <sys/lock.h>
75 #include <sys/mbuf.h>
76 #include <sys/module.h>
77 #include <sys/mutex.h>
78 #include <sys/nv.h>
79 #include <sys/priv.h>
80 #include <sys/smp.h>
81 #include <sys/socket.h>
82 #include <sys/sockio.h>
83 #include <sys/sysctl.h>
84 #include <sys/syslog.h>
85 
86 #include <net/bpf.h>
87 #include <net/if.h>
88 #include <net/if_var.h>
89 #include <net/if_clone.h>
90 #include <net/if_private.h>
91 #include <net/if_types.h>
92 #include <net/vnet.h>
93 #include <net/pfvar.h>
94 #include <net/route.h>
95 #include <net/if_pfsync.h>
96 
97 #include <netinet/if_ether.h>
98 #include <netinet/in.h>
99 #include <netinet/in_var.h>
100 #include <netinet6/in6_var.h>
101 #include <netinet/ip.h>
102 #include <netinet/ip6.h>
103 #include <netinet/ip_carp.h>
104 #include <netinet/ip_var.h>
105 #include <netinet/tcp.h>
106 #include <netinet/tcp_fsm.h>
107 #include <netinet/tcp_seq.h>
108 
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #include <netinet6/scope6_var.h>
112 
113 #include <netpfil/pf/pfsync_nv.h>
114 
115 struct pfsync_bucket;
116 struct pfsync_softc;
117 
/*
 * Storage big enough for either an IPv4 or an IPv6 header.  It is the type
 * of the softc's sc_template member and contributes to PFSYNC_MINPKT.
 */
union inet_template {
	struct ip	ipv4;
	struct ip6_hdr	ipv6;
};
122 
/*
 * Sum of the sizes of the IP header template, the pfsync header and one
 * pfsync subheader.  pfsync_clone_create() uses it as the initial b_len
 * of every bucket.
 */
#define PFSYNC_MINPKT ( \
	sizeof(union inet_template) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader) )
127 
128 static int	pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *,
129 		    struct pfsync_state_peer *);
/*
 * Input handlers, one per pfsync action.  pfsync_input() and
 * pfsync6_input() call them through pfsync_acts[] as
 * handler(m, offset, count, flags, action).  A return value of -1 makes
 * the caller stop processing and return without freeing the mbuf; any
 * other value is added to the caller's running offset.
 */
static int	pfsync_in_clr(struct mbuf *, int, int, int, int);
static int	pfsync_in_ins(struct mbuf *, int, int, int, int);
static int	pfsync_in_iack(struct mbuf *, int, int, int, int);
static int	pfsync_in_upd(struct mbuf *, int, int, int, int);
static int	pfsync_in_upd_c(struct mbuf *, int, int, int, int);
static int	pfsync_in_ureq(struct mbuf *, int, int, int, int);
static int	pfsync_in_del_c(struct mbuf *, int, int, int, int);
static int	pfsync_in_bus(struct mbuf *, int, int, int, int);
static int	pfsync_in_tdb(struct mbuf *, int, int, int, int);
static int	pfsync_in_eof(struct mbuf *, int, int, int, int);
static int	pfsync_in_error(struct mbuf *, int, int, int, int);
141 
/*
 * Dispatch table for received subheaders, indexed by subh.action.  The
 * input routines reject any action >= PFSYNC_ACT_MAX before indexing.
 * Entries are positional: the comment on each line names the action the
 * slot is meant for, so the order must follow the PFSYNC_ACT_* numbering
 * (defined outside this file).
 */
static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = {
	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
	pfsync_in_ins,			/* PFSYNC_ACT_INS_1301 */
	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1301 */
	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_error,		/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
	pfsync_in_eof,			/* PFSYNC_ACT_EOF */
	pfsync_in_ins,			/* PFSYNC_ACT_INS_1400 */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1400 */
};
159 
/*
 * Description of one outgoing queue type (see pfsync_qs[]).
 */
struct pfsync_q {
	/* Builds the message for a state into the supplied buffer. */
	void		(*write)(struct pf_kstate *, void *);
	/* Size of one message of this type. */
	size_t		len;
	/* PFSYNC_ACT_* value associated with this queue. */
	u_int8_t	action;
};
165 
/*
 * We have the following sync queues.
 *
 * The initializers of pfsync_qs[] and pfsync_qid_sstate[] are listed in
 * this same order, and every bucket holds PFSYNC_Q_COUNT queue heads in
 * b_qs[], so PFSYNC_Q_COUNT must stay last.
 */
enum pfsync_q_id {
	PFSYNC_Q_INS_1301,
	PFSYNC_Q_INS_1400,
	PFSYNC_Q_IACK,
	PFSYNC_Q_UPD_1301,
	PFSYNC_Q_UPD_1400,
	PFSYNC_Q_UPD_C,
	PFSYNC_Q_DEL_C,
	PFSYNC_Q_COUNT,
};
177 
178 /* Functions for building messages for given queue */
179 static void	pfsync_out_state_1301(struct pf_kstate *, void *);
180 static void	pfsync_out_state_1400(struct pf_kstate *, void *);
181 static void	pfsync_out_iack(struct pf_kstate *, void *);
182 static void	pfsync_out_upd_c(struct pf_kstate *, void *);
183 static void	pfsync_out_del_c(struct pf_kstate *, void *);
184 
185 /* Attach those functions to queue */
186 static struct pfsync_q pfsync_qs[] = {
187 	{ pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 },
188 	{ pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 },
189 	{ pfsync_out_iack,       sizeof(struct pfsync_ins_ack),    PFSYNC_ACT_INS_ACK },
190 	{ pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 },
191 	{ pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 },
192 	{ pfsync_out_upd_c,      sizeof(struct pfsync_upd_c),      PFSYNC_ACT_UPD_C },
193 	{ pfsync_out_del_c,      sizeof(struct pfsync_del_c),      PFSYNC_ACT_DEL_C }
194 };
195 
196 /* Map queue to pf_kstate->sync_state */
197 static u_int8_t pfsync_qid_sstate[] = {
198 	PFSYNC_S_INS,   /* PFSYNC_Q_INS_1301 */
199 	PFSYNC_S_INS,   /* PFSYNC_Q_INS_1400 */
200 	PFSYNC_S_IACK,  /* PFSYNC_Q_IACK */
201 	PFSYNC_S_UPD,   /* PFSYNC_Q_UPD_1301 */
202 	PFSYNC_S_UPD,   /* PFSYNC_Q_UPD_1400 */
203 	PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */
204 	PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */
205 };
206 
207 /* Map pf_kstate->sync_state to queue */
208 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t);
209 
210 static void	pfsync_q_ins(struct pf_kstate *, int sync_state, bool);
211 static void	pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *);
212 
213 static void	pfsync_update_state(struct pf_kstate *);
214 static void	pfsync_tx(struct pfsync_softc *, struct mbuf *);
215 
/*
 * One update-request message together with its list linkage; the element
 * type of a bucket's b_upd_req_list.
 */
struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
220 
/*
 * One deferral; the element type of a bucket's b_deferrals list.
 * pfsync_clone_destroy() stops or drains pd_tmo for every entry still
 * queued.
 */
struct pfsync_deferral {
	struct pfsync_softc		*pd_sc;		/* owning softc */
	TAILQ_ENTRY(pfsync_deferral)	pd_entry;	/* b_deferrals linkage */
	struct callout			pd_tmo;		/* deferral timeout */

	/* NOTE(review): presumably the state and the packet held back for it - confirm. */
	struct pf_kstate		*pd_st;
	struct mbuf			*pd_m;
};
229 
/*
 * Per-bucket queue state.  The softc owns an array of pfsync_buckets of
 * these (sc_buckets); each one is set up in pfsync_clone_create() and is
 * protected by its own b_mtx (see PFSYNC_BUCKET_LOCK()).
 */
struct pfsync_bucket
{
	int			b_id;		/* index into sc_buckets */
	struct pfsync_softc	*b_sc;		/* back pointer to the softc */
	struct mtx		b_mtx;		/* bucket lock */
	struct callout		b_tmo;		/* drained on destroy */
	int			b_flags;
#define	PFSYNCF_BUCKET_PUSH	0x00000001

	size_t			b_len;		/* starts at PFSYNC_MINPKT */
	/* One state queue per pfsync_q_id. */
	TAILQ_HEAD(, pf_kstate)			b_qs[PFSYNC_Q_COUNT];
	TAILQ_HEAD(, pfsync_upd_req_item)	b_upd_req_list;
	TAILQ_HEAD(, pfsync_deferral)		b_deferrals;
	/* Expected to be 0 exactly when b_deferrals is empty (see destroy). */
	u_int			b_deferred;
	/* NOTE(review): presumably an extra payload and its length - confirm. */
	void			*b_plus;
	size_t			b_pluslen;

	struct  ifaltq b_snd;			/* ifq_maxlen set to ifqmaxlen */
};
249 
/*
 * Per-interface state of the pfsync pseudo-interface.  Allocated zeroed in
 * pfsync_clone_create() and published through V_pfsyncif.
 */
struct pfsync_softc {
	/* Configuration */
	struct ifnet		*sc_ifp;	/* the pfsync interface itself */
	/* Interface pfsync packets must be received on. */
	struct ifnet		*sc_sync_if;
	struct ip_moptions	sc_imo;
	struct ip6_moptions	sc_im6o;
	struct sockaddr_storage	sc_sync_peer;
	uint32_t		sc_flags;	/* PFSYNCF_*; PFSYNCF_OK set at create */
	uint8_t			sc_maxupdates;	/* 128 at create */
	union inet_template     sc_template;
	struct mtx		sc_mtx;		/* see PFSYNC_LOCK() */
	uint32_t		sc_version;	/* PFSYNC_MSG_VERSION_DEFAULT at create */

	/* Queued data */
	struct pfsync_bucket	*sc_buckets;	/* array of pfsync_buckets entries */

	/* Bulk update info */
	struct mtx		sc_bulk_mtx;	/* see PFSYNC_BLOCK(); lock of both callouts below */
	uint32_t		sc_ureq_sent;
	int			sc_bulk_tries;
	uint32_t		sc_ureq_received;
	int			sc_bulk_hashid;
	uint64_t		sc_bulk_stateid;
	uint32_t		sc_bulk_creatorid;
	struct callout		sc_bulk_tmo;
	struct callout		sc_bulkfail_tmo;
};
277 
/* Softc mutex (sc_mtx). */
#define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
#define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
#define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)

/* Per-bucket mutex (b_mtx). */
#define PFSYNC_BUCKET_LOCK(b)		mtx_lock(&(b)->b_mtx)
#define PFSYNC_BUCKET_UNLOCK(b)		mtx_unlock(&(b)->b_mtx)
#define PFSYNC_BUCKET_LOCK_ASSERT(b)	mtx_assert(&(b)->b_mtx, MA_OWNED)

/* Bulk-update mutex (sc_bulk_mtx). */
#define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)

/* Initial value of the defer_delay sysctl, which is documented as ms. */
#define PFSYNC_DEFER_TIMEOUT	20
291 
/* Interface name, mutex name and malloc type description. */
static const char pfsyncname[] = "pfsync";
static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
/* Per-VNET softc; set by pfsync_clone_create(), cleared on destroy. */
VNET_DEFINE_STATIC(struct pfsync_softc	*, pfsyncif) = NULL;
#define	V_pfsyncif		VNET(pfsyncif)
VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL;
#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie);
#define	V_pfsync_swi_ie		VNET(pfsync_swi_ie)
/* Counters bumped by the input path. */
VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats);
#define	V_pfsyncstats		VNET(pfsyncstats)
/* Amount handed (negated) to carp_demote_adj_p on destroy. */
VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW;
#define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT;
#define	V_pfsync_defer_timeout	VNET(pfsync_defer_timeout)
306 
307 static void	pfsync_timeout(void *);
308 static void	pfsync_push(struct pfsync_bucket *);
309 static void	pfsync_push_all(struct pfsync_softc *);
310 static void	pfsyncintr(void *);
311 static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
312 		    struct in_mfilter *, struct in6_mfilter *);
313 static void	pfsync_multicast_cleanup(struct pfsync_softc *);
314 static void	pfsync_pointers_init(void);
315 static void	pfsync_pointers_uninit(void);
316 static int	pfsync_init(void);
317 static void	pfsync_uninit(void);
318 
/*
 * Number of buckets allocated per softc.  Read-only tunable; when left at
 * 0, pfsync_clone_create() sets it to mp_ncpus * 2.
 */
static unsigned long pfsync_buckets;

/* net.pfsync sysctl tree. */
SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "PFSYNC");
SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsyncstats), pfsyncstats,
    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
    &pfsync_buckets, 0, "Number of pfsync hash buckets");
SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)");
332 
333 static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
334 static void	pfsync_clone_destroy(struct ifnet *);
335 static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
336 		    struct pf_state_peer *);
337 static int	pfsyncoutput(struct ifnet *, struct mbuf *,
338 		    const struct sockaddr *, struct route *);
339 static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
340 
341 static int	pfsync_defer(struct pf_kstate *, struct mbuf *);
342 static void	pfsync_undefer(struct pfsync_deferral *, int);
343 static void	pfsync_undefer_state_locked(struct pf_kstate *, int);
344 static void	pfsync_undefer_state(struct pf_kstate *, int);
345 static void	pfsync_defer_tmo(void *);
346 
347 static void	pfsync_request_update(u_int32_t, u_int64_t);
348 static bool	pfsync_update_state_req(struct pf_kstate *);
349 
350 static void	pfsync_drop(struct pfsync_softc *);
351 static void	pfsync_sendout(int, int);
352 static void	pfsync_send_plus(void *, size_t);
353 
354 static void	pfsync_bulk_start(void);
355 static void	pfsync_bulk_status(u_int8_t);
356 static void	pfsync_bulk_update(void *);
357 static void	pfsync_bulk_fail(void *);
358 
359 static void	pfsync_detach_ifnet(struct ifnet *);
360 
361 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *,
362     struct pfsync_kstatus *);
363 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *,
364     struct pfsync_softc *);
365 
366 #ifdef IPSEC
367 static void	pfsync_update_net_tdb(struct pfsync_tdb *);
368 #endif
369 static struct pfsync_bucket	*pfsync_get_bucket(struct pfsync_softc *,
370 		    struct pf_kstate *);
371 
/* NOTE(review): presumably the retry limit for sc_bulk_tries - confirm at the use site. */
#define PFSYNC_MAX_BULKTRIES	12

/* Per-VNET interface cloner handle. */
VNET_DEFINE(struct if_clone *, pfsync_cloner);
#define	V_pfsync_cloner	VNET(pfsync_cloner)

/* The IPv6 group address ff12::f0. */
const struct in6_addr in6addr_linklocal_pfsync_group =
	{{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}};
380 static int
381 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
382 {
383 	struct pfsync_softc *sc;
384 	struct ifnet *ifp;
385 	struct pfsync_bucket *b;
386 	int c;
387 	enum pfsync_q_id q;
388 
389 	if (unit != 0)
390 		return (EINVAL);
391 
392 	if (! pfsync_buckets)
393 		pfsync_buckets = mp_ncpus * 2;
394 
395 	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
396 	sc->sc_flags |= PFSYNCF_OK;
397 	sc->sc_maxupdates = 128;
398 	sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT;
399 
400 	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
401 	if (ifp == NULL) {
402 		free(sc, M_PFSYNC);
403 		return (ENOSPC);
404 	}
405 	if_initname(ifp, pfsyncname, unit);
406 	ifp->if_softc = sc;
407 	ifp->if_ioctl = pfsyncioctl;
408 	ifp->if_output = pfsyncoutput;
409 	ifp->if_type = IFT_PFSYNC;
410 	ifp->if_hdrlen = sizeof(struct pfsync_header);
411 	ifp->if_mtu = ETHERMTU;
412 	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
413 	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
414 	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
415 	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
416 
417 	if_attach(ifp);
418 
419 	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
420 
421 	sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
422 	    M_PFSYNC, M_ZERO | M_WAITOK);
423 	for (c = 0; c < pfsync_buckets; c++) {
424 		b = &sc->sc_buckets[c];
425 		mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF);
426 
427 		b->b_id = c;
428 		b->b_sc = sc;
429 		b->b_len = PFSYNC_MINPKT;
430 
431 		for (q = 0; q < PFSYNC_Q_COUNT; q++)
432 			TAILQ_INIT(&b->b_qs[q]);
433 
434 		TAILQ_INIT(&b->b_upd_req_list);
435 		TAILQ_INIT(&b->b_deferrals);
436 
437 		callout_init(&b->b_tmo, 1);
438 
439 		b->b_snd.ifq_maxlen = ifqmaxlen;
440 	}
441 
442 	V_pfsyncif = sc;
443 
444 	return (0);
445 }
446 
447 static void
448 pfsync_clone_destroy(struct ifnet *ifp)
449 {
450 	struct pfsync_softc *sc = ifp->if_softc;
451 	struct pfsync_bucket *b;
452 	int c, ret;
453 
454 	for (c = 0; c < pfsync_buckets; c++) {
455 		b = &sc->sc_buckets[c];
456 		/*
457 		 * At this stage, everything should have already been
458 		 * cleared by pfsync_uninit(), and we have only to
459 		 * drain callouts.
460 		 */
461 		PFSYNC_BUCKET_LOCK(b);
462 		while (b->b_deferred > 0) {
463 			struct pfsync_deferral *pd =
464 			    TAILQ_FIRST(&b->b_deferrals);
465 
466 			ret = callout_stop(&pd->pd_tmo);
467 			PFSYNC_BUCKET_UNLOCK(b);
468 			if (ret > 0) {
469 				pfsync_undefer(pd, 1);
470 			} else {
471 				callout_drain(&pd->pd_tmo);
472 			}
473 			PFSYNC_BUCKET_LOCK(b);
474 		}
475 		MPASS(b->b_deferred == 0);
476 		MPASS(TAILQ_EMPTY(&b->b_deferrals));
477 		PFSYNC_BUCKET_UNLOCK(b);
478 
479 		callout_drain(&b->b_tmo);
480 	}
481 
482 	callout_drain(&sc->sc_bulkfail_tmo);
483 	callout_drain(&sc->sc_bulk_tmo);
484 
485 	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
486 		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
487 	bpfdetach(ifp);
488 	if_detach(ifp);
489 
490 	pfsync_drop(sc);
491 
492 	if_free(ifp);
493 	pfsync_multicast_cleanup(sc);
494 	mtx_destroy(&sc->sc_mtx);
495 	mtx_destroy(&sc->sc_bulk_mtx);
496 
497 	free(sc->sc_buckets, M_PFSYNC);
498 	free(sc, M_PFSYNC);
499 
500 	V_pfsyncif = NULL;
501 }
502 
503 static int
504 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
505     struct pf_state_peer *d)
506 {
507 	if (s->scrub.scrub_flag && d->scrub == NULL) {
508 		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
509 		if (d->scrub == NULL)
510 			return (ENOMEM);
511 	}
512 
513 	return (0);
514 }
515 
516 static int
517 pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
518 {
519 	struct pfsync_softc *sc = V_pfsyncif;
520 #ifndef	__NO_STRICT_ALIGNMENT
521 	struct pfsync_state_key key[2];
522 #endif
523 	struct pfsync_state_key *kw, *ks;
524 	struct pf_kstate	*st = NULL;
525 	struct pf_state_key *skw = NULL, *sks = NULL;
526 	struct pf_krule *r = NULL;
527 	struct pfi_kkif	*kif;
528 	int error;
529 
530 	PF_RULES_RASSERT();
531 
532 	if (sp->pfs_1301.creatorid == 0) {
533 		if (V_pf_status.debug >= PF_DEBUG_MISC)
534 			printf("%s: invalid creator id: %08x\n", __func__,
535 			    ntohl(sp->pfs_1301.creatorid));
536 		return (EINVAL);
537 	}
538 
539 	if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) {
540 		if (V_pf_status.debug >= PF_DEBUG_MISC)
541 			printf("%s: unknown interface: %s\n", __func__,
542 			    sp->pfs_1301.ifname);
543 		if (flags & PFSYNC_SI_IOCTL)
544 			return (EINVAL);
545 		return (0);	/* skip this state */
546 	}
547 
548 	/*
549 	 * If the ruleset checksums match or the state is coming from the ioctl,
550 	 * it's safe to associate the state with the rule of that number.
551 	 */
552 	if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) &&
553 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) <
554 	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
555 		r = pf_main_ruleset.rules[
556 		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->pfs_1301.rule)];
557 	else
558 		r = &V_pf_default_rule;
559 
560 	if ((r->max_states &&
561 	    counter_u64_fetch(r->states_cur) >= r->max_states))
562 		goto cleanup;
563 
564 	/*
565 	 * XXXGL: consider M_WAITOK in ioctl path after.
566 	 */
567 	st = pf_alloc_state(M_NOWAIT);
568 	if (__predict_false(st == NULL))
569 		goto cleanup;
570 
571 	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
572 		goto cleanup;
573 
574 #ifndef	__NO_STRICT_ALIGNMENT
575 	bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2);
576 	kw = &key[PF_SK_WIRE];
577 	ks = &key[PF_SK_STACK];
578 #else
579 	kw = &sp->pfs_1301.key[PF_SK_WIRE];
580 	ks = &sp->pfs_1301.key[PF_SK_STACK];
581 #endif
582 
583 	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) ||
584 	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) ||
585 	    kw->port[0] != ks->port[0] ||
586 	    kw->port[1] != ks->port[1]) {
587 		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
588 		if (sks == NULL)
589 			goto cleanup;
590 	} else
591 		sks = skw;
592 
593 	/* allocate memory for scrub info */
594 	if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) ||
595 	    pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst))
596 		goto cleanup;
597 
598 	/* Copy to state key(s). */
599 	skw->addr[0] = kw->addr[0];
600 	skw->addr[1] = kw->addr[1];
601 	skw->port[0] = kw->port[0];
602 	skw->port[1] = kw->port[1];
603 	skw->proto = sp->pfs_1301.proto;
604 	skw->af = sp->pfs_1301.af;
605 	if (sks != skw) {
606 		sks->addr[0] = ks->addr[0];
607 		sks->addr[1] = ks->addr[1];
608 		sks->port[0] = ks->port[0];
609 		sks->port[1] = ks->port[1];
610 		sks->proto = sp->pfs_1301.proto;
611 		sks->af = sp->pfs_1301.af;
612 	}
613 
614 	/* copy to state */
615 	bcopy(&sp->pfs_1301.rt_addr, &st->rt_addr, sizeof(st->rt_addr));
616 	st->creation = time_uptime - ntohl(sp->pfs_1301.creation);
617 	st->expire = time_uptime;
618 	if (sp->pfs_1301.expire) {
619 		uint32_t timeout;
620 
621 		timeout = r->timeout[sp->pfs_1301.timeout];
622 		if (!timeout)
623 			timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout];
624 
625 		/* sp->expire may have been adaptively scaled by export. */
626 		st->expire -= timeout - ntohl(sp->pfs_1301.expire);
627 	}
628 
629 	st->direction = sp->pfs_1301.direction;
630 	st->act.log = sp->pfs_1301.log;
631 	st->timeout = sp->pfs_1301.timeout;
632 
633 	switch (msg_version) {
634 		case PFSYNC_MSG_VERSION_1301:
635 			st->state_flags = sp->pfs_1301.state_flags;
636 			/*
637 			 * In FreeBSD 13 pfsync lacks many attributes. Copy them
638 			 * from the rule if possible. If rule can't be matched
639 			 * clear any set options as we can't recover their
640 			 * parameters.
641 			*/
642 			if (r == &V_pf_default_rule) {
643 				st->state_flags &= ~PFSTATE_SETMASK;
644 			} else {
645 				/*
646 				 * Similar to pf_rule_to_actions(). This code
647 				 * won't set the actions properly if they come
648 				 * from multiple "match" rules as only rule
649 				 * creating the state is send over pfsync.
650 				 */
651 				st->act.qid = r->qid;
652 				st->act.pqid = r->pqid;
653 				st->act.rtableid = r->rtableid;
654 				if (r->scrub_flags & PFSTATE_SETTOS)
655 					st->act.set_tos = r->set_tos;
656 				st->act.min_ttl = r->min_ttl;
657 				st->act.max_mss = r->max_mss;
658 				st->state_flags |= (r->scrub_flags &
659 				    (PFSTATE_NODF|PFSTATE_RANDOMID|
660 				    PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|
661 				    PFSTATE_SETPRIO));
662 				if (r->dnpipe || r->dnrpipe) {
663 					if (r->free_flags & PFRULE_DN_IS_PIPE)
664 						st->state_flags |= PFSTATE_DN_IS_PIPE;
665 					else
666 						st->state_flags &= ~PFSTATE_DN_IS_PIPE;
667 				}
668 				st->act.dnpipe = r->dnpipe;
669 				st->act.dnrpipe = r->dnrpipe;
670 			}
671 			break;
672 		case PFSYNC_MSG_VERSION_1400:
673 			st->state_flags = ntohs(sp->pfs_1400.state_flags);
674 			st->act.qid = ntohs(sp->pfs_1400.qid);
675 			st->act.pqid = ntohs(sp->pfs_1400.pqid);
676 			st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe);
677 			st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe);
678 			st->act.rtableid = ntohl(sp->pfs_1400.rtableid);
679 			st->act.min_ttl = sp->pfs_1400.min_ttl;
680 			st->act.set_tos = sp->pfs_1400.set_tos;
681 			st->act.max_mss = ntohs(sp->pfs_1400.max_mss);
682 			st->act.set_prio[0] = sp->pfs_1400.set_prio[0];
683 			st->act.set_prio[1] = sp->pfs_1400.set_prio[1];
684 			st->rt = sp->pfs_1400.rt;
685 			if (st->rt && (st->rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname)) == NULL) {
686 				if (V_pf_status.debug >= PF_DEBUG_MISC)
687 					printf("%s: unknown route interface: %s\n",
688 					    __func__, sp->pfs_1400.rt_ifname);
689 				if (flags & PFSYNC_SI_IOCTL)
690 					return (EINVAL);
691 				return (0);	/* skip this state */
692 			}
693 			break;
694 		default:
695 			panic("%s: Unsupported pfsync_msg_version %d",
696 			    __func__, msg_version);
697 	}
698 
699 	st->id = sp->pfs_1301.id;
700 	st->creatorid = sp->pfs_1301.creatorid;
701 	pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src);
702 	pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
703 
704 	st->rule.ptr = r;
705 	st->nat_rule.ptr = NULL;
706 	st->anchor.ptr = NULL;
707 
708 	st->pfsync_time = time_uptime;
709 	st->sync_state = PFSYNC_S_NONE;
710 
711 	if (!(flags & PFSYNC_SI_IOCTL))
712 		st->state_flags |= PFSTATE_NOSYNC;
713 
714 	if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0)
715 		goto cleanup_state;
716 
717 	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
718 	counter_u64_add(r->states_cur, 1);
719 	counter_u64_add(r->states_tot, 1);
720 
721 	if (!(flags & PFSYNC_SI_IOCTL)) {
722 		st->state_flags &= ~PFSTATE_NOSYNC;
723 		if (st->state_flags & PFSTATE_ACK) {
724 			struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
725 			PFSYNC_BUCKET_LOCK(b);
726 			pfsync_q_ins(st, PFSYNC_S_IACK, true);
727 			PFSYNC_BUCKET_UNLOCK(b);
728 
729 			pfsync_push_all(sc);
730 		}
731 	}
732 	st->state_flags &= ~PFSTATE_ACK;
733 	PF_STATE_UNLOCK(st);
734 
735 	return (0);
736 
737 cleanup:
738 	error = ENOMEM;
739 	if (skw == sks)
740 		sks = NULL;
741 	uma_zfree(V_pf_state_key_z, skw);
742 	uma_zfree(V_pf_state_key_z, sks);
743 
744 cleanup_state:	/* pf_state_insert() frees the state keys. */
745 	if (st) {
746 		st->timeout = PFTM_UNLINKED; /* appease an assert */
747 		pf_free_state(st);
748 	}
749 	return (error);
750 }
751 
752 #ifdef INET
753 static int
754 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
755 {
756 	struct pfsync_softc *sc = V_pfsyncif;
757 	struct mbuf *m = *mp;
758 	struct ip *ip = mtod(m, struct ip *);
759 	struct pfsync_header *ph;
760 	struct pfsync_subheader subh;
761 
762 	int offset, len, flags = 0;
763 	int rv;
764 	uint16_t count;
765 
766 	PF_RULES_RLOCK_TRACKER;
767 
768 	*mp = NULL;
769 	V_pfsyncstats.pfsyncs_ipackets++;
770 
771 	/* Verify that we have a sync interface configured. */
772 	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
773 	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
774 		goto done;
775 
776 	/* verify that the packet came in on the right interface */
777 	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
778 		V_pfsyncstats.pfsyncs_badif++;
779 		goto done;
780 	}
781 
782 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
783 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
784 	/* verify that the IP TTL is 255. */
785 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
786 		V_pfsyncstats.pfsyncs_badttl++;
787 		goto done;
788 	}
789 
790 	offset = ip->ip_hl << 2;
791 	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
792 		V_pfsyncstats.pfsyncs_hdrops++;
793 		goto done;
794 	}
795 
796 	if (offset + sizeof(*ph) > m->m_len) {
797 		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
798 			V_pfsyncstats.pfsyncs_hdrops++;
799 			return (IPPROTO_DONE);
800 		}
801 		ip = mtod(m, struct ip *);
802 	}
803 	ph = (struct pfsync_header *)((char *)ip + offset);
804 
805 	/* verify the version */
806 	if (ph->version != PFSYNC_VERSION) {
807 		V_pfsyncstats.pfsyncs_badver++;
808 		goto done;
809 	}
810 
811 	len = ntohs(ph->len) + offset;
812 	if (m->m_pkthdr.len < len) {
813 		V_pfsyncstats.pfsyncs_badlen++;
814 		goto done;
815 	}
816 
817 	/*
818 	 * Trusting pf_chksum during packet processing, as well as seeking
819 	 * in interface name tree, require holding PF_RULES_RLOCK().
820 	 */
821 	PF_RULES_RLOCK();
822 	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
823 		flags = PFSYNC_SI_CKSUM;
824 
825 	offset += sizeof(*ph);
826 	while (offset <= len - sizeof(subh)) {
827 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
828 		offset += sizeof(subh);
829 
830 		if (subh.action >= PFSYNC_ACT_MAX) {
831 			V_pfsyncstats.pfsyncs_badact++;
832 			PF_RULES_RUNLOCK();
833 			goto done;
834 		}
835 
836 		count = ntohs(subh.count);
837 		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
838 		rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action);
839 		if (rv == -1) {
840 			PF_RULES_RUNLOCK();
841 			return (IPPROTO_DONE);
842 		}
843 
844 		offset += rv;
845 	}
846 	PF_RULES_RUNLOCK();
847 
848 done:
849 	m_freem(m);
850 	return (IPPROTO_DONE);
851 }
852 #endif
853 
854 #ifdef INET6
855 static int
856 pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused)
857 {
858 	struct pfsync_softc *sc = V_pfsyncif;
859 	struct mbuf *m = *mp;
860 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
861 	struct pfsync_header *ph;
862 	struct pfsync_subheader subh;
863 
864 	int offset, len, flags = 0;
865 	int rv;
866 	uint16_t count;
867 
868 	PF_RULES_RLOCK_TRACKER;
869 
870 	*mp = NULL;
871 	V_pfsyncstats.pfsyncs_ipackets++;
872 
873 	/* Verify that we have a sync interface configured. */
874 	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
875 	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
876 		goto done;
877 
878 	/* verify that the packet came in on the right interface */
879 	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
880 		V_pfsyncstats.pfsyncs_badif++;
881 		goto done;
882 	}
883 
884 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
885 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
886 	/* verify that the IP TTL is 255. */
887 	if (ip6->ip6_hlim != PFSYNC_DFLTTL) {
888 		V_pfsyncstats.pfsyncs_badttl++;
889 		goto done;
890 	}
891 
892 
893 	offset = sizeof(*ip6);
894 	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
895 		V_pfsyncstats.pfsyncs_hdrops++;
896 		goto done;
897 	}
898 
899 	if (offset + sizeof(*ph) > m->m_len) {
900 		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
901 			V_pfsyncstats.pfsyncs_hdrops++;
902 			return (IPPROTO_DONE);
903 		}
904 		ip6 = mtod(m, struct ip6_hdr *);
905 	}
906 	ph = (struct pfsync_header *)((char *)ip6 + offset);
907 
908 	/* verify the version */
909 	if (ph->version != PFSYNC_VERSION) {
910 		V_pfsyncstats.pfsyncs_badver++;
911 		goto done;
912 	}
913 
914 	len = ntohs(ph->len) + offset;
915 	if (m->m_pkthdr.len < len) {
916 		V_pfsyncstats.pfsyncs_badlen++;
917 		goto done;
918 	}
919 
920 	/*
921 	 * Trusting pf_chksum during packet processing, as well as seeking
922 	 * in interface name tree, require holding PF_RULES_RLOCK().
923 	 */
924 	PF_RULES_RLOCK();
925 	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
926 		flags = PFSYNC_SI_CKSUM;
927 
928 	offset += sizeof(*ph);
929 	while (offset <= len - sizeof(subh)) {
930 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
931 		offset += sizeof(subh);
932 
933 		if (subh.action >= PFSYNC_ACT_MAX) {
934 			V_pfsyncstats.pfsyncs_badact++;
935 			PF_RULES_RUNLOCK();
936 			goto done;
937 		}
938 
939 		count = ntohs(subh.count);
940 		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
941 		rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action);
942 		if (rv == -1) {
943 			PF_RULES_RUNLOCK();
944 			return (IPPROTO_DONE);
945 		}
946 
947 		offset += rv;
948 	}
949 	PF_RULES_RUNLOCK();
950 
951 done:
952 	m_freem(m);
953 	return (IPPROTO_DONE);
954 }
955 #endif
956 
957 static int
958 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action)
959 {
960 	struct pfsync_clr *clr;
961 	struct mbuf *mp;
962 	int len = sizeof(*clr) * count;
963 	int i, offp;
964 	u_int32_t creatorid;
965 
966 	mp = m_pulldown(m, offset, len, &offp);
967 	if (mp == NULL) {
968 		V_pfsyncstats.pfsyncs_badlen++;
969 		return (-1);
970 	}
971 	clr = (struct pfsync_clr *)(mp->m_data + offp);
972 
973 	for (i = 0; i < count; i++) {
974 		creatorid = clr[i].creatorid;
975 
976 		if (clr[i].ifname[0] != '\0' &&
977 		    pfi_kkif_find(clr[i].ifname) == NULL)
978 			continue;
979 
980 		for (int i = 0; i <= pf_hashmask; i++) {
981 			struct pf_idhash *ih = &V_pf_idhash[i];
982 			struct pf_kstate *s;
983 relock:
984 			PF_HASHROW_LOCK(ih);
985 			LIST_FOREACH(s, &ih->states, entry) {
986 				if (s->creatorid == creatorid) {
987 					s->state_flags |= PFSTATE_NOSYNC;
988 					pf_unlink_state(s);
989 					goto relock;
990 				}
991 			}
992 			PF_HASHROW_UNLOCK(ih);
993 		}
994 	}
995 
996 	return (len);
997 }
998 
999 static int
1000 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action)
1001 {
1002 	struct mbuf *mp;
1003 	union pfsync_state_union *sa, *sp;
1004 	int i, offp, len, msg_version;
1005 
1006 	switch (action) {
1007 		case PFSYNC_ACT_INS_1301:
1008 			len = sizeof(struct pfsync_state_1301) * count;
1009 			msg_version = PFSYNC_MSG_VERSION_1301;
1010 			break;
1011 		case PFSYNC_ACT_INS_1400:
1012 			len = sizeof(struct pfsync_state_1400) * count;
1013 			msg_version = PFSYNC_MSG_VERSION_1400;
1014 			break;
1015 		default:
1016 			V_pfsyncstats.pfsyncs_badact++;
1017 			return (-1);
1018 	}
1019 
1020 	mp = m_pulldown(m, offset, len, &offp);
1021 	if (mp == NULL) {
1022 		V_pfsyncstats.pfsyncs_badlen++;
1023 		return (-1);
1024 	}
1025 	sa = (union pfsync_state_union *)(mp->m_data + offp);
1026 
1027 	for (i = 0; i < count; i++) {
1028 		sp = &sa[i];
1029 
1030 		/* Check for invalid values. */
1031 		if (sp->pfs_1301.timeout >= PFTM_MAX ||
1032 		    sp->pfs_1301.src.state > PF_TCPS_PROXY_DST ||
1033 		    sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST ||
1034 		    sp->pfs_1301.direction > PF_OUT ||
1035 		    (sp->pfs_1301.af != AF_INET &&
1036 		    sp->pfs_1301.af != AF_INET6)) {
1037 			if (V_pf_status.debug >= PF_DEBUG_MISC)
1038 				printf("%s: invalid value\n", __func__);
1039 			V_pfsyncstats.pfsyncs_badval++;
1040 			continue;
1041 		}
1042 
1043 		if (pfsync_state_import(sp, flags, msg_version) == ENOMEM)
1044 			/* Drop out, but process the rest of the actions. */
1045 			break;
1046 	}
1047 
1048 	return (len);
1049 }
1050 
1051 static int
1052 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action)
1053 {
1054 	struct pfsync_ins_ack *ia, *iaa;
1055 	struct pf_kstate *st;
1056 
1057 	struct mbuf *mp;
1058 	int len = count * sizeof(*ia);
1059 	int offp, i;
1060 
1061 	mp = m_pulldown(m, offset, len, &offp);
1062 	if (mp == NULL) {
1063 		V_pfsyncstats.pfsyncs_badlen++;
1064 		return (-1);
1065 	}
1066 	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
1067 
1068 	for (i = 0; i < count; i++) {
1069 		ia = &iaa[i];
1070 
1071 		st = pf_find_state_byid(ia->id, ia->creatorid);
1072 		if (st == NULL)
1073 			continue;
1074 
1075 		if (st->state_flags & PFSTATE_ACK) {
1076 			pfsync_undefer_state(st, 0);
1077 		}
1078 		PF_STATE_UNLOCK(st);
1079 	}
1080 	/*
1081 	 * XXX this is not yet implemented, but we know the size of the
1082 	 * message so we can skip it.
1083 	 */
1084 
1085 	return (count * sizeof(struct pfsync_ins_ack));
1086 }
1087 
1088 static int
1089 pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src,
1090     struct pfsync_state_peer *dst)
1091 {
1092 	int sync = 0;
1093 
1094 	PF_STATE_LOCK_ASSERT(st);
1095 
1096 	/*
1097 	 * The state should never go backwards except
1098 	 * for syn-proxy states.  Neither should the
1099 	 * sequence window slide backwards.
1100 	 */
1101 	if ((st->src.state > src->state &&
1102 	    (st->src.state < PF_TCPS_PROXY_SRC ||
1103 	    src->state >= PF_TCPS_PROXY_SRC)) ||
1104 
1105 	    (st->src.state == src->state &&
1106 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
1107 		sync++;
1108 	else
1109 		pf_state_peer_ntoh(src, &st->src);
1110 
1111 	if ((st->dst.state > dst->state) ||
1112 
1113 	    (st->dst.state >= TCPS_SYN_SENT &&
1114 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
1115 		sync++;
1116 	else
1117 		pf_state_peer_ntoh(dst, &st->dst);
1118 
1119 	return (sync);
1120 }
1121 
1122 static int
1123 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action)
1124 {
1125 	struct pfsync_softc *sc = V_pfsyncif;
1126 	union pfsync_state_union *sa, *sp;
1127 	struct pf_kstate *st;
1128 	struct mbuf *mp;
1129 	int sync, offp, i, len, msg_version;
1130 
1131 	switch (action) {
1132 		case PFSYNC_ACT_UPD_1301:
1133 			len = sizeof(struct pfsync_state_1301) * count;
1134 			msg_version = PFSYNC_MSG_VERSION_1301;
1135 			break;
1136 		case PFSYNC_ACT_UPD_1400:
1137 			len = sizeof(struct pfsync_state_1400) * count;
1138 			msg_version = PFSYNC_MSG_VERSION_1400;
1139 			break;
1140 		default:
1141 			V_pfsyncstats.pfsyncs_badact++;
1142 			return (-1);
1143 	}
1144 
1145 	mp = m_pulldown(m, offset, len, &offp);
1146 	if (mp == NULL) {
1147 		V_pfsyncstats.pfsyncs_badlen++;
1148 		return (-1);
1149 	}
1150 	sa = (union pfsync_state_union *)(mp->m_data + offp);
1151 
1152 	for (i = 0; i < count; i++) {
1153 		sp = &sa[i];
1154 
1155 		/* check for invalid values */
1156 		if (sp->pfs_1301.timeout >= PFTM_MAX ||
1157 		    sp->pfs_1301.src.state > PF_TCPS_PROXY_DST ||
1158 		    sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) {
1159 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1160 				printf("pfsync_input: PFSYNC_ACT_UPD: "
1161 				    "invalid value\n");
1162 			}
1163 			V_pfsyncstats.pfsyncs_badval++;
1164 			continue;
1165 		}
1166 
1167 		st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid);
1168 		if (st == NULL) {
1169 			/* insert the update */
1170 			if (pfsync_state_import(sp, flags, msg_version))
1171 				V_pfsyncstats.pfsyncs_badstate++;
1172 			continue;
1173 		}
1174 
1175 		if (st->state_flags & PFSTATE_ACK) {
1176 			pfsync_undefer_state(st, 1);
1177 		}
1178 
1179 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
1180 			sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst);
1181 		else {
1182 			sync = 0;
1183 
1184 			/*
1185 			 * Non-TCP protocol state machine always go
1186 			 * forwards
1187 			 */
1188 			if (st->src.state > sp->pfs_1301.src.state)
1189 				sync++;
1190 			else
1191 				pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src);
1192 			if (st->dst.state > sp->pfs_1301.dst.state)
1193 				sync++;
1194 			else
1195 				pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
1196 		}
1197 		if (sync < 2) {
1198 			pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst);
1199 			pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
1200 			st->expire = time_uptime;
1201 			st->timeout = sp->pfs_1301.timeout;
1202 		}
1203 		st->pfsync_time = time_uptime;
1204 
1205 		if (sync) {
1206 			V_pfsyncstats.pfsyncs_stale++;
1207 
1208 			pfsync_update_state(st);
1209 			PF_STATE_UNLOCK(st);
1210 			pfsync_push_all(sc);
1211 			continue;
1212 		}
1213 		PF_STATE_UNLOCK(st);
1214 	}
1215 
1216 	return (len);
1217 }
1218 
1219 static int
1220 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action)
1221 {
1222 	struct pfsync_softc *sc = V_pfsyncif;
1223 	struct pfsync_upd_c *ua, *up;
1224 	struct pf_kstate *st;
1225 	int len = count * sizeof(*up);
1226 	int sync;
1227 	struct mbuf *mp;
1228 	int offp, i;
1229 
1230 	mp = m_pulldown(m, offset, len, &offp);
1231 	if (mp == NULL) {
1232 		V_pfsyncstats.pfsyncs_badlen++;
1233 		return (-1);
1234 	}
1235 	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
1236 
1237 	for (i = 0; i < count; i++) {
1238 		up = &ua[i];
1239 
1240 		/* check for invalid values */
1241 		if (up->timeout >= PFTM_MAX ||
1242 		    up->src.state > PF_TCPS_PROXY_DST ||
1243 		    up->dst.state > PF_TCPS_PROXY_DST) {
1244 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1245 				printf("pfsync_input: "
1246 				    "PFSYNC_ACT_UPD_C: "
1247 				    "invalid value\n");
1248 			}
1249 			V_pfsyncstats.pfsyncs_badval++;
1250 			continue;
1251 		}
1252 
1253 		st = pf_find_state_byid(up->id, up->creatorid);
1254 		if (st == NULL) {
1255 			/* We don't have this state. Ask for it. */
1256 			PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
1257 			pfsync_request_update(up->creatorid, up->id);
1258 			PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
1259 			continue;
1260 		}
1261 
1262 		if (st->state_flags & PFSTATE_ACK) {
1263 			pfsync_undefer_state(st, 1);
1264 		}
1265 
1266 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
1267 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
1268 		else {
1269 			sync = 0;
1270 
1271 			/*
1272 			 * Non-TCP protocol state machine always go
1273 			 * forwards
1274 			 */
1275 			if (st->src.state > up->src.state)
1276 				sync++;
1277 			else
1278 				pf_state_peer_ntoh(&up->src, &st->src);
1279 			if (st->dst.state > up->dst.state)
1280 				sync++;
1281 			else
1282 				pf_state_peer_ntoh(&up->dst, &st->dst);
1283 		}
1284 		if (sync < 2) {
1285 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1286 			pf_state_peer_ntoh(&up->dst, &st->dst);
1287 			st->expire = time_uptime;
1288 			st->timeout = up->timeout;
1289 		}
1290 		st->pfsync_time = time_uptime;
1291 
1292 		if (sync) {
1293 			V_pfsyncstats.pfsyncs_stale++;
1294 
1295 			pfsync_update_state(st);
1296 			PF_STATE_UNLOCK(st);
1297 			pfsync_push_all(sc);
1298 			continue;
1299 		}
1300 		PF_STATE_UNLOCK(st);
1301 	}
1302 
1303 	return (len);
1304 }
1305 
1306 static int
1307 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action)
1308 {
1309 	struct pfsync_upd_req *ur, *ura;
1310 	struct mbuf *mp;
1311 	int len = count * sizeof(*ur);
1312 	int i, offp;
1313 
1314 	struct pf_kstate *st;
1315 
1316 	mp = m_pulldown(m, offset, len, &offp);
1317 	if (mp == NULL) {
1318 		V_pfsyncstats.pfsyncs_badlen++;
1319 		return (-1);
1320 	}
1321 	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
1322 
1323 	for (i = 0; i < count; i++) {
1324 		ur = &ura[i];
1325 
1326 		if (ur->id == 0 && ur->creatorid == 0)
1327 			pfsync_bulk_start();
1328 		else {
1329 			st = pf_find_state_byid(ur->id, ur->creatorid);
1330 			if (st == NULL) {
1331 				V_pfsyncstats.pfsyncs_badstate++;
1332 				continue;
1333 			}
1334 			if (st->state_flags & PFSTATE_NOSYNC) {
1335 				PF_STATE_UNLOCK(st);
1336 				continue;
1337 			}
1338 
1339 			pfsync_update_state_req(st);
1340 			PF_STATE_UNLOCK(st);
1341 		}
1342 	}
1343 
1344 	return (len);
1345 }
1346 
1347 static int
1348 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action)
1349 {
1350 	struct mbuf *mp;
1351 	struct pfsync_del_c *sa, *sp;
1352 	struct pf_kstate *st;
1353 	int len = count * sizeof(*sp);
1354 	int offp, i;
1355 
1356 	mp = m_pulldown(m, offset, len, &offp);
1357 	if (mp == NULL) {
1358 		V_pfsyncstats.pfsyncs_badlen++;
1359 		return (-1);
1360 	}
1361 	sa = (struct pfsync_del_c *)(mp->m_data + offp);
1362 
1363 	for (i = 0; i < count; i++) {
1364 		sp = &sa[i];
1365 
1366 		st = pf_find_state_byid(sp->id, sp->creatorid);
1367 		if (st == NULL) {
1368 			V_pfsyncstats.pfsyncs_badstate++;
1369 			continue;
1370 		}
1371 
1372 		st->state_flags |= PFSTATE_NOSYNC;
1373 		pf_unlink_state(st);
1374 	}
1375 
1376 	return (len);
1377 }
1378 
1379 static int
1380 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action)
1381 {
1382 	struct pfsync_softc *sc = V_pfsyncif;
1383 	struct pfsync_bus *bus;
1384 	struct mbuf *mp;
1385 	int len = count * sizeof(*bus);
1386 	int offp;
1387 
1388 	PFSYNC_BLOCK(sc);
1389 
1390 	/* If we're not waiting for a bulk update, who cares. */
1391 	if (sc->sc_ureq_sent == 0) {
1392 		PFSYNC_BUNLOCK(sc);
1393 		return (len);
1394 	}
1395 
1396 	mp = m_pulldown(m, offset, len, &offp);
1397 	if (mp == NULL) {
1398 		PFSYNC_BUNLOCK(sc);
1399 		V_pfsyncstats.pfsyncs_badlen++;
1400 		return (-1);
1401 	}
1402 	bus = (struct pfsync_bus *)(mp->m_data + offp);
1403 
1404 	switch (bus->status) {
1405 	case PFSYNC_BUS_START:
1406 		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
1407 		    V_pf_limits[PF_LIMIT_STATES].limit /
1408 		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
1409 		    sizeof(union pfsync_state_union)),
1410 		    pfsync_bulk_fail, sc);
1411 		if (V_pf_status.debug >= PF_DEBUG_MISC)
1412 			printf("pfsync: received bulk update start\n");
1413 		break;
1414 
1415 	case PFSYNC_BUS_END:
1416 		if (time_uptime - ntohl(bus->endtime) >=
1417 		    sc->sc_ureq_sent) {
1418 			/* that's it, we're happy */
1419 			sc->sc_ureq_sent = 0;
1420 			sc->sc_bulk_tries = 0;
1421 			callout_stop(&sc->sc_bulkfail_tmo);
1422 			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
1423 				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
1424 				    "pfsync bulk done");
1425 			sc->sc_flags |= PFSYNCF_OK;
1426 			if (V_pf_status.debug >= PF_DEBUG_MISC)
1427 				printf("pfsync: received valid "
1428 				    "bulk update end\n");
1429 		} else {
1430 			if (V_pf_status.debug >= PF_DEBUG_MISC)
1431 				printf("pfsync: received invalid "
1432 				    "bulk update end: bad timestamp\n");
1433 		}
1434 		break;
1435 	}
1436 	PFSYNC_BUNLOCK(sc);
1437 
1438 	return (len);
1439 }
1440 
1441 static int
1442 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action)
1443 {
1444 	int len = count * sizeof(struct pfsync_tdb);
1445 
1446 #if defined(IPSEC)
1447 	struct pfsync_tdb *tp;
1448 	struct mbuf *mp;
1449 	int offp;
1450 	int i;
1451 	int s;
1452 
1453 	mp = m_pulldown(m, offset, len, &offp);
1454 	if (mp == NULL) {
1455 		V_pfsyncstats.pfsyncs_badlen++;
1456 		return (-1);
1457 	}
1458 	tp = (struct pfsync_tdb *)(mp->m_data + offp);
1459 
1460 	for (i = 0; i < count; i++)
1461 		pfsync_update_net_tdb(&tp[i]);
1462 #endif
1463 
1464 	return (len);
1465 }
1466 
1467 #if defined(IPSEC)
1468 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1469 static void
1470 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1471 {
1472 	struct tdb		*tdb;
1473 	int			 s;
1474 
1475 	/* check for invalid values */
1476 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1477 	    (pt->dst.sa.sa_family != AF_INET &&
1478 	    pt->dst.sa.sa_family != AF_INET6))
1479 		goto bad;
1480 
1481 	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
1482 	if (tdb) {
1483 		pt->rpl = ntohl(pt->rpl);
1484 		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);
1485 
1486 		/* Neither replay nor byte counter should ever decrease. */
1487 		if (pt->rpl < tdb->tdb_rpl ||
1488 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1489 			goto bad;
1490 		}
1491 
1492 		tdb->tdb_rpl = pt->rpl;
1493 		tdb->tdb_cur_bytes = pt->cur_bytes;
1494 	}
1495 	return;
1496 
1497 bad:
1498 	if (V_pf_status.debug >= PF_DEBUG_MISC)
1499 		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1500 		    "invalid value\n");
1501 	V_pfsyncstats.pfsyncs_badstate++;
1502 	return;
1503 }
1504 #endif
1505 
1506 static int
1507 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action)
1508 {
1509 	/* check if we are at the right place in the packet */
1510 	if (offset != m->m_pkthdr.len)
1511 		V_pfsyncstats.pfsyncs_badlen++;
1512 
1513 	/* we're done. free and let the caller return */
1514 	m_freem(m);
1515 	return (-1);
1516 }
1517 
1518 static int
1519 pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action)
1520 {
1521 	V_pfsyncstats.pfsyncs_badact++;
1522 
1523 	m_freem(m);
1524 	return (-1);
1525 }
1526 
/*
 * Output routine of the pfsync interface: the packet is simply discarded
 * and success is reported.
 */
static int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
	struct route *rt)
{
	int error = 0;

	m_freem(m);

	return (error);
}
1534 
1535 /* ARGSUSED */
1536 static int
1537 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1538 {
1539 	struct pfsync_softc *sc = ifp->if_softc;
1540 	struct ifreq *ifr = (struct ifreq *)data;
1541 	struct pfsyncreq pfsyncr;
1542 	size_t nvbuflen;
1543 	int error;
1544 	int c;
1545 
1546 	switch (cmd) {
1547 	case SIOCSIFFLAGS:
1548 		PFSYNC_LOCK(sc);
1549 		if (ifp->if_flags & IFF_UP) {
1550 			ifp->if_drv_flags |= IFF_DRV_RUNNING;
1551 			PFSYNC_UNLOCK(sc);
1552 			pfsync_pointers_init();
1553 		} else {
1554 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1555 			PFSYNC_UNLOCK(sc);
1556 			pfsync_pointers_uninit();
1557 		}
1558 		break;
1559 	case SIOCSIFMTU:
1560 		if (!sc->sc_sync_if ||
1561 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1562 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1563 			return (EINVAL);
1564 		if (ifr->ifr_mtu < ifp->if_mtu) {
1565 			for (c = 0; c < pfsync_buckets; c++) {
1566 				PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
1567 				if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
1568 					pfsync_sendout(1, c);
1569 				PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
1570 			}
1571 		}
1572 		ifp->if_mtu = ifr->ifr_mtu;
1573 		break;
1574 	case SIOCGETPFSYNC:
1575 		bzero(&pfsyncr, sizeof(pfsyncr));
1576 		PFSYNC_LOCK(sc);
1577 		if (sc->sc_sync_if) {
1578 			strlcpy(pfsyncr.pfsyncr_syncdev,
1579 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1580 		}
1581 		pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr;
1582 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1583 		pfsyncr.pfsyncr_defer = sc->sc_flags;
1584 		PFSYNC_UNLOCK(sc);
1585 		return (copyout(&pfsyncr, ifr_data_get_ptr(ifr),
1586 		    sizeof(pfsyncr)));
1587 
1588 	case SIOCGETPFSYNCNV:
1589 	    {
1590 		nvlist_t *nvl_syncpeer;
1591 		nvlist_t *nvl = nvlist_create(0);
1592 
1593 		if (nvl == NULL)
1594 			return (ENOMEM);
1595 
1596 		if (sc->sc_sync_if)
1597 			nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname);
1598 		nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates);
1599 		nvlist_add_number(nvl, "flags", sc->sc_flags);
1600 		nvlist_add_number(nvl, "version", sc->sc_version);
1601 		if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL)
1602 			nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer);
1603 
1604 		void *packed = NULL;
1605 		packed = nvlist_pack(nvl, &nvbuflen);
1606 		if (packed == NULL) {
1607 			free(packed, M_NVLIST);
1608 			nvlist_destroy(nvl);
1609 			return (ENOMEM);
1610 		}
1611 
1612 		if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
1613 			ifr->ifr_cap_nv.length = nvbuflen;
1614 			ifr->ifr_cap_nv.buffer = NULL;
1615 			free(packed, M_NVLIST);
1616 			nvlist_destroy(nvl);
1617 			return (EFBIG);
1618 		}
1619 
1620 		ifr->ifr_cap_nv.length = nvbuflen;
1621 		error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen);
1622 
1623 		nvlist_destroy(nvl);
1624 		nvlist_destroy(nvl_syncpeer);
1625 		free(packed, M_NVLIST);
1626 		break;
1627 	    }
1628 
1629 	case SIOCSETPFSYNC:
1630 	    {
1631 		struct pfsync_kstatus status;
1632 
1633 		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1634 			return (error);
1635 		if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr,
1636 		    sizeof(pfsyncr))))
1637 			return (error);
1638 
1639 		memset((char *)&status, 0, sizeof(struct pfsync_kstatus));
1640 		pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status);
1641 
1642 		error = pfsync_kstatus_to_softc(&status, sc);
1643 		return (error);
1644 	    }
1645 	case SIOCSETPFSYNCNV:
1646 	    {
1647 		struct pfsync_kstatus status;
1648 		void *data;
1649 		nvlist_t *nvl;
1650 
1651 		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1652 			return (error);
1653 		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
1654 			return (EINVAL);
1655 
1656 		data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
1657 
1658 		if ((error = copyin(ifr->ifr_cap_nv.buffer, data,
1659 		    ifr->ifr_cap_nv.length)) != 0) {
1660 			free(data, M_TEMP);
1661 			return (error);
1662 		}
1663 
1664 		if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) {
1665 			free(data, M_TEMP);
1666 			return (EINVAL);
1667 		}
1668 
1669 		memset((char *)&status, 0, sizeof(struct pfsync_kstatus));
1670 		pfsync_nvstatus_to_kstatus(nvl, &status);
1671 
1672 		nvlist_destroy(nvl);
1673 		free(data, M_TEMP);
1674 
1675 		error = pfsync_kstatus_to_softc(&status, sc);
1676 		return (error);
1677 	    }
1678 	default:
1679 		return (ENOTTY);
1680 	}
1681 
1682 	return (0);
1683 }
1684 
1685 static void
1686 pfsync_out_state_1301(struct pf_kstate *st, void *buf)
1687 {
1688 	union pfsync_state_union *sp = buf;
1689 
1690 	pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301);
1691 }
1692 
1693 static void
1694 pfsync_out_state_1400(struct pf_kstate *st, void *buf)
1695 {
1696 	union pfsync_state_union *sp = buf;
1697 
1698 	pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400);
1699 }
1700 
1701 static void
1702 pfsync_out_iack(struct pf_kstate *st, void *buf)
1703 {
1704 	struct pfsync_ins_ack *iack = buf;
1705 
1706 	iack->id = st->id;
1707 	iack->creatorid = st->creatorid;
1708 }
1709 
1710 static void
1711 pfsync_out_upd_c(struct pf_kstate *st, void *buf)
1712 {
1713 	struct pfsync_upd_c *up = buf;
1714 
1715 	bzero(up, sizeof(*up));
1716 	up->id = st->id;
1717 	pf_state_peer_hton(&st->src, &up->src);
1718 	pf_state_peer_hton(&st->dst, &up->dst);
1719 	up->creatorid = st->creatorid;
1720 	up->timeout = st->timeout;
1721 }
1722 
1723 static void
1724 pfsync_out_del_c(struct pf_kstate *st, void *buf)
1725 {
1726 	struct pfsync_del_c *dp = buf;
1727 
1728 	dp->id = st->id;
1729 	dp->creatorid = st->creatorid;
1730 	st->state_flags |= PFSTATE_NOSYNC;
1731 }
1732 
1733 static void
1734 pfsync_drop(struct pfsync_softc *sc)
1735 {
1736 	struct pf_kstate *st, *next;
1737 	struct pfsync_upd_req_item *ur;
1738 	struct pfsync_bucket *b;
1739 	int c;
1740 	enum pfsync_q_id q;
1741 
1742 	for (c = 0; c < pfsync_buckets; c++) {
1743 		b = &sc->sc_buckets[c];
1744 		for (q = 0; q < PFSYNC_Q_COUNT; q++) {
1745 			if (TAILQ_EMPTY(&b->b_qs[q]))
1746 				continue;
1747 
1748 			TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
1749 				KASSERT(st->sync_state == pfsync_qid_sstate[q],
1750 					("%s: st->sync_state == q",
1751 						__func__));
1752 				st->sync_state = PFSYNC_S_NONE;
1753 				pf_release_state(st);
1754 			}
1755 			TAILQ_INIT(&b->b_qs[q]);
1756 		}
1757 
1758 		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
1759 			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
1760 			free(ur, M_PFSYNC);
1761 		}
1762 
1763 		b->b_len = PFSYNC_MINPKT;
1764 		b->b_plus = NULL;
1765 	}
1766 }
1767 
1768 static void
1769 pfsync_sendout(int schedswi, int c)
1770 {
1771 	struct pfsync_softc *sc = V_pfsyncif;
1772 	struct ifnet *ifp = sc->sc_ifp;
1773 	struct mbuf *m;
1774 	struct pfsync_header *ph;
1775 	struct pfsync_subheader *subh;
1776 	struct pf_kstate *st, *st_next;
1777 	struct pfsync_upd_req_item *ur;
1778 	struct pfsync_bucket *b = &sc->sc_buckets[c];
1779 	int aflen, offset, count = 0;
1780 	enum pfsync_q_id q;
1781 
1782 	KASSERT(sc != NULL, ("%s: null sc", __func__));
1783 	KASSERT(b->b_len > PFSYNC_MINPKT,
1784 	    ("%s: sc_len %zu", __func__, b->b_len));
1785 	PFSYNC_BUCKET_LOCK_ASSERT(b);
1786 
1787 	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
1788 		pfsync_drop(sc);
1789 		return;
1790 	}
1791 
1792 	m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
1793 	if (m == NULL) {
1794 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
1795 		V_pfsyncstats.pfsyncs_onomem++;
1796 		return;
1797 	}
1798 	m->m_data += max_linkhdr;
1799 	m->m_len = m->m_pkthdr.len = b->b_len;
1800 
1801 	/* build the ip header */
1802 	switch (sc->sc_sync_peer.ss_family) {
1803 #ifdef INET
1804 	case AF_INET:
1805 	    {
1806 		struct ip *ip;
1807 
1808 		ip = mtod(m, struct ip *);
1809 		bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip));
1810 		aflen = offset = sizeof(*ip);
1811 
1812 		ip->ip_len = htons(m->m_pkthdr.len);
1813 		ip_fillid(ip);
1814 		break;
1815 	    }
1816 #endif
1817 #ifdef INET6
1818 	case AF_INET6:
1819 		{
1820 		struct ip6_hdr *ip6;
1821 
1822 		ip6 = mtod(m, struct ip6_hdr *);
1823 		bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6));
1824 		aflen = offset = sizeof(*ip6);
1825 
1826 		ip6->ip6_plen = htons(m->m_pkthdr.len);
1827 		break;
1828 		}
1829 #endif
1830 	default:
1831 		m_freem(m);
1832 		return;
1833 	}
1834 
1835 	/* build the pfsync header */
1836 	ph = (struct pfsync_header *)(m->m_data + offset);
1837 	bzero(ph, sizeof(*ph));
1838 	offset += sizeof(*ph);
1839 
1840 	ph->version = PFSYNC_VERSION;
1841 	ph->len = htons(b->b_len - aflen);
1842 	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1843 
1844 	/* walk the queues */
1845 	for (q = 0; q < PFSYNC_Q_COUNT; q++) {
1846 		if (TAILQ_EMPTY(&b->b_qs[q]))
1847 			continue;
1848 
1849 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1850 		offset += sizeof(*subh);
1851 
1852 		count = 0;
1853 		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
1854 			KASSERT(st->sync_state == pfsync_qid_sstate[q],
1855 				("%s: st->sync_state == q",
1856 					__func__));
1857 			/*
1858 			 * XXXGL: some of write methods do unlocked reads
1859 			 * of state data :(
1860 			 */
1861 			pfsync_qs[q].write(st, m->m_data + offset);
1862 			offset += pfsync_qs[q].len;
1863 			st->sync_state = PFSYNC_S_NONE;
1864 			pf_release_state(st);
1865 			count++;
1866 		}
1867 		TAILQ_INIT(&b->b_qs[q]);
1868 
1869 		bzero(subh, sizeof(*subh));
1870 		subh->action = pfsync_qs[q].action;
1871 		subh->count = htons(count);
1872 		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
1873 	}
1874 
1875 	if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
1876 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1877 		offset += sizeof(*subh);
1878 
1879 		count = 0;
1880 		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
1881 			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
1882 
1883 			bcopy(&ur->ur_msg, m->m_data + offset,
1884 			    sizeof(ur->ur_msg));
1885 			offset += sizeof(ur->ur_msg);
1886 			free(ur, M_PFSYNC);
1887 			count++;
1888 		}
1889 
1890 		bzero(subh, sizeof(*subh));
1891 		subh->action = PFSYNC_ACT_UPD_REQ;
1892 		subh->count = htons(count);
1893 		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
1894 	}
1895 
1896 	/* has someone built a custom region for us to add? */
1897 	if (b->b_plus != NULL) {
1898 		bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
1899 		offset += b->b_pluslen;
1900 
1901 		b->b_plus = NULL;
1902 	}
1903 
1904 	subh = (struct pfsync_subheader *)(m->m_data + offset);
1905 	offset += sizeof(*subh);
1906 
1907 	bzero(subh, sizeof(*subh));
1908 	subh->action = PFSYNC_ACT_EOF;
1909 	subh->count = htons(1);
1910 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;
1911 
1912 	/* we're done, let's put it on the wire */
1913 	if (ifp->if_bpf) {
1914 		m->m_data += aflen;
1915 		m->m_len = m->m_pkthdr.len = b->b_len - aflen;
1916 		BPF_MTAP(ifp, m);
1917 		m->m_data -= aflen;
1918 		m->m_len = m->m_pkthdr.len = b->b_len;
1919 	}
1920 
1921 	if (sc->sc_sync_if == NULL) {
1922 		b->b_len = PFSYNC_MINPKT;
1923 		m_freem(m);
1924 		return;
1925 	}
1926 
1927 	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
1928 	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
1929 	b->b_len = PFSYNC_MINPKT;
1930 
1931 	if (!_IF_QFULL(&b->b_snd))
1932 		_IF_ENQUEUE(&b->b_snd, m);
1933 	else {
1934 		m_freem(m);
1935 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
1936 	}
1937 	if (schedswi)
1938 		swi_sched(V_pfsync_swi_cookie, 0);
1939 }
1940 
1941 static void
1942 pfsync_insert_state(struct pf_kstate *st)
1943 {
1944 	struct pfsync_softc *sc = V_pfsyncif;
1945 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
1946 
1947 	if (st->state_flags & PFSTATE_NOSYNC)
1948 		return;
1949 
1950 	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
1951 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1952 		st->state_flags |= PFSTATE_NOSYNC;
1953 		return;
1954 	}
1955 
1956 	KASSERT(st->sync_state == PFSYNC_S_NONE,
1957 		("%s: st->sync_state %u", __func__, st->sync_state));
1958 
1959 	PFSYNC_BUCKET_LOCK(b);
1960 	if (b->b_len == PFSYNC_MINPKT)
1961 		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
1962 
1963 	pfsync_q_ins(st, PFSYNC_S_INS, true);
1964 	PFSYNC_BUCKET_UNLOCK(b);
1965 
1966 	st->sync_updates = 0;
1967 }
1968 
1969 static int
1970 pfsync_defer(struct pf_kstate *st, struct mbuf *m)
1971 {
1972 	struct pfsync_softc *sc = V_pfsyncif;
1973 	struct pfsync_deferral *pd;
1974 	struct pfsync_bucket *b;
1975 
1976 	if (m->m_flags & (M_BCAST|M_MCAST))
1977 		return (0);
1978 
1979 	if (sc == NULL)
1980 		return (0);
1981 
1982 	b = pfsync_get_bucket(sc, st);
1983 
1984 	PFSYNC_LOCK(sc);
1985 
1986 	if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
1987 	    !(sc->sc_flags & PFSYNCF_DEFER)) {
1988 		PFSYNC_UNLOCK(sc);
1989 		return (0);
1990 	}
1991 
1992 	PFSYNC_BUCKET_LOCK(b);
1993 	PFSYNC_UNLOCK(sc);
1994 
1995 	if (b->b_deferred >= 128)
1996 		pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);
1997 
1998 	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
1999 	if (pd == NULL) {
2000 		PFSYNC_BUCKET_UNLOCK(b);
2001 		return (0);
2002 	}
2003 	b->b_deferred++;
2004 
2005 	m->m_flags |= M_SKIP_FIREWALL;
2006 	st->state_flags |= PFSTATE_ACK;
2007 
2008 	pd->pd_sc = sc;
2009 	pd->pd_st = st;
2010 	pf_ref_state(st);
2011 	pd->pd_m = m;
2012 
2013 	TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
2014 	callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
2015 	callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000,
2016 	    pfsync_defer_tmo, pd);
2017 
2018 	pfsync_push(b);
2019 	PFSYNC_BUCKET_UNLOCK(b);
2020 
2021 	return (1);
2022 }
2023 
2024 static void
2025 pfsync_undefer(struct pfsync_deferral *pd, int drop)
2026 {
2027 	struct pfsync_softc *sc = pd->pd_sc;
2028 	struct mbuf *m = pd->pd_m;
2029 	struct pf_kstate *st = pd->pd_st;
2030 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2031 
2032 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2033 
2034 	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
2035 	b->b_deferred--;
2036 	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
2037 	free(pd, M_PFSYNC);
2038 	pf_release_state(st);
2039 
2040 	if (drop)
2041 		m_freem(m);
2042 	else {
2043 		_IF_ENQUEUE(&b->b_snd, m);
2044 		pfsync_push(b);
2045 	}
2046 }
2047 
2048 static void
2049 pfsync_defer_tmo(void *arg)
2050 {
2051 	struct epoch_tracker et;
2052 	struct pfsync_deferral *pd = arg;
2053 	struct pfsync_softc *sc = pd->pd_sc;
2054 	struct mbuf *m = pd->pd_m;
2055 	struct pf_kstate *st = pd->pd_st;
2056 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2057 
2058 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2059 
2060 	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
2061 	b->b_deferred--;
2062 	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
2063 	PFSYNC_BUCKET_UNLOCK(b);
2064 	free(pd, M_PFSYNC);
2065 
2066 	if (sc->sc_sync_if == NULL) {
2067 		pf_release_state(st);
2068 		m_freem(m);
2069 		return;
2070 	}
2071 
2072 	NET_EPOCH_ENTER(et);
2073 	CURVNET_SET(sc->sc_sync_if->if_vnet);
2074 
2075 	pfsync_tx(sc, m);
2076 
2077 	pf_release_state(st);
2078 
2079 	CURVNET_RESTORE();
2080 	NET_EPOCH_EXIT(et);
2081 }
2082 
2083 static void
2084 pfsync_undefer_state_locked(struct pf_kstate *st, int drop)
2085 {
2086 	struct pfsync_softc *sc = V_pfsyncif;
2087 	struct pfsync_deferral *pd;
2088 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2089 
2090 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2091 
2092 	TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
2093 		 if (pd->pd_st == st) {
2094 			if (callout_stop(&pd->pd_tmo) > 0)
2095 				pfsync_undefer(pd, drop);
2096 
2097 			return;
2098 		}
2099 	}
2100 
2101 	panic("%s: unable to find deferred state", __func__);
2102 }
2103 
2104 static void
2105 pfsync_undefer_state(struct pf_kstate *st, int drop)
2106 {
2107 	struct pfsync_softc *sc = V_pfsyncif;
2108 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2109 
2110 	PFSYNC_BUCKET_LOCK(b);
2111 	pfsync_undefer_state_locked(st, drop);
2112 	PFSYNC_BUCKET_UNLOCK(b);
2113 }
2114 
2115 static struct pfsync_bucket*
2116 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st)
2117 {
2118 	int c = PF_IDHASH(st) % pfsync_buckets;
2119 	return &sc->sc_buckets[c];
2120 }
2121 
2122 static void
2123 pfsync_update_state(struct pf_kstate *st)
2124 {
2125 	struct pfsync_softc *sc = V_pfsyncif;
2126 	bool sync = false, ref = true;
2127 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2128 
2129 	PF_STATE_LOCK_ASSERT(st);
2130 	PFSYNC_BUCKET_LOCK(b);
2131 
2132 	if (st->state_flags & PFSTATE_ACK)
2133 		pfsync_undefer_state_locked(st, 0);
2134 	if (st->state_flags & PFSTATE_NOSYNC) {
2135 		if (st->sync_state != PFSYNC_S_NONE)
2136 			pfsync_q_del(st, true, b);
2137 		PFSYNC_BUCKET_UNLOCK(b);
2138 		return;
2139 	}
2140 
2141 	if (b->b_len == PFSYNC_MINPKT)
2142 		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
2143 
2144 	switch (st->sync_state) {
2145 	case PFSYNC_S_UPD_C:
2146 	case PFSYNC_S_UPD:
2147 	case PFSYNC_S_INS:
2148 		/* we're already handling it */
2149 
2150 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
2151 			st->sync_updates++;
2152 			if (st->sync_updates >= sc->sc_maxupdates)
2153 				sync = true;
2154 		}
2155 		break;
2156 
2157 	case PFSYNC_S_IACK:
2158 		pfsync_q_del(st, false, b);
2159 		ref = false;
2160 		/* FALLTHROUGH */
2161 
2162 	case PFSYNC_S_NONE:
2163 		pfsync_q_ins(st, PFSYNC_S_UPD_C, ref);
2164 		st->sync_updates = 0;
2165 		break;
2166 
2167 	default:
2168 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
2169 	}
2170 
2171 	if (sync || (time_uptime - st->pfsync_time) < 2)
2172 		pfsync_push(b);
2173 
2174 	PFSYNC_BUCKET_UNLOCK(b);
2175 }
2176 
2177 static void
2178 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
2179 {
2180 	struct pfsync_softc *sc = V_pfsyncif;
2181 	struct pfsync_bucket *b = &sc->sc_buckets[0];
2182 	struct pfsync_upd_req_item *item;
2183 	size_t nlen = sizeof(struct pfsync_upd_req);
2184 
2185 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2186 
2187 	/*
2188 	 * This code does a bit to prevent multiple update requests for the
2189 	 * same state being generated. It searches current subheader queue,
2190 	 * but it doesn't lookup into queue of already packed datagrams.
2191 	 */
2192 	TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
2193 		if (item->ur_msg.id == id &&
2194 		    item->ur_msg.creatorid == creatorid)
2195 			return;
2196 
2197 	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
2198 	if (item == NULL)
2199 		return; /* XXX stats */
2200 
2201 	item->ur_msg.id = id;
2202 	item->ur_msg.creatorid = creatorid;
2203 
2204 	if (TAILQ_EMPTY(&b->b_upd_req_list))
2205 		nlen += sizeof(struct pfsync_subheader);
2206 
2207 	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
2208 		pfsync_sendout(0, 0);
2209 
2210 		nlen = sizeof(struct pfsync_subheader) +
2211 		    sizeof(struct pfsync_upd_req);
2212 	}
2213 
2214 	TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
2215 	b->b_len += nlen;
2216 
2217 	pfsync_push(b);
2218 }
2219 
2220 static bool
2221 pfsync_update_state_req(struct pf_kstate *st)
2222 {
2223 	struct pfsync_softc *sc = V_pfsyncif;
2224 	bool ref = true, full = false;
2225 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2226 
2227 	PF_STATE_LOCK_ASSERT(st);
2228 	PFSYNC_BUCKET_LOCK(b);
2229 
2230 	if (st->state_flags & PFSTATE_NOSYNC) {
2231 		if (st->sync_state != PFSYNC_S_NONE)
2232 			pfsync_q_del(st, true, b);
2233 		PFSYNC_BUCKET_UNLOCK(b);
2234 		return (full);
2235 	}
2236 
2237 	switch (st->sync_state) {
2238 	case PFSYNC_S_UPD_C:
2239 	case PFSYNC_S_IACK:
2240 		pfsync_q_del(st, false, b);
2241 		ref = false;
2242 		/* FALLTHROUGH */
2243 
2244 	case PFSYNC_S_NONE:
2245 		pfsync_q_ins(st, PFSYNC_S_UPD, ref);
2246 		pfsync_push(b);
2247 		break;
2248 
2249 	case PFSYNC_S_INS:
2250 	case PFSYNC_S_UPD:
2251 	case PFSYNC_S_DEL_C:
2252 		/* we're already handling it */
2253 		break;
2254 
2255 	default:
2256 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
2257 	}
2258 
2259 	if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union))
2260 		full = true;
2261 
2262 	PFSYNC_BUCKET_UNLOCK(b);
2263 
2264 	return (full);
2265 }
2266 
2267 static void
2268 pfsync_delete_state(struct pf_kstate *st)
2269 {
2270 	struct pfsync_softc *sc = V_pfsyncif;
2271 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2272 	bool ref = true;
2273 
2274 	PFSYNC_BUCKET_LOCK(b);
2275 	if (st->state_flags & PFSTATE_ACK)
2276 		pfsync_undefer_state_locked(st, 1);
2277 	if (st->state_flags & PFSTATE_NOSYNC) {
2278 		if (st->sync_state != PFSYNC_S_NONE)
2279 			pfsync_q_del(st, true, b);
2280 		PFSYNC_BUCKET_UNLOCK(b);
2281 		return;
2282 	}
2283 
2284 	if (b->b_len == PFSYNC_MINPKT)
2285 		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
2286 
2287 	switch (st->sync_state) {
2288 	case PFSYNC_S_INS:
2289 		/* We never got to tell the world so just forget about it. */
2290 		pfsync_q_del(st, true, b);
2291 		break;
2292 
2293 	case PFSYNC_S_UPD_C:
2294 	case PFSYNC_S_UPD:
2295 	case PFSYNC_S_IACK:
2296 		pfsync_q_del(st, false, b);
2297 		ref = false;
2298 		/* FALLTHROUGH */
2299 
2300 	case PFSYNC_S_NONE:
2301 		pfsync_q_ins(st, PFSYNC_S_DEL_C, ref);
2302 		break;
2303 
2304 	default:
2305 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
2306 	}
2307 
2308 	PFSYNC_BUCKET_UNLOCK(b);
2309 }
2310 
2311 static void
2312 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2313 {
2314 	struct {
2315 		struct pfsync_subheader subh;
2316 		struct pfsync_clr clr;
2317 	} __packed r;
2318 
2319 	bzero(&r, sizeof(r));
2320 
2321 	r.subh.action = PFSYNC_ACT_CLR;
2322 	r.subh.count = htons(1);
2323 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
2324 
2325 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2326 	r.clr.creatorid = creatorid;
2327 
2328 	pfsync_send_plus(&r, sizeof(r));
2329 }
2330 
2331 static enum pfsync_q_id
2332 pfsync_sstate_to_qid(u_int8_t sync_state)
2333 {
2334 	struct pfsync_softc *sc = V_pfsyncif;
2335 
2336 	switch (sync_state) {
2337 		case PFSYNC_S_INS:
2338 			switch (sc->sc_version) {
2339 				case PFSYNC_MSG_VERSION_1301:
2340 					return PFSYNC_Q_INS_1301;
2341 				case PFSYNC_MSG_VERSION_1400:
2342 					return PFSYNC_Q_INS_1400;
2343 			}
2344 			break;
2345 		case PFSYNC_S_IACK:
2346 			return PFSYNC_Q_IACK;
2347 		case PFSYNC_S_UPD:
2348 			switch (sc->sc_version) {
2349 				case PFSYNC_MSG_VERSION_1301:
2350 					return PFSYNC_Q_UPD_1301;
2351 				case PFSYNC_MSG_VERSION_1400:
2352 					return PFSYNC_Q_UPD_1400;
2353 			}
2354 			break;
2355 		case PFSYNC_S_UPD_C:
2356 			return PFSYNC_Q_UPD_C;
2357 		case PFSYNC_S_DEL_C:
2358 			return PFSYNC_Q_DEL_C;
2359 		default:
2360 			panic("%s: Unsupported st->sync_state 0x%02x",
2361 			__func__, sync_state);
2362 	}
2363 
2364 	panic("%s: Unsupported pfsync_msg_version %d",
2365 	    __func__, sc->sc_version);
2366 }
2367 
2368 static void
2369 pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref)
2370 {
2371 	enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state);
2372 	struct pfsync_softc *sc = V_pfsyncif;
2373 	size_t nlen = pfsync_qs[q].len;
2374 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2375 
2376 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2377 
2378 	KASSERT(st->sync_state == PFSYNC_S_NONE,
2379 		("%s: st->sync_state %u", __func__, st->sync_state));
2380 	KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
2381 	    b->b_len));
2382 
2383 	if (TAILQ_EMPTY(&b->b_qs[q]))
2384 		nlen += sizeof(struct pfsync_subheader);
2385 
2386 	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
2387 		pfsync_sendout(1, b->b_id);
2388 
2389 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2390 	}
2391 
2392 	b->b_len += nlen;
2393 	TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
2394 	st->sync_state = pfsync_qid_sstate[q];
2395 	if (ref)
2396 		pf_ref_state(st);
2397 }
2398 
2399 static void
2400 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b)
2401 {
2402 	enum pfsync_q_id q;
2403 
2404 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2405 	KASSERT(st->sync_state != PFSYNC_S_NONE,
2406 		("%s: st->sync_state != PFSYNC_S_NONE", __func__));
2407 
2408 	q =  pfsync_sstate_to_qid(st->sync_state);
2409 	b->b_len -= pfsync_qs[q].len;
2410 	TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
2411 	st->sync_state = PFSYNC_S_NONE;
2412 	if (unref)
2413 		pf_release_state(st);
2414 
2415 	if (TAILQ_EMPTY(&b->b_qs[q]))
2416 		b->b_len -= sizeof(struct pfsync_subheader);
2417 }
2418 
2419 static void
2420 pfsync_bulk_start(void)
2421 {
2422 	struct pfsync_softc *sc = V_pfsyncif;
2423 
2424 	if (V_pf_status.debug >= PF_DEBUG_MISC)
2425 		printf("pfsync: received bulk update request\n");
2426 
2427 	PFSYNC_BLOCK(sc);
2428 
2429 	sc->sc_ureq_received = time_uptime;
2430 	sc->sc_bulk_hashid = 0;
2431 	sc->sc_bulk_stateid = 0;
2432 	pfsync_bulk_status(PFSYNC_BUS_START);
2433 	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
2434 	PFSYNC_BUNLOCK(sc);
2435 }
2436 
2437 static void
2438 pfsync_bulk_update(void *arg)
2439 {
2440 	struct pfsync_softc *sc = arg;
2441 	struct pf_kstate *s;
2442 	int i;
2443 
2444 	PFSYNC_BLOCK_ASSERT(sc);
2445 	CURVNET_SET(sc->sc_ifp->if_vnet);
2446 
2447 	/*
2448 	 * Start with last state from previous invocation.
2449 	 * It may had gone, in this case start from the
2450 	 * hash slot.
2451 	 */
2452 	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);
2453 
2454 	if (s != NULL)
2455 		i = PF_IDHASH(s);
2456 	else
2457 		i = sc->sc_bulk_hashid;
2458 
2459 	for (; i <= pf_hashmask; i++) {
2460 		struct pf_idhash *ih = &V_pf_idhash[i];
2461 
2462 		if (s != NULL)
2463 			PF_HASHROW_ASSERT(ih);
2464 		else {
2465 			PF_HASHROW_LOCK(ih);
2466 			s = LIST_FIRST(&ih->states);
2467 		}
2468 
2469 		for (; s; s = LIST_NEXT(s, entry)) {
2470 			if (s->sync_state == PFSYNC_S_NONE &&
2471 			    s->timeout < PFTM_MAX &&
2472 			    s->pfsync_time <= sc->sc_ureq_received) {
2473 				if (pfsync_update_state_req(s)) {
2474 					/* We've filled a packet. */
2475 					sc->sc_bulk_hashid = i;
2476 					sc->sc_bulk_stateid = s->id;
2477 					sc->sc_bulk_creatorid = s->creatorid;
2478 					PF_HASHROW_UNLOCK(ih);
2479 					callout_reset(&sc->sc_bulk_tmo, 1,
2480 					    pfsync_bulk_update, sc);
2481 					goto full;
2482 				}
2483 			}
2484 		}
2485 		PF_HASHROW_UNLOCK(ih);
2486 	}
2487 
2488 	/* We're done. */
2489 	pfsync_bulk_status(PFSYNC_BUS_END);
2490 full:
2491 	CURVNET_RESTORE();
2492 }
2493 
2494 static void
2495 pfsync_bulk_status(u_int8_t status)
2496 {
2497 	struct {
2498 		struct pfsync_subheader subh;
2499 		struct pfsync_bus bus;
2500 	} __packed r;
2501 
2502 	struct pfsync_softc *sc = V_pfsyncif;
2503 
2504 	bzero(&r, sizeof(r));
2505 
2506 	r.subh.action = PFSYNC_ACT_BUS;
2507 	r.subh.count = htons(1);
2508 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;
2509 
2510 	r.bus.creatorid = V_pf_status.hostid;
2511 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2512 	r.bus.status = status;
2513 
2514 	pfsync_send_plus(&r, sizeof(r));
2515 }
2516 
2517 static void
2518 pfsync_bulk_fail(void *arg)
2519 {
2520 	struct pfsync_softc *sc = arg;
2521 	struct pfsync_bucket *b = &sc->sc_buckets[0];
2522 
2523 	CURVNET_SET(sc->sc_ifp->if_vnet);
2524 
2525 	PFSYNC_BLOCK_ASSERT(sc);
2526 
2527 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2528 		/* Try again */
2529 		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
2530 		    pfsync_bulk_fail, V_pfsyncif);
2531 		PFSYNC_BUCKET_LOCK(b);
2532 		pfsync_request_update(0, 0);
2533 		PFSYNC_BUCKET_UNLOCK(b);
2534 	} else {
2535 		/* Pretend like the transfer was ok. */
2536 		sc->sc_ureq_sent = 0;
2537 		sc->sc_bulk_tries = 0;
2538 		PFSYNC_LOCK(sc);
2539 		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
2540 			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
2541 			    "pfsync bulk fail");
2542 		sc->sc_flags |= PFSYNCF_OK;
2543 		PFSYNC_UNLOCK(sc);
2544 		if (V_pf_status.debug >= PF_DEBUG_MISC)
2545 			printf("pfsync: failed to receive bulk update\n");
2546 	}
2547 
2548 	CURVNET_RESTORE();
2549 }
2550 
2551 static void
2552 pfsync_send_plus(void *plus, size_t pluslen)
2553 {
2554 	struct pfsync_softc *sc = V_pfsyncif;
2555 	struct pfsync_bucket *b = &sc->sc_buckets[0];
2556 
2557 	PFSYNC_BUCKET_LOCK(b);
2558 
2559 	if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
2560 		pfsync_sendout(1, b->b_id);
2561 
2562 	b->b_plus = plus;
2563 	b->b_len += (b->b_pluslen = pluslen);
2564 
2565 	pfsync_sendout(1, b->b_id);
2566 	PFSYNC_BUCKET_UNLOCK(b);
2567 }
2568 
2569 static void
2570 pfsync_timeout(void *arg)
2571 {
2572 	struct pfsync_bucket *b = arg;
2573 
2574 	CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
2575 	PFSYNC_BUCKET_LOCK(b);
2576 	pfsync_push(b);
2577 	PFSYNC_BUCKET_UNLOCK(b);
2578 	CURVNET_RESTORE();
2579 }
2580 
2581 static void
2582 pfsync_push(struct pfsync_bucket *b)
2583 {
2584 
2585 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2586 
2587 	b->b_flags |= PFSYNCF_BUCKET_PUSH;
2588 	swi_sched(V_pfsync_swi_cookie, 0);
2589 }
2590 
2591 static void
2592 pfsync_push_all(struct pfsync_softc *sc)
2593 {
2594 	int c;
2595 	struct pfsync_bucket *b;
2596 
2597 	for (c = 0; c < pfsync_buckets; c++) {
2598 		b = &sc->sc_buckets[c];
2599 
2600 		PFSYNC_BUCKET_LOCK(b);
2601 		pfsync_push(b);
2602 		PFSYNC_BUCKET_UNLOCK(b);
2603 	}
2604 }
2605 
2606 static void
2607 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m)
2608 {
2609 	struct ip *ip;
2610 	int af, error = 0;
2611 
2612 	ip = mtod(m, struct ip *);
2613 	MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4));
2614 
2615 	af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6;
2616 
2617 	/*
2618 	 * We distinguish between a deferral packet and our
2619 	 * own pfsync packet based on M_SKIP_FIREWALL
2620 	 * flag. This is XXX.
2621 	 */
2622 	switch (af) {
2623 #ifdef INET
2624 	case AF_INET:
2625 		if (m->m_flags & M_SKIP_FIREWALL) {
2626 			error = ip_output(m, NULL, NULL, 0,
2627 			    NULL, NULL);
2628 		} else {
2629 			error = ip_output(m, NULL, NULL,
2630 			    IP_RAWOUTPUT, &sc->sc_imo, NULL);
2631 		}
2632 		break;
2633 #endif
2634 #ifdef INET6
2635 	case AF_INET6:
2636 		if (m->m_flags & M_SKIP_FIREWALL) {
2637 			error = ip6_output(m, NULL, NULL, 0,
2638 			    NULL, NULL, NULL);
2639 		} else {
2640 			error = ip6_output(m, NULL, NULL, 0,
2641 				&sc->sc_im6o, NULL, NULL);
2642 		}
2643 		break;
2644 #endif
2645 	}
2646 
2647 	if (error == 0)
2648 		V_pfsyncstats.pfsyncs_opackets++;
2649 	else
2650 		V_pfsyncstats.pfsyncs_oerrors++;
2651 
2652 }
2653 
2654 static void
2655 pfsyncintr(void *arg)
2656 {
2657 	struct epoch_tracker et;
2658 	struct pfsync_softc *sc = arg;
2659 	struct pfsync_bucket *b;
2660 	struct mbuf *m, *n;
2661 	int c;
2662 
2663 	NET_EPOCH_ENTER(et);
2664 	CURVNET_SET(sc->sc_ifp->if_vnet);
2665 
2666 	for (c = 0; c < pfsync_buckets; c++) {
2667 		b = &sc->sc_buckets[c];
2668 
2669 		PFSYNC_BUCKET_LOCK(b);
2670 		if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
2671 			pfsync_sendout(0, b->b_id);
2672 			b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
2673 		}
2674 		_IF_DEQUEUE_ALL(&b->b_snd, m);
2675 		PFSYNC_BUCKET_UNLOCK(b);
2676 
2677 		for (; m != NULL; m = n) {
2678 			n = m->m_nextpkt;
2679 			m->m_nextpkt = NULL;
2680 
2681 			pfsync_tx(sc, m);
2682 		}
2683 	}
2684 	CURVNET_RESTORE();
2685 	NET_EPOCH_EXIT(et);
2686 }
2687 
2688 static int
2689 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp,
2690     struct in_mfilter* imf, struct in6_mfilter* im6f)
2691 {
2692 #ifdef  INET
2693 	struct ip_moptions *imo = &sc->sc_imo;
2694 #endif
2695 #ifdef INET6
2696 	struct ip6_moptions *im6o = &sc->sc_im6o;
2697 	struct sockaddr_in6 *syncpeer_sa6 = NULL;
2698 #endif
2699 
2700 	if (!(ifp->if_flags & IFF_MULTICAST))
2701 		return (EADDRNOTAVAIL);
2702 
2703 	switch (sc->sc_sync_peer.ss_family) {
2704 #ifdef INET
2705 	case AF_INET:
2706 	{
2707 		int error;
2708 
2709 		ip_mfilter_init(&imo->imo_head);
2710 		imo->imo_multicast_vif = -1;
2711 		if ((error = in_joingroup(ifp,
2712 		    &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL,
2713 		    &imf->imf_inm)) != 0)
2714 			return (error);
2715 
2716 		ip_mfilter_insert(&imo->imo_head, imf);
2717 		imo->imo_multicast_ifp = ifp;
2718 		imo->imo_multicast_ttl = PFSYNC_DFLTTL;
2719 		imo->imo_multicast_loop = 0;
2720 		break;
2721 	}
2722 #endif
2723 #ifdef INET6
2724 	case AF_INET6:
2725 	{
2726 		int error;
2727 
2728 		syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer;
2729 		if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL)))
2730 			return (error);
2731 
2732 		ip6_mfilter_init(&im6o->im6o_head);
2733 		if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL,
2734 		    &(im6f->im6f_in6m), 0)) != 0)
2735 			return (error);
2736 
2737 		ip6_mfilter_insert(&im6o->im6o_head, im6f);
2738 		im6o->im6o_multicast_ifp = ifp;
2739 		im6o->im6o_multicast_hlim = PFSYNC_DFLTTL;
2740 		im6o->im6o_multicast_loop = 0;
2741 		break;
2742 	}
2743 #endif
2744 	}
2745 
2746 	return (0);
2747 }
2748 
/*
 * Undo pfsync_multicast_setup(): for each compiled-in address family, every
 * filter on the softc's multicast options is unlinked, its group is left and
 * the filter is freed; the multicast interface pointer is then cleared.
 */
static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
#ifdef INET
	struct ip_moptions *imo = &sc->sc_imo;
	struct in_mfilter *imf;

	for (;;) {
		imf = ip_mfilter_first(&imo->imo_head);
		if (imf == NULL)
			break;

		ip_mfilter_remove(&imo->imo_head, imf);
		in_leavegroup(imf->imf_inm, NULL);
		ip_mfilter_free(imf);
	}
	imo->imo_multicast_ifp = NULL;
#endif

#ifdef INET6
	struct ip6_moptions *im6o = &sc->sc_im6o;
	struct in6_mfilter *im6f;

	for (;;) {
		im6f = ip6_mfilter_first(&im6o->im6o_head);
		if (im6f == NULL)
			break;

		ip6_mfilter_remove(&im6o->im6o_head, im6f);
		in6_leavegroup(im6f->im6f_in6m, NULL);
		ip6_mfilter_free(im6f);
	}
	im6o->im6o_multicast_ifp = NULL;
#endif
}
2776 
2777 void
2778 pfsync_detach_ifnet(struct ifnet *ifp)
2779 {
2780 	struct pfsync_softc *sc = V_pfsyncif;
2781 
2782 	if (sc == NULL)
2783 		return;
2784 
2785 	PFSYNC_LOCK(sc);
2786 
2787 	if (sc->sc_sync_if == ifp) {
2788 		/* We don't need mutlicast cleanup here, because the interface
2789 		 * is going away. We do need to ensure we don't try to do
2790 		 * cleanup later.
2791 		 */
2792 		ip_mfilter_init(&sc->sc_imo.imo_head);
2793 		sc->sc_imo.imo_multicast_ifp = NULL;
2794 		sc->sc_im6o.im6o_multicast_ifp = NULL;
2795 		sc->sc_sync_if = NULL;
2796 	}
2797 
2798 	PFSYNC_UNLOCK(sc);
2799 }
2800 
2801 static int
2802 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status)
2803 {
2804 	struct sockaddr_storage sa;
2805 	status->maxupdates = pfsyncr->pfsyncr_maxupdates;
2806 	status->flags = pfsyncr->pfsyncr_defer;
2807 
2808 	strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ);
2809 
2810 	memset(&sa, 0, sizeof(sa));
2811 	if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) {
2812 		struct sockaddr_in *in = (struct sockaddr_in *)&sa;
2813 		in->sin_family = AF_INET;
2814 		in->sin_len = sizeof(*in);
2815 		in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr;
2816 	}
2817 	status->syncpeer = sa;
2818 
2819 	return 0;
2820 }
2821 
2822 static int
2823 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc)
2824 {
2825 	struct ifnet *sifp;
2826 	struct in_mfilter *imf = NULL;
2827 	struct in6_mfilter *im6f = NULL;
2828 	int error;
2829 	int c;
2830 
2831 	if ((status->maxupdates < 0) || (status->maxupdates > 255))
2832 		return (EINVAL);
2833 
2834 	if (status->syncdev[0] == '\0')
2835 		sifp = NULL;
2836 	else if ((sifp = ifunit_ref(status->syncdev)) == NULL)
2837 		return (EINVAL);
2838 
2839 	switch (status->syncpeer.ss_family) {
2840 #ifdef INET
2841 	case AF_UNSPEC:
2842 	case AF_INET: {
2843 		struct sockaddr_in *status_sin;
2844 		status_sin = (struct sockaddr_in *)&(status->syncpeer);
2845 		if (sifp != NULL) {
2846 			if (status_sin->sin_addr.s_addr == 0 ||
2847 			    status_sin->sin_addr.s_addr ==
2848 			    htonl(INADDR_PFSYNC_GROUP)) {
2849 				status_sin->sin_family = AF_INET;
2850 				status_sin->sin_len = sizeof(*status_sin);
2851 				status_sin->sin_addr.s_addr =
2852 				    htonl(INADDR_PFSYNC_GROUP);
2853 			}
2854 
2855 			if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) {
2856 				imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
2857 			}
2858 		}
2859 		break;
2860 	}
2861 #endif
2862 #ifdef INET6
2863 	case AF_INET6: {
2864 		struct sockaddr_in6 *status_sin6;
2865 		status_sin6 = (struct sockaddr_in6*)&(status->syncpeer);
2866 		if (sifp != NULL) {
2867 			if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) ||
2868 			    IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr,
2869 				&in6addr_linklocal_pfsync_group)) {
2870 				status_sin6->sin6_family = AF_INET6;
2871 				status_sin6->sin6_len = sizeof(*status_sin6);
2872 				status_sin6->sin6_addr =
2873 				    in6addr_linklocal_pfsync_group;
2874 			}
2875 
2876 			if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) {
2877 				im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0);
2878 			}
2879 		}
2880 		break;
2881 	}
2882 #endif
2883 	}
2884 
2885 	PFSYNC_LOCK(sc);
2886 
2887 	switch (status->version) {
2888 		case PFSYNC_MSG_VERSION_UNSPECIFIED:
2889 			sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT;
2890 			break;
2891 		case PFSYNC_MSG_VERSION_1301:
2892 		case PFSYNC_MSG_VERSION_1400:
2893 			sc->sc_version = status->version;
2894 			break;
2895 		default:
2896 			PFSYNC_UNLOCK(sc);
2897 			return (EINVAL);
2898 	}
2899 
2900 	switch (status->syncpeer.ss_family) {
2901 	case AF_INET: {
2902 		struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer);
2903 		struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer;
2904 		sc_sin->sin_family = AF_INET;
2905 		sc_sin->sin_len = sizeof(*sc_sin);
2906 		if (status_sin->sin_addr.s_addr == 0) {
2907 			sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
2908 		} else {
2909 			sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr;
2910 		}
2911 		break;
2912 	}
2913 	case AF_INET6: {
2914 		struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer);
2915 		struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer;
2916 		sc_sin->sin6_family = AF_INET6;
2917 		sc_sin->sin6_len = sizeof(*sc_sin);
2918 		if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) {
2919 			sc_sin->sin6_addr = in6addr_linklocal_pfsync_group;
2920 		} else {
2921 			sc_sin->sin6_addr = status_sin->sin6_addr;
2922 		}
2923 		break;
2924 	}
2925 	}
2926 
2927 	sc->sc_maxupdates = status->maxupdates;
2928 	if (status->flags & PFSYNCF_DEFER) {
2929 		sc->sc_flags |= PFSYNCF_DEFER;
2930 		V_pfsync_defer_ptr = pfsync_defer;
2931 	} else {
2932 		sc->sc_flags &= ~PFSYNCF_DEFER;
2933 		V_pfsync_defer_ptr = NULL;
2934 	}
2935 
2936 	if (sifp == NULL) {
2937 		if (sc->sc_sync_if)
2938 			if_rele(sc->sc_sync_if);
2939 		sc->sc_sync_if = NULL;
2940 		pfsync_multicast_cleanup(sc);
2941 		PFSYNC_UNLOCK(sc);
2942 		return (0);
2943 	}
2944 
2945 	for (c = 0; c < pfsync_buckets; c++) {
2946 		PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
2947 		if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
2948 		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
2949 			(sc->sc_sync_if != NULL &&
2950 			    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
2951 			sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
2952 			pfsync_sendout(1, c);
2953 		PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
2954 	}
2955 
2956 	pfsync_multicast_cleanup(sc);
2957 
2958 	if (((sc->sc_sync_peer.ss_family == AF_INET) &&
2959 	    IN_MULTICAST(ntohl(((struct sockaddr_in *)
2960 	        &sc->sc_sync_peer)->sin_addr.s_addr))) ||
2961 	    ((sc->sc_sync_peer.ss_family == AF_INET6) &&
2962 	    IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*)
2963 	        &sc->sc_sync_peer)->sin6_addr))) {
2964 		error = pfsync_multicast_setup(sc, sifp, imf, im6f);
2965 		if (error) {
2966 			if_rele(sifp);
2967 			PFSYNC_UNLOCK(sc);
2968 #ifdef INET
2969 			if (imf != NULL)
2970 				ip_mfilter_free(imf);
2971 #endif
2972 #ifdef INET6
2973 			if (im6f != NULL)
2974 				ip6_mfilter_free(im6f);
2975 #endif
2976 			return (error);
2977 		}
2978 	}
2979 	if (sc->sc_sync_if)
2980 		if_rele(sc->sc_sync_if);
2981 	sc->sc_sync_if = sifp;
2982 
2983 	switch (sc->sc_sync_peer.ss_family) {
2984 #ifdef INET
2985 	case AF_INET: {
2986 		struct ip *ip;
2987 		ip = &sc->sc_template.ipv4;
2988 		bzero(ip, sizeof(*ip));
2989 		ip->ip_v = IPVERSION;
2990 		ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2;
2991 		ip->ip_tos = IPTOS_LOWDELAY;
2992 		/* len and id are set later. */
2993 		ip->ip_off = htons(IP_DF);
2994 		ip->ip_ttl = PFSYNC_DFLTTL;
2995 		ip->ip_p = IPPROTO_PFSYNC;
2996 		ip->ip_src.s_addr = INADDR_ANY;
2997 		ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr;
2998 		break;
2999 	}
3000 #endif
3001 #ifdef INET6
3002 	case AF_INET6: {
3003 		struct ip6_hdr *ip6;
3004 		ip6 = &sc->sc_template.ipv6;
3005 		bzero(ip6, sizeof(*ip6));
3006 		ip6->ip6_vfc = IPV6_VERSION;
3007 		ip6->ip6_hlim = PFSYNC_DFLTTL;
3008 		ip6->ip6_nxt = IPPROTO_PFSYNC;
3009 		ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr;
3010 
3011 		struct epoch_tracker et;
3012 		NET_EPOCH_ENTER(et);
3013 		in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0,
3014 		    sc->sc_sync_if, &ip6->ip6_src, NULL);
3015 		NET_EPOCH_EXIT(et);
3016 		break;
3017 	}
3018 #endif
3019 	}
3020 
3021 	/* Request a full state table update. */
3022 	if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
3023 		(*carp_demote_adj_p)(V_pfsync_carp_adj,
3024 		    "pfsync bulk start");
3025 	sc->sc_flags &= ~PFSYNCF_OK;
3026 	if (V_pf_status.debug >= PF_DEBUG_MISC)
3027 		printf("pfsync: requesting bulk update\n");
3028 	PFSYNC_UNLOCK(sc);
3029 	PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
3030 	pfsync_request_update(0, 0);
3031 	PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
3032 	PFSYNC_BLOCK(sc);
3033 	sc->sc_ureq_sent = time_uptime;
3034 	callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc);
3035 	PFSYNC_BUNLOCK(sc);
3036 	return (0);
3037 }
3038 
3039 static void
3040 pfsync_pointers_init(void)
3041 {
3042 
3043 	PF_RULES_WLOCK();
3044 	V_pfsync_state_import_ptr = pfsync_state_import;
3045 	V_pfsync_insert_state_ptr = pfsync_insert_state;
3046 	V_pfsync_update_state_ptr = pfsync_update_state;
3047 	V_pfsync_delete_state_ptr = pfsync_delete_state;
3048 	V_pfsync_clear_states_ptr = pfsync_clear_states;
3049 	V_pfsync_defer_ptr = pfsync_defer;
3050 	PF_RULES_WUNLOCK();
3051 }
3052 
3053 static void
3054 pfsync_pointers_uninit(void)
3055 {
3056 
3057 	PF_RULES_WLOCK();
3058 	V_pfsync_state_import_ptr = NULL;
3059 	V_pfsync_insert_state_ptr = NULL;
3060 	V_pfsync_update_state_ptr = NULL;
3061 	V_pfsync_delete_state_ptr = NULL;
3062 	V_pfsync_clear_states_ptr = NULL;
3063 	V_pfsync_defer_ptr = NULL;
3064 	PF_RULES_WUNLOCK();
3065 }
3066 
3067 static void
3068 vnet_pfsync_init(const void *unused __unused)
3069 {
3070 	int error;
3071 
3072 	V_pfsync_cloner = if_clone_simple(pfsyncname,
3073 	    pfsync_clone_create, pfsync_clone_destroy, 1);
3074 	error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif,
3075 	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
3076 	if (error) {
3077 		if_clone_detach(V_pfsync_cloner);
3078 		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
3079 	}
3080 
3081 	pfsync_pointers_init();
3082 }
3083 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
3084     vnet_pfsync_init, NULL);
3085 
3086 static void
3087 vnet_pfsync_uninit(const void *unused __unused)
3088 {
3089 	int ret __diagused;
3090 
3091 	pfsync_pointers_uninit();
3092 
3093 	if_clone_detach(V_pfsync_cloner);
3094 	ret = swi_remove(V_pfsync_swi_cookie);
3095 	MPASS(ret == 0);
3096 	ret = intr_event_destroy(V_pfsync_swi_ie);
3097 	MPASS(ret == 0);
3098 }
3099 
3100 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
3101     vnet_pfsync_uninit, NULL);
3102 
3103 static int
3104 pfsync_init(void)
3105 {
3106 	int error;
3107 
3108 	pfsync_detach_ifnet_ptr = pfsync_detach_ifnet;
3109 
3110 #ifdef INET
3111 	error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL);
3112 	if (error)
3113 		return (error);
3114 #endif
3115 #ifdef INET6
3116 	error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL);
3117 	if (error) {
3118 		ipproto_unregister(IPPROTO_PFSYNC);
3119 		return (error);
3120 	}
3121 #endif
3122 
3123 	return (0);
3124 }
3125 
3126 static void
3127 pfsync_uninit(void)
3128 {
3129 	pfsync_detach_ifnet_ptr = NULL;
3130 
3131 #ifdef INET
3132 	ipproto_unregister(IPPROTO_PFSYNC);
3133 #endif
3134 #ifdef INET6
3135 	ip6proto_unregister(IPPROTO_PFSYNC);
3136 #endif
3137 }
3138 
3139 static int
3140 pfsync_modevent(module_t mod, int type, void *data)
3141 {
3142 	int error = 0;
3143 
3144 	switch (type) {
3145 	case MOD_LOAD:
3146 		error = pfsync_init();
3147 		break;
3148 	case MOD_UNLOAD:
3149 		pfsync_uninit();
3150 		break;
3151 	default:
3152 		error = EINVAL;
3153 		break;
3154 	}
3155 
3156 	return (error);
3157 }
3158 
3159 static moduledata_t pfsync_mod = {
3160 	pfsyncname,
3161 	pfsync_modevent,
3162 	0
3163 };
3164 
3165 #define PFSYNC_MODVER 1
3166 
3167 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
3168 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
3169 MODULE_VERSION(pfsync, PFSYNC_MODVER);
3170 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
3171