xref: /freebsd/sys/netpfil/pf/if_pfsync.c (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause AND ISC)
3  *
4  * Copyright (c) 2002 Michael Shalayeff
5  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*-
31  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
32  *
33  * Permission to use, copy, modify, and distribute this software for any
34  * purpose with or without fee is hereby granted, provided that the above
35  * copyright notice and this permission notice appear in all copies.
36  *
37  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
38  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
39  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
40  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
41  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
42  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
43  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44  */
45 
46 /*
47  * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
48  *
49  * Revisions picked from OpenBSD after revision 1.110 import:
50  * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
51  * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
52  * 1.120, 1.175 - use monotonic time_uptime
53  * 1.122 - reduce number of updates for non-TCP sessions
54  * 1.125, 1.127 - rewrite merge or stale processing
55  * 1.128 - cleanups
56  * 1.146 - bzero() mbuf before sparsely filling it with data
57  * 1.170 - SIOCSIFMTU checks
58  * 1.126, 1.142 - deferred packets processing
59  * 1.173 - correct expire time processing
60  */
61 
62 #include <sys/cdefs.h>
63 #include "opt_inet.h"
64 #include "opt_inet6.h"
65 #include "opt_pf.h"
66 
67 #include <sys/param.h>
68 #include <sys/bus.h>
69 #include <sys/endian.h>
70 #include <sys/interrupt.h>
71 #include <sys/kernel.h>
72 #include <sys/lock.h>
73 #include <sys/mbuf.h>
74 #include <sys/module.h>
75 #include <sys/mutex.h>
76 #include <sys/nv.h>
77 #include <sys/priv.h>
78 #include <sys/smp.h>
79 #include <sys/socket.h>
80 #include <sys/sockio.h>
81 #include <sys/sysctl.h>
82 #include <sys/syslog.h>
83 
84 #include <net/bpf.h>
85 #include <net/if.h>
86 #include <net/if_var.h>
87 #include <net/if_clone.h>
88 #include <net/if_private.h>
89 #include <net/if_types.h>
90 #include <net/vnet.h>
91 #include <net/pfvar.h>
92 #include <net/route.h>
93 #include <net/if_pfsync.h>
94 
95 #include <netinet/if_ether.h>
96 #include <netinet/in.h>
97 #include <netinet/in_var.h>
98 #include <netinet6/in6_var.h>
99 #include <netinet/ip.h>
100 #include <netinet/ip6.h>
101 #include <netinet/ip_carp.h>
102 #include <netinet/ip_var.h>
103 #include <netinet/tcp.h>
104 #include <netinet/tcp_fsm.h>
105 #include <netinet/tcp_seq.h>
106 
107 #include <netinet/ip6.h>
108 #include <netinet6/ip6_var.h>
109 #include <netinet6/scope6_var.h>
110 
111 #include <netpfil/pf/pfsync_nv.h>
112 
113 struct pfsync_bucket;
114 struct pfsync_softc;
115 
116 union inet_template {
117 	struct ip	ipv4;
118 	struct ip6_hdr	ipv6;
119 };
120 
121 #define PFSYNC_MINPKT ( \
122 	sizeof(union inet_template) + \
123 	sizeof(struct pfsync_header) + \
124 	sizeof(struct pfsync_subheader) )
125 
126 static int	pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *,
127 		    struct pfsync_state_peer *);
128 static int	pfsync_in_clr(struct mbuf *, int, int, int, int);
129 static int	pfsync_in_ins(struct mbuf *, int, int, int, int);
130 static int	pfsync_in_iack(struct mbuf *, int, int, int, int);
131 static int	pfsync_in_upd(struct mbuf *, int, int, int, int);
132 static int	pfsync_in_upd_c(struct mbuf *, int, int, int, int);
133 static int	pfsync_in_ureq(struct mbuf *, int, int, int, int);
134 static int	pfsync_in_del_c(struct mbuf *, int, int, int, int);
135 static int	pfsync_in_bus(struct mbuf *, int, int, int, int);
136 static int	pfsync_in_tdb(struct mbuf *, int, int, int, int);
137 static int	pfsync_in_eof(struct mbuf *, int, int, int, int);
138 static int	pfsync_in_error(struct mbuf *, int, int, int, int);
139 
140 static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = {
141 	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
142 	pfsync_in_ins,			/* PFSYNC_ACT_INS_1301 */
143 	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
144 	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1301 */
145 	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
146 	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
147 	pfsync_in_error,		/* PFSYNC_ACT_DEL */
148 	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
149 	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
150 	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
151 	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
152 	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
153 	pfsync_in_eof,			/* PFSYNC_ACT_EOF */
154 	pfsync_in_ins,			/* PFSYNC_ACT_INS_1400 */
155 	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1400 */
156 };
157 
158 struct pfsync_q {
159 	void		(*write)(struct pf_kstate *, void *);
160 	size_t		len;
161 	u_int8_t	action;
162 };
163 
164 /* We have the following sync queues */
165 enum pfsync_q_id {
166 	PFSYNC_Q_INS_1301,
167 	PFSYNC_Q_INS_1400,
168 	PFSYNC_Q_IACK,
169 	PFSYNC_Q_UPD_1301,
170 	PFSYNC_Q_UPD_1400,
171 	PFSYNC_Q_UPD_C,
172 	PFSYNC_Q_DEL_C,
173 	PFSYNC_Q_COUNT,
174 };
175 
176 /* Functions for building messages for given queue */
177 static void	pfsync_out_state_1301(struct pf_kstate *, void *);
178 static void	pfsync_out_state_1400(struct pf_kstate *, void *);
179 static void	pfsync_out_iack(struct pf_kstate *, void *);
180 static void	pfsync_out_upd_c(struct pf_kstate *, void *);
181 static void	pfsync_out_del_c(struct pf_kstate *, void *);
182 
183 /* Attach those functions to queue */
184 static struct pfsync_q pfsync_qs[] = {
185 	{ pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 },
186 	{ pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 },
187 	{ pfsync_out_iack,       sizeof(struct pfsync_ins_ack),    PFSYNC_ACT_INS_ACK },
188 	{ pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 },
189 	{ pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 },
190 	{ pfsync_out_upd_c,      sizeof(struct pfsync_upd_c),      PFSYNC_ACT_UPD_C },
191 	{ pfsync_out_del_c,      sizeof(struct pfsync_del_c),      PFSYNC_ACT_DEL_C }
192 };
193 
194 /* Map queue to pf_kstate->sync_state */
195 static u_int8_t pfsync_qid_sstate[] = {
196 	PFSYNC_S_INS,   /* PFSYNC_Q_INS_1301 */
197 	PFSYNC_S_INS,   /* PFSYNC_Q_INS_1400 */
198 	PFSYNC_S_IACK,  /* PFSYNC_Q_IACK */
199 	PFSYNC_S_UPD,   /* PFSYNC_Q_UPD_1301 */
200 	PFSYNC_S_UPD,   /* PFSYNC_Q_UPD_1400 */
201 	PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */
202 	PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */
203 };
204 
205 /* Map pf_kstate->sync_state to queue */
206 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t);
207 
208 static void	pfsync_q_ins(struct pf_kstate *, int sync_state, bool);
209 static void	pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *);
210 
211 static void	pfsync_update_state(struct pf_kstate *);
212 static void	pfsync_tx(struct pfsync_softc *, struct mbuf *);
213 
214 struct pfsync_upd_req_item {
215 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
216 	struct pfsync_upd_req			ur_msg;
217 };
218 
219 struct pfsync_deferral {
220 	struct pfsync_softc		*pd_sc;
221 	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
222 	struct callout			pd_tmo;
223 
224 	struct pf_kstate		*pd_st;
225 	struct mbuf			*pd_m;
226 };
227 
228 struct pfsync_bucket
229 {
230 	int			b_id;
231 	struct pfsync_softc	*b_sc;
232 	struct mtx		b_mtx;
233 	struct callout		b_tmo;
234 	int			b_flags;
235 #define	PFSYNCF_BUCKET_PUSH	0x00000001
236 
237 	size_t			b_len;
238 	TAILQ_HEAD(, pf_kstate)			b_qs[PFSYNC_Q_COUNT];
239 	TAILQ_HEAD(, pfsync_upd_req_item)	b_upd_req_list;
240 	TAILQ_HEAD(, pfsync_deferral)		b_deferrals;
241 	u_int			b_deferred;
242 	uint8_t			*b_plus;
243 	size_t			b_pluslen;
244 
245 	struct  ifaltq b_snd;
246 };
247 
248 struct pfsync_softc {
249 	/* Configuration */
250 	struct ifnet		*sc_ifp;
251 	struct ifnet		*sc_sync_if;
252 	struct ip_moptions	sc_imo;
253 	struct ip6_moptions	sc_im6o;
254 	struct sockaddr_storage	sc_sync_peer;
255 	uint32_t		sc_flags;
256 	uint8_t			sc_maxupdates;
257 	union inet_template     sc_template;
258 	struct mtx		sc_mtx;
259 	uint32_t		sc_version;
260 
261 	/* Queued data */
262 	struct pfsync_bucket	*sc_buckets;
263 
264 	/* Bulk update info */
265 	struct mtx		sc_bulk_mtx;
266 	uint32_t		sc_ureq_sent;
267 	int			sc_bulk_tries;
268 	uint32_t		sc_ureq_received;
269 	int			sc_bulk_hashid;
270 	uint64_t		sc_bulk_stateid;
271 	uint32_t		sc_bulk_creatorid;
272 	struct callout		sc_bulk_tmo;
273 	struct callout		sc_bulkfail_tmo;
274 };
275 
276 #define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
277 #define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
278 #define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
279 
280 #define PFSYNC_BUCKET_LOCK(b)		mtx_lock(&(b)->b_mtx)
281 #define PFSYNC_BUCKET_UNLOCK(b)		mtx_unlock(&(b)->b_mtx)
282 #define PFSYNC_BUCKET_LOCK_ASSERT(b)	mtx_assert(&(b)->b_mtx, MA_OWNED)
283 
284 #define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
285 #define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
286 #define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
287 
288 #define PFSYNC_DEFER_TIMEOUT	20
289 
290 static const char pfsyncname[] = "pfsync";
291 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
292 VNET_DEFINE_STATIC(struct pfsync_softc	*, pfsyncif) = NULL;
293 #define	V_pfsyncif		VNET(pfsyncif)
294 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL;
295 #define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
296 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie);
297 #define	V_pfsync_swi_ie		VNET(pfsync_swi_ie)
298 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats);
299 #define	V_pfsyncstats		VNET(pfsyncstats)
300 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW;
301 #define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
302 VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT;
303 #define	V_pfsync_defer_timeout	VNET(pfsync_defer_timeout)
304 
305 static void	pfsync_timeout(void *);
306 static void	pfsync_push(struct pfsync_bucket *);
307 static void	pfsync_push_all(struct pfsync_softc *);
308 static void	pfsyncintr(void *);
309 static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
310 		    struct in_mfilter *, struct in6_mfilter *);
311 static void	pfsync_multicast_cleanup(struct pfsync_softc *);
312 static void	pfsync_pointers_init(void);
313 static void	pfsync_pointers_uninit(void);
314 static int	pfsync_init(void);
315 static void	pfsync_uninit(void);
316 
317 static unsigned long pfsync_buckets;
318 
319 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
320     "PFSYNC");
321 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
322     &VNET_NAME(pfsyncstats), pfsyncstats,
323     "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
324 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW,
325     &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
326 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
327     &pfsync_buckets, 0, "Number of pfsync hash buckets");
328 SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW,
329     &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)");
330 
331 static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
332 static void	pfsync_clone_destroy(struct ifnet *);
333 static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
334 		    struct pf_state_peer *);
335 static int	pfsyncoutput(struct ifnet *, struct mbuf *,
336 		    const struct sockaddr *, struct route *);
337 static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
338 
339 static int	pfsync_defer(struct pf_kstate *, struct mbuf *);
340 static void	pfsync_undefer(struct pfsync_deferral *, int);
341 static void	pfsync_undefer_state_locked(struct pf_kstate *, int);
342 static void	pfsync_undefer_state(struct pf_kstate *, int);
343 static void	pfsync_defer_tmo(void *);
344 
345 static void	pfsync_request_update(u_int32_t, u_int64_t);
346 static bool	pfsync_update_state_req(struct pf_kstate *);
347 
348 static void	pfsync_drop_all(struct pfsync_softc *);
349 static void	pfsync_drop(struct pfsync_softc *, int);
350 static void	pfsync_sendout(int, int);
351 static void	pfsync_send_plus(void *, size_t);
352 
353 static void	pfsync_bulk_start(void);
354 static void	pfsync_bulk_status(u_int8_t);
355 static void	pfsync_bulk_update(void *);
356 static void	pfsync_bulk_fail(void *);
357 
358 static void	pfsync_detach_ifnet(struct ifnet *);
359 
360 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *,
361     struct pfsync_kstatus *);
362 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *,
363     struct pfsync_softc *);
364 
365 #ifdef IPSEC
366 static void	pfsync_update_net_tdb(struct pfsync_tdb *);
367 #endif
368 static struct pfsync_bucket	*pfsync_get_bucket(struct pfsync_softc *,
369 		    struct pf_kstate *);
370 
371 #define PFSYNC_MAX_BULKTRIES	12
372 
373 VNET_DEFINE(struct if_clone *, pfsync_cloner);
374 #define	V_pfsync_cloner	VNET(pfsync_cloner)
375 
376 const struct in6_addr in6addr_linklocal_pfsync_group =
377 	{{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
378 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}};
379 static int
380 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
381 {
382 	struct pfsync_softc *sc;
383 	struct ifnet *ifp;
384 	struct pfsync_bucket *b;
385 	int c;
386 	enum pfsync_q_id q;
387 
388 	if (unit != 0)
389 		return (EINVAL);
390 
391 	if (! pfsync_buckets)
392 		pfsync_buckets = mp_ncpus * 2;
393 
394 	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
395 	sc->sc_flags |= PFSYNCF_OK;
396 	sc->sc_maxupdates = 128;
397 	sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT;
398 
399 	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
400 	if_initname(ifp, pfsyncname, unit);
401 	ifp->if_softc = sc;
402 	ifp->if_ioctl = pfsyncioctl;
403 	ifp->if_output = pfsyncoutput;
404 	ifp->if_type = IFT_PFSYNC;
405 	ifp->if_hdrlen = sizeof(struct pfsync_header);
406 	ifp->if_mtu = ETHERMTU;
407 	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
408 	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
409 	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
410 	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
411 
412 	if_attach(ifp);
413 
414 	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
415 
416 	sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
417 	    M_PFSYNC, M_ZERO | M_WAITOK);
418 	for (c = 0; c < pfsync_buckets; c++) {
419 		b = &sc->sc_buckets[c];
420 		mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF);
421 
422 		b->b_id = c;
423 		b->b_sc = sc;
424 		b->b_len = PFSYNC_MINPKT;
425 
426 		for (q = 0; q < PFSYNC_Q_COUNT; q++)
427 			TAILQ_INIT(&b->b_qs[q]);
428 
429 		TAILQ_INIT(&b->b_upd_req_list);
430 		TAILQ_INIT(&b->b_deferrals);
431 
432 		callout_init(&b->b_tmo, 1);
433 
434 		b->b_snd.ifq_maxlen = ifqmaxlen;
435 	}
436 
437 	V_pfsyncif = sc;
438 
439 	return (0);
440 }
441 
442 static void
443 pfsync_clone_destroy(struct ifnet *ifp)
444 {
445 	struct pfsync_softc *sc = ifp->if_softc;
446 	struct pfsync_bucket *b;
447 	int c, ret;
448 
449 	for (c = 0; c < pfsync_buckets; c++) {
450 		b = &sc->sc_buckets[c];
451 		/*
452 		 * At this stage, everything should have already been
453 		 * cleared by pfsync_uninit(), and we have only to
454 		 * drain callouts.
455 		 */
456 		PFSYNC_BUCKET_LOCK(b);
457 		while (b->b_deferred > 0) {
458 			struct pfsync_deferral *pd =
459 			    TAILQ_FIRST(&b->b_deferrals);
460 
461 			ret = callout_stop(&pd->pd_tmo);
462 			PFSYNC_BUCKET_UNLOCK(b);
463 			if (ret > 0) {
464 				pfsync_undefer(pd, 1);
465 			} else {
466 				callout_drain(&pd->pd_tmo);
467 			}
468 			PFSYNC_BUCKET_LOCK(b);
469 		}
470 		MPASS(b->b_deferred == 0);
471 		MPASS(TAILQ_EMPTY(&b->b_deferrals));
472 		PFSYNC_BUCKET_UNLOCK(b);
473 
474 		free(b->b_plus, M_PFSYNC);
475 		b->b_plus = NULL;
476 		b->b_pluslen = 0;
477 
478 		callout_drain(&b->b_tmo);
479 	}
480 
481 	callout_drain(&sc->sc_bulkfail_tmo);
482 	callout_drain(&sc->sc_bulk_tmo);
483 
484 	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
485 		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
486 	bpfdetach(ifp);
487 	if_detach(ifp);
488 
489 	pfsync_drop_all(sc);
490 
491 	if_free(ifp);
492 	pfsync_multicast_cleanup(sc);
493 	mtx_destroy(&sc->sc_mtx);
494 	mtx_destroy(&sc->sc_bulk_mtx);
495 
496 	free(sc->sc_buckets, M_PFSYNC);
497 	free(sc, M_PFSYNC);
498 
499 	V_pfsyncif = NULL;
500 }
501 
502 static int
503 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
504     struct pf_state_peer *d)
505 {
506 	if (s->scrub.scrub_flag && d->scrub == NULL) {
507 		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
508 		if (d->scrub == NULL)
509 			return (ENOMEM);
510 	}
511 
512 	return (0);
513 }
514 
515 static int
516 pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
517 {
518 	struct pfsync_softc *sc = V_pfsyncif;
519 #ifndef	__NO_STRICT_ALIGNMENT
520 	struct pfsync_state_key key[2];
521 #endif
522 	struct pfsync_state_key *kw, *ks;
523 	struct pf_kstate	*st = NULL;
524 	struct pf_state_key *skw = NULL, *sks = NULL;
525 	struct pf_krule *r = NULL;
526 	struct pfi_kkif	*kif;
527 	int error;
528 
529 	PF_RULES_RASSERT();
530 
531 	if (sp->pfs_1301.creatorid == 0) {
532 		if (V_pf_status.debug >= PF_DEBUG_MISC)
533 			printf("%s: invalid creator id: %08x\n", __func__,
534 			    ntohl(sp->pfs_1301.creatorid));
535 		return (EINVAL);
536 	}
537 
538 	if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) {
539 		if (V_pf_status.debug >= PF_DEBUG_MISC)
540 			printf("%s: unknown interface: %s\n", __func__,
541 			    sp->pfs_1301.ifname);
542 		if (flags & PFSYNC_SI_IOCTL)
543 			return (EINVAL);
544 		return (0);	/* skip this state */
545 	}
546 
547 	/*
548 	 * If the ruleset checksums match or the state is coming from the ioctl,
549 	 * it's safe to associate the state with the rule of that number.
550 	 */
551 	if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) &&
552 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) <
553 	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
554 		r = pf_main_ruleset.rules[
555 		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->pfs_1301.rule)];
556 	else
557 		r = &V_pf_default_rule;
558 
559 	if ((r->max_states &&
560 	    counter_u64_fetch(r->states_cur) >= r->max_states))
561 		goto cleanup;
562 
563 	/*
564 	 * XXXGL: consider M_WAITOK in ioctl path after.
565 	 */
566 	st = pf_alloc_state(M_NOWAIT);
567 	if (__predict_false(st == NULL))
568 		goto cleanup;
569 
570 	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
571 		goto cleanup;
572 
573 #ifndef	__NO_STRICT_ALIGNMENT
574 	bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2);
575 	kw = &key[PF_SK_WIRE];
576 	ks = &key[PF_SK_STACK];
577 #else
578 	kw = &sp->pfs_1301.key[PF_SK_WIRE];
579 	ks = &sp->pfs_1301.key[PF_SK_STACK];
580 #endif
581 
582 	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) ||
583 	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) ||
584 	    kw->port[0] != ks->port[0] ||
585 	    kw->port[1] != ks->port[1]) {
586 		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
587 		if (sks == NULL)
588 			goto cleanup;
589 	} else
590 		sks = skw;
591 
592 	/* allocate memory for scrub info */
593 	if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) ||
594 	    pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst))
595 		goto cleanup;
596 
597 	/* Copy to state key(s). */
598 	skw->addr[0] = kw->addr[0];
599 	skw->addr[1] = kw->addr[1];
600 	skw->port[0] = kw->port[0];
601 	skw->port[1] = kw->port[1];
602 	skw->proto = sp->pfs_1301.proto;
603 	skw->af = sp->pfs_1301.af;
604 	if (sks != skw) {
605 		sks->addr[0] = ks->addr[0];
606 		sks->addr[1] = ks->addr[1];
607 		sks->port[0] = ks->port[0];
608 		sks->port[1] = ks->port[1];
609 		sks->proto = sp->pfs_1301.proto;
610 		sks->af = sp->pfs_1301.af;
611 	}
612 
613 	/* copy to state */
614 	bcopy(&sp->pfs_1301.rt_addr, &st->rt_addr, sizeof(st->rt_addr));
615 	st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000;
616 	st->expire = pf_get_uptime();
617 	if (sp->pfs_1301.expire) {
618 		uint32_t timeout;
619 
620 		timeout = r->timeout[sp->pfs_1301.timeout];
621 		if (!timeout)
622 			timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout];
623 
624 		/* sp->expire may have been adaptively scaled by export. */
625 		st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000;
626 	}
627 
628 	st->direction = sp->pfs_1301.direction;
629 	st->act.log = sp->pfs_1301.log;
630 	st->timeout = sp->pfs_1301.timeout;
631 
632 	switch (msg_version) {
633 		case PFSYNC_MSG_VERSION_1301:
634 			st->state_flags = sp->pfs_1301.state_flags;
635 			/*
636 			 * In FreeBSD 13 pfsync lacks many attributes. Copy them
637 			 * from the rule if possible. If rule can't be matched
638 			 * clear any set options as we can't recover their
639 			 * parameters.
640 			*/
641 			if (r == &V_pf_default_rule) {
642 				st->state_flags &= ~PFSTATE_SETMASK;
643 			} else {
644 				/*
645 				 * Similar to pf_rule_to_actions(). This code
646 				 * won't set the actions properly if they come
647 				 * from multiple "match" rules as only rule
648 				 * creating the state is send over pfsync.
649 				 */
650 				st->act.qid = r->qid;
651 				st->act.pqid = r->pqid;
652 				st->act.rtableid = r->rtableid;
653 				if (r->scrub_flags & PFSTATE_SETTOS)
654 					st->act.set_tos = r->set_tos;
655 				st->act.min_ttl = r->min_ttl;
656 				st->act.max_mss = r->max_mss;
657 				st->state_flags |= (r->scrub_flags &
658 				    (PFSTATE_NODF|PFSTATE_RANDOMID|
659 				    PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|
660 				    PFSTATE_SETPRIO));
661 				if (r->dnpipe || r->dnrpipe) {
662 					if (r->free_flags & PFRULE_DN_IS_PIPE)
663 						st->state_flags |= PFSTATE_DN_IS_PIPE;
664 					else
665 						st->state_flags &= ~PFSTATE_DN_IS_PIPE;
666 				}
667 				st->act.dnpipe = r->dnpipe;
668 				st->act.dnrpipe = r->dnrpipe;
669 			}
670 			break;
671 		case PFSYNC_MSG_VERSION_1400:
672 			st->state_flags = ntohs(sp->pfs_1400.state_flags);
673 			st->act.qid = ntohs(sp->pfs_1400.qid);
674 			st->act.pqid = ntohs(sp->pfs_1400.pqid);
675 			st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe);
676 			st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe);
677 			st->act.rtableid = ntohl(sp->pfs_1400.rtableid);
678 			st->act.min_ttl = sp->pfs_1400.min_ttl;
679 			st->act.set_tos = sp->pfs_1400.set_tos;
680 			st->act.max_mss = ntohs(sp->pfs_1400.max_mss);
681 			st->act.set_prio[0] = sp->pfs_1400.set_prio[0];
682 			st->act.set_prio[1] = sp->pfs_1400.set_prio[1];
683 			st->rt = sp->pfs_1400.rt;
684 			if (st->rt && (st->rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname)) == NULL) {
685 				if (V_pf_status.debug >= PF_DEBUG_MISC)
686 					printf("%s: unknown route interface: %s\n",
687 					    __func__, sp->pfs_1400.rt_ifname);
688 				if (flags & PFSYNC_SI_IOCTL)
689 					error = EINVAL;
690 				else
691 					error = 0;
692 				goto cleanup_keys;
693 			}
694 			break;
695 		default:
696 			panic("%s: Unsupported pfsync_msg_version %d",
697 			    __func__, msg_version);
698 	}
699 
700 	st->id = sp->pfs_1301.id;
701 	st->creatorid = sp->pfs_1301.creatorid;
702 	pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src);
703 	pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
704 
705 	st->rule.ptr = r;
706 	st->nat_rule.ptr = NULL;
707 	st->anchor.ptr = NULL;
708 
709 	st->pfsync_time = time_uptime;
710 	st->sync_state = PFSYNC_S_NONE;
711 
712 	if (!(flags & PFSYNC_SI_IOCTL))
713 		st->state_flags |= PFSTATE_NOSYNC;
714 
715 	if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0)
716 		goto cleanup_state;
717 
718 	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
719 	counter_u64_add(r->states_cur, 1);
720 	counter_u64_add(r->states_tot, 1);
721 
722 	if (!(flags & PFSYNC_SI_IOCTL)) {
723 		st->state_flags &= ~PFSTATE_NOSYNC;
724 		if (st->state_flags & PFSTATE_ACK) {
725 			struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
726 			PFSYNC_BUCKET_LOCK(b);
727 			pfsync_q_ins(st, PFSYNC_S_IACK, true);
728 			PFSYNC_BUCKET_UNLOCK(b);
729 
730 			pfsync_push_all(sc);
731 		}
732 	}
733 	st->state_flags &= ~PFSTATE_ACK;
734 	PF_STATE_UNLOCK(st);
735 
736 	return (0);
737 
738 cleanup:
739 	error = ENOMEM;
740 cleanup_keys:
741 	if (skw == sks)
742 		sks = NULL;
743 	uma_zfree(V_pf_state_key_z, skw);
744 	uma_zfree(V_pf_state_key_z, sks);
745 
746 cleanup_state:	/* pf_state_insert() frees the state keys. */
747 	if (st) {
748 		st->timeout = PFTM_UNLINKED; /* appease an assert */
749 		pf_free_state(st);
750 	}
751 	return (error);
752 }
753 
#ifdef INET
/*
 * IPv4 protocol input routine for pfsync.
 *
 * Takes ownership of *mp (which is set to NULL), validates the packet
 * (sync interface configured and running, receiving interface, TTL,
 * header length, pfsync version, advertised length) and then hands each
 * subheader's payload to the matching pfsync_acts[] handler while
 * holding the pf rules read lock.
 *
 * Always returns IPPROTO_DONE.  The mbuf is freed here unless m_pullup()
 * failed or a handler returned -1; in the latter case it is not freed
 * here, so the handler is expected to have disposed of it.
 */
static int
pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct mbuf *m = *mp;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len, flags = 0;
	int rv;
	uint16_t count;

	PF_RULES_RLOCK_TRACKER;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		/*
		 * m_pullup() may hand back a different mbuf at the head of
		 * the chain, and frees the chain on failure, so always
		 * continue with its return value.
		 */
		m = m_pullup(m, offset + sizeof(*ph));
		if (m == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	/* The advertised pfsync length must fit inside the packet. */
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in interface name tree, require holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	/* Walk the subheaders; each handler returns the bytes it consumed. */
	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action);
		if (rv == -1) {
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif
855 
#ifdef INET6
/*
 * IPv6 protocol input routine for pfsync.
 *
 * Takes ownership of *mp (which is set to NULL), validates the packet
 * (sync interface configured and running, receiving interface, hop
 * limit, header length, pfsync version, advertised length) and then
 * hands each subheader's payload to the matching pfsync_acts[] handler
 * while holding the pf rules read lock.
 *
 * Always returns IPPROTO_DONE.  The mbuf is freed here unless m_pullup()
 * failed or a handler returned -1; in the latter case it is not freed
 * here, so the handler is expected to have disposed of it.
 */
static int
pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len, flags = 0;
	int rv;
	uint16_t count;

	PF_RULES_RLOCK_TRACKER;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IPv6 hop limit is 255. */
	if (ip6->ip6_hlim != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = sizeof(*ip6);
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		/*
		 * m_pullup() may hand back a different mbuf at the head of
		 * the chain, and frees the chain on failure, so always
		 * continue with its return value.
		 */
		m = m_pullup(m, offset + sizeof(*ph));
		if (m == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip6 = mtod(m, struct ip6_hdr *);
	}
	ph = (struct pfsync_header *)((char *)ip6 + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	/* The advertised pfsync length must fit inside the packet. */
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in interface name tree, require holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	/* Walk the subheaders; each handler returns the bytes it consumed. */
	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action);
		if (rv == -1) {
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif
958 
959 static int
960 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action)
961 {
962 	struct pfsync_clr *clr;
963 	struct mbuf *mp;
964 	int len = sizeof(*clr) * count;
965 	int i, offp;
966 	u_int32_t creatorid;
967 
968 	mp = m_pulldown(m, offset, len, &offp);
969 	if (mp == NULL) {
970 		V_pfsyncstats.pfsyncs_badlen++;
971 		return (-1);
972 	}
973 	clr = (struct pfsync_clr *)(mp->m_data + offp);
974 
975 	for (i = 0; i < count; i++) {
976 		creatorid = clr[i].creatorid;
977 
978 		if (clr[i].ifname[0] != '\0' &&
979 		    pfi_kkif_find(clr[i].ifname) == NULL)
980 			continue;
981 
982 		for (int i = 0; i <= V_pf_hashmask; i++) {
983 			struct pf_idhash *ih = &V_pf_idhash[i];
984 			struct pf_kstate *s;
985 relock:
986 			PF_HASHROW_LOCK(ih);
987 			LIST_FOREACH(s, &ih->states, entry) {
988 				if (s->creatorid == creatorid) {
989 					s->state_flags |= PFSTATE_NOSYNC;
990 					pf_unlink_state(s);
991 					goto relock;
992 				}
993 			}
994 			PF_HASHROW_UNLOCK(ih);
995 		}
996 	}
997 
998 	return (len);
999 }
1000 
1001 static int
1002 pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action)
1003 {
1004 	struct mbuf *mp;
1005 	union pfsync_state_union *sa, *sp;
1006 	int i, offp, total_len, msg_version, msg_len;
1007 
1008 	switch (action) {
1009 		case PFSYNC_ACT_INS_1301:
1010 			msg_len = sizeof(struct pfsync_state_1301);
1011 			total_len = msg_len * count;
1012 			msg_version = PFSYNC_MSG_VERSION_1301;
1013 			break;
1014 		case PFSYNC_ACT_INS_1400:
1015 			msg_len = sizeof(struct pfsync_state_1400);
1016 			total_len = msg_len * count;
1017 			msg_version = PFSYNC_MSG_VERSION_1400;
1018 			break;
1019 		default:
1020 			V_pfsyncstats.pfsyncs_badact++;
1021 			return (-1);
1022 	}
1023 
1024 	mp = m_pulldown(m, offset, total_len, &offp);
1025 	if (mp == NULL) {
1026 		V_pfsyncstats.pfsyncs_badlen++;
1027 		return (-1);
1028 	}
1029 	sa = (union pfsync_state_union *)(mp->m_data + offp);
1030 
1031 	for (i = 0; i < count; i++) {
1032 		sp = (union pfsync_state_union *)((char *)sa + msg_len * i);
1033 
1034 		/* Check for invalid values. */
1035 		if (sp->pfs_1301.timeout >= PFTM_MAX ||
1036 		    sp->pfs_1301.src.state > PF_TCPS_PROXY_DST ||
1037 		    sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST ||
1038 		    sp->pfs_1301.direction > PF_OUT ||
1039 		    (sp->pfs_1301.af != AF_INET &&
1040 		    sp->pfs_1301.af != AF_INET6)) {
1041 			if (V_pf_status.debug >= PF_DEBUG_MISC)
1042 				printf("%s: invalid value\n", __func__);
1043 			V_pfsyncstats.pfsyncs_badval++;
1044 			continue;
1045 		}
1046 
1047 		if (pfsync_state_import(sp, flags, msg_version) == ENOMEM)
1048 			/* Drop out, but process the rest of the actions. */
1049 			break;
1050 	}
1051 
1052 	return (total_len);
1053 }
1054 
1055 static int
1056 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action)
1057 {
1058 	struct pfsync_ins_ack *ia, *iaa;
1059 	struct pf_kstate *st;
1060 
1061 	struct mbuf *mp;
1062 	int len = count * sizeof(*ia);
1063 	int offp, i;
1064 
1065 	mp = m_pulldown(m, offset, len, &offp);
1066 	if (mp == NULL) {
1067 		V_pfsyncstats.pfsyncs_badlen++;
1068 		return (-1);
1069 	}
1070 	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
1071 
1072 	for (i = 0; i < count; i++) {
1073 		ia = &iaa[i];
1074 
1075 		st = pf_find_state_byid(ia->id, ia->creatorid);
1076 		if (st == NULL)
1077 			continue;
1078 
1079 		if (st->state_flags & PFSTATE_ACK) {
1080 			pfsync_undefer_state(st, 0);
1081 		}
1082 		PF_STATE_UNLOCK(st);
1083 	}
1084 	/*
1085 	 * XXX this is not yet implemented, but we know the size of the
1086 	 * message so we can skip it.
1087 	 */
1088 
1089 	return (count * sizeof(struct pfsync_ins_ack));
1090 }
1091 
1092 static int
1093 pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src,
1094     struct pfsync_state_peer *dst)
1095 {
1096 	int sync = 0;
1097 
1098 	PF_STATE_LOCK_ASSERT(st);
1099 
1100 	/*
1101 	 * The state should never go backwards except
1102 	 * for syn-proxy states.  Neither should the
1103 	 * sequence window slide backwards.
1104 	 */
1105 	if ((st->src.state > src->state &&
1106 	    (st->src.state < PF_TCPS_PROXY_SRC ||
1107 	    src->state >= PF_TCPS_PROXY_SRC)) ||
1108 
1109 	    (st->src.state == src->state &&
1110 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
1111 		sync++;
1112 	else
1113 		pf_state_peer_ntoh(src, &st->src);
1114 
1115 	if ((st->dst.state > dst->state) ||
1116 
1117 	    (st->dst.state >= TCPS_SYN_SENT &&
1118 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
1119 		sync++;
1120 	else
1121 		pf_state_peer_ntoh(dst, &st->dst);
1122 
1123 	return (sync);
1124 }
1125 
1126 static int
1127 pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action)
1128 {
1129 	struct pfsync_softc *sc = V_pfsyncif;
1130 	union pfsync_state_union *sa, *sp;
1131 	struct pf_kstate *st;
1132 	struct mbuf *mp;
1133 	int sync, offp, i, total_len, msg_len, msg_version;
1134 
1135 	switch (action) {
1136 		case PFSYNC_ACT_UPD_1301:
1137 			msg_len = sizeof(struct pfsync_state_1301);
1138 			total_len = msg_len * count;
1139 			msg_version = PFSYNC_MSG_VERSION_1301;
1140 			break;
1141 		case PFSYNC_ACT_UPD_1400:
1142 			msg_len = sizeof(struct pfsync_state_1400);
1143 			total_len = msg_len * count;
1144 			msg_version = PFSYNC_MSG_VERSION_1400;
1145 			break;
1146 		default:
1147 			V_pfsyncstats.pfsyncs_badact++;
1148 			return (-1);
1149 	}
1150 
1151 	mp = m_pulldown(m, offset, total_len, &offp);
1152 	if (mp == NULL) {
1153 		V_pfsyncstats.pfsyncs_badlen++;
1154 		return (-1);
1155 	}
1156 	sa = (union pfsync_state_union *)(mp->m_data + offp);
1157 
1158 	for (i = 0; i < count; i++) {
1159 		sp = (union pfsync_state_union *)((char *)sa + msg_len * i);
1160 
1161 		/* check for invalid values */
1162 		if (sp->pfs_1301.timeout >= PFTM_MAX ||
1163 		    sp->pfs_1301.src.state > PF_TCPS_PROXY_DST ||
1164 		    sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) {
1165 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1166 				printf("pfsync_input: PFSYNC_ACT_UPD: "
1167 				    "invalid value\n");
1168 			}
1169 			V_pfsyncstats.pfsyncs_badval++;
1170 			continue;
1171 		}
1172 
1173 		st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid);
1174 		if (st == NULL) {
1175 			/* insert the update */
1176 			if (pfsync_state_import(sp, flags, msg_version))
1177 				V_pfsyncstats.pfsyncs_badstate++;
1178 			continue;
1179 		}
1180 
1181 		if (st->state_flags & PFSTATE_ACK) {
1182 			pfsync_undefer_state(st, 1);
1183 		}
1184 
1185 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
1186 			sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst);
1187 		else {
1188 			sync = 0;
1189 
1190 			/*
1191 			 * Non-TCP protocol state machine always go
1192 			 * forwards
1193 			 */
1194 			if (st->src.state > sp->pfs_1301.src.state)
1195 				sync++;
1196 			else
1197 				pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src);
1198 			if (st->dst.state > sp->pfs_1301.dst.state)
1199 				sync++;
1200 			else
1201 				pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
1202 		}
1203 		if (sync < 2) {
1204 			pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst);
1205 			pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
1206 			st->expire = pf_get_uptime();
1207 			st->timeout = sp->pfs_1301.timeout;
1208 		}
1209 		st->pfsync_time = time_uptime;
1210 
1211 		if (sync) {
1212 			V_pfsyncstats.pfsyncs_stale++;
1213 
1214 			pfsync_update_state(st);
1215 			PF_STATE_UNLOCK(st);
1216 			pfsync_push_all(sc);
1217 			continue;
1218 		}
1219 		PF_STATE_UNLOCK(st);
1220 	}
1221 
1222 	return (total_len);
1223 }
1224 
1225 static int
1226 pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action)
1227 {
1228 	struct pfsync_softc *sc = V_pfsyncif;
1229 	struct pfsync_upd_c *ua, *up;
1230 	struct pf_kstate *st;
1231 	int len = count * sizeof(*up);
1232 	int sync;
1233 	struct mbuf *mp;
1234 	int offp, i;
1235 
1236 	mp = m_pulldown(m, offset, len, &offp);
1237 	if (mp == NULL) {
1238 		V_pfsyncstats.pfsyncs_badlen++;
1239 		return (-1);
1240 	}
1241 	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
1242 
1243 	for (i = 0; i < count; i++) {
1244 		up = &ua[i];
1245 
1246 		/* check for invalid values */
1247 		if (up->timeout >= PFTM_MAX ||
1248 		    up->src.state > PF_TCPS_PROXY_DST ||
1249 		    up->dst.state > PF_TCPS_PROXY_DST) {
1250 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1251 				printf("pfsync_input: "
1252 				    "PFSYNC_ACT_UPD_C: "
1253 				    "invalid value\n");
1254 			}
1255 			V_pfsyncstats.pfsyncs_badval++;
1256 			continue;
1257 		}
1258 
1259 		st = pf_find_state_byid(up->id, up->creatorid);
1260 		if (st == NULL) {
1261 			/* We don't have this state. Ask for it. */
1262 			PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
1263 			pfsync_request_update(up->creatorid, up->id);
1264 			PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
1265 			continue;
1266 		}
1267 
1268 		if (st->state_flags & PFSTATE_ACK) {
1269 			pfsync_undefer_state(st, 1);
1270 		}
1271 
1272 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
1273 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
1274 		else {
1275 			sync = 0;
1276 
1277 			/*
1278 			 * Non-TCP protocol state machine always go
1279 			 * forwards
1280 			 */
1281 			if (st->src.state > up->src.state)
1282 				sync++;
1283 			else
1284 				pf_state_peer_ntoh(&up->src, &st->src);
1285 			if (st->dst.state > up->dst.state)
1286 				sync++;
1287 			else
1288 				pf_state_peer_ntoh(&up->dst, &st->dst);
1289 		}
1290 		if (sync < 2) {
1291 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1292 			pf_state_peer_ntoh(&up->dst, &st->dst);
1293 			st->expire = pf_get_uptime();
1294 			st->timeout = up->timeout;
1295 		}
1296 		st->pfsync_time = time_uptime;
1297 
1298 		if (sync) {
1299 			V_pfsyncstats.pfsyncs_stale++;
1300 
1301 			pfsync_update_state(st);
1302 			PF_STATE_UNLOCK(st);
1303 			pfsync_push_all(sc);
1304 			continue;
1305 		}
1306 		PF_STATE_UNLOCK(st);
1307 	}
1308 
1309 	return (len);
1310 }
1311 
1312 static int
1313 pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action)
1314 {
1315 	struct pfsync_upd_req *ur, *ura;
1316 	struct mbuf *mp;
1317 	int len = count * sizeof(*ur);
1318 	int i, offp;
1319 
1320 	struct pf_kstate *st;
1321 
1322 	mp = m_pulldown(m, offset, len, &offp);
1323 	if (mp == NULL) {
1324 		V_pfsyncstats.pfsyncs_badlen++;
1325 		return (-1);
1326 	}
1327 	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
1328 
1329 	for (i = 0; i < count; i++) {
1330 		ur = &ura[i];
1331 
1332 		if (ur->id == 0 && ur->creatorid == 0)
1333 			pfsync_bulk_start();
1334 		else {
1335 			st = pf_find_state_byid(ur->id, ur->creatorid);
1336 			if (st == NULL) {
1337 				V_pfsyncstats.pfsyncs_badstate++;
1338 				continue;
1339 			}
1340 			if (st->state_flags & PFSTATE_NOSYNC) {
1341 				PF_STATE_UNLOCK(st);
1342 				continue;
1343 			}
1344 
1345 			pfsync_update_state_req(st);
1346 			PF_STATE_UNLOCK(st);
1347 		}
1348 	}
1349 
1350 	return (len);
1351 }
1352 
1353 static int
1354 pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action)
1355 {
1356 	struct mbuf *mp;
1357 	struct pfsync_del_c *sa, *sp;
1358 	struct pf_kstate *st;
1359 	int len = count * sizeof(*sp);
1360 	int offp, i;
1361 
1362 	mp = m_pulldown(m, offset, len, &offp);
1363 	if (mp == NULL) {
1364 		V_pfsyncstats.pfsyncs_badlen++;
1365 		return (-1);
1366 	}
1367 	sa = (struct pfsync_del_c *)(mp->m_data + offp);
1368 
1369 	for (i = 0; i < count; i++) {
1370 		sp = &sa[i];
1371 
1372 		st = pf_find_state_byid(sp->id, sp->creatorid);
1373 		if (st == NULL) {
1374 			V_pfsyncstats.pfsyncs_badstate++;
1375 			continue;
1376 		}
1377 
1378 		st->state_flags |= PFSTATE_NOSYNC;
1379 		pf_unlink_state(st);
1380 	}
1381 
1382 	return (len);
1383 }
1384 
1385 static int
1386 pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action)
1387 {
1388 	struct pfsync_softc *sc = V_pfsyncif;
1389 	struct pfsync_bus *bus;
1390 	struct mbuf *mp;
1391 	int len = count * sizeof(*bus);
1392 	int offp;
1393 
1394 	PFSYNC_BLOCK(sc);
1395 
1396 	/* If we're not waiting for a bulk update, who cares. */
1397 	if (sc->sc_ureq_sent == 0) {
1398 		PFSYNC_BUNLOCK(sc);
1399 		return (len);
1400 	}
1401 
1402 	mp = m_pulldown(m, offset, len, &offp);
1403 	if (mp == NULL) {
1404 		PFSYNC_BUNLOCK(sc);
1405 		V_pfsyncstats.pfsyncs_badlen++;
1406 		return (-1);
1407 	}
1408 	bus = (struct pfsync_bus *)(mp->m_data + offp);
1409 
1410 	switch (bus->status) {
1411 	case PFSYNC_BUS_START:
1412 		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
1413 		    V_pf_limits[PF_LIMIT_STATES].limit /
1414 		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
1415 		    sizeof(union pfsync_state_union)),
1416 		    pfsync_bulk_fail, sc);
1417 		if (V_pf_status.debug >= PF_DEBUG_MISC)
1418 			printf("pfsync: received bulk update start\n");
1419 		break;
1420 
1421 	case PFSYNC_BUS_END:
1422 		if (time_uptime - ntohl(bus->endtime) >=
1423 		    sc->sc_ureq_sent) {
1424 			/* that's it, we're happy */
1425 			sc->sc_ureq_sent = 0;
1426 			sc->sc_bulk_tries = 0;
1427 			callout_stop(&sc->sc_bulkfail_tmo);
1428 			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
1429 				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
1430 				    "pfsync bulk done");
1431 			sc->sc_flags |= PFSYNCF_OK;
1432 			if (V_pf_status.debug >= PF_DEBUG_MISC)
1433 				printf("pfsync: received valid "
1434 				    "bulk update end\n");
1435 		} else {
1436 			if (V_pf_status.debug >= PF_DEBUG_MISC)
1437 				printf("pfsync: received invalid "
1438 				    "bulk update end: bad timestamp\n");
1439 		}
1440 		break;
1441 	}
1442 	PFSYNC_BUNLOCK(sc);
1443 
1444 	return (len);
1445 }
1446 
1447 static int
1448 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action)
1449 {
1450 	int len = count * sizeof(struct pfsync_tdb);
1451 
1452 #if defined(IPSEC)
1453 	struct pfsync_tdb *tp;
1454 	struct mbuf *mp;
1455 	int offp;
1456 	int i;
1457 	int s;
1458 
1459 	mp = m_pulldown(m, offset, len, &offp);
1460 	if (mp == NULL) {
1461 		V_pfsyncstats.pfsyncs_badlen++;
1462 		return (-1);
1463 	}
1464 	tp = (struct pfsync_tdb *)(mp->m_data + offp);
1465 
1466 	for (i = 0; i < count; i++)
1467 		pfsync_update_net_tdb(&tp[i]);
1468 #endif
1469 
1470 	return (len);
1471 }
1472 
#if defined(IPSEC)
/*
 * Update an in-kernel tdb from a received message.  Silently does nothing
 * if no matching tdb is found.
 *
 * The replay counter and byte counter in the message are converted to host
 * order in place.  A message with a reserved SPI, an unsupported address
 * family, or counters lower than the current ones is rejected and counted
 * in pfsyncs_badstate.
 */
static void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb		*tdb;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	return;

bad:
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
		    "invalid value\n");
	V_pfsyncstats.pfsyncs_badstate++;
}
#endif
1511 
1512 static int
1513 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action)
1514 {
1515 	/* check if we are at the right place in the packet */
1516 	if (offset != m->m_pkthdr.len)
1517 		V_pfsyncstats.pfsyncs_badlen++;
1518 
1519 	/* we're done. free and let the caller return */
1520 	m_freem(m);
1521 	return (-1);
1522 }
1523 
1524 static int
1525 pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action)
1526 {
1527 	V_pfsyncstats.pfsyncs_badact++;
1528 
1529 	m_freem(m);
1530 	return (-1);
1531 }
1532 
/*
 * Output routine of the pfsync interface: the packet is freed without
 * being transmitted and success (0) is reported.
 */
static int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
	struct route *rt)
{
	/* Discard the packet. */
	m_freem(m);

	return (0);
}
1540 
1541 /* ARGSUSED */
1542 static int
1543 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1544 {
1545 	struct pfsync_softc *sc = ifp->if_softc;
1546 	struct ifreq *ifr = (struct ifreq *)data;
1547 	struct pfsyncreq pfsyncr;
1548 	size_t nvbuflen;
1549 	int error;
1550 	int c;
1551 
1552 	switch (cmd) {
1553 	case SIOCSIFFLAGS:
1554 		PFSYNC_LOCK(sc);
1555 		if (ifp->if_flags & IFF_UP) {
1556 			ifp->if_drv_flags |= IFF_DRV_RUNNING;
1557 			PFSYNC_UNLOCK(sc);
1558 			pfsync_pointers_init();
1559 		} else {
1560 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1561 			PFSYNC_UNLOCK(sc);
1562 			pfsync_pointers_uninit();
1563 		}
1564 		break;
1565 	case SIOCSIFMTU:
1566 		if (!sc->sc_sync_if ||
1567 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1568 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1569 			return (EINVAL);
1570 		if (ifr->ifr_mtu < ifp->if_mtu) {
1571 			for (c = 0; c < pfsync_buckets; c++) {
1572 				PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
1573 				if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
1574 					pfsync_sendout(1, c);
1575 				PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
1576 			}
1577 		}
1578 		ifp->if_mtu = ifr->ifr_mtu;
1579 		break;
1580 	case SIOCGETPFSYNC:
1581 		bzero(&pfsyncr, sizeof(pfsyncr));
1582 		PFSYNC_LOCK(sc);
1583 		if (sc->sc_sync_if) {
1584 			strlcpy(pfsyncr.pfsyncr_syncdev,
1585 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1586 		}
1587 		pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr;
1588 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1589 		pfsyncr.pfsyncr_defer = sc->sc_flags;
1590 		PFSYNC_UNLOCK(sc);
1591 		return (copyout(&pfsyncr, ifr_data_get_ptr(ifr),
1592 		    sizeof(pfsyncr)));
1593 
1594 	case SIOCGETPFSYNCNV:
1595 	    {
1596 		nvlist_t *nvl_syncpeer;
1597 		nvlist_t *nvl = nvlist_create(0);
1598 
1599 		if (nvl == NULL)
1600 			return (ENOMEM);
1601 
1602 		if (sc->sc_sync_if)
1603 			nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname);
1604 		nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates);
1605 		nvlist_add_number(nvl, "flags", sc->sc_flags);
1606 		nvlist_add_number(nvl, "version", sc->sc_version);
1607 		if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL)
1608 			nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer);
1609 
1610 		void *packed = NULL;
1611 		packed = nvlist_pack(nvl, &nvbuflen);
1612 		if (packed == NULL) {
1613 			free(packed, M_NVLIST);
1614 			nvlist_destroy(nvl);
1615 			return (ENOMEM);
1616 		}
1617 
1618 		if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
1619 			ifr->ifr_cap_nv.length = nvbuflen;
1620 			ifr->ifr_cap_nv.buffer = NULL;
1621 			free(packed, M_NVLIST);
1622 			nvlist_destroy(nvl);
1623 			return (EFBIG);
1624 		}
1625 
1626 		ifr->ifr_cap_nv.length = nvbuflen;
1627 		error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen);
1628 
1629 		nvlist_destroy(nvl);
1630 		nvlist_destroy(nvl_syncpeer);
1631 		free(packed, M_NVLIST);
1632 		break;
1633 	    }
1634 
1635 	case SIOCSETPFSYNC:
1636 	    {
1637 		struct pfsync_kstatus status;
1638 
1639 		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1640 			return (error);
1641 		if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr,
1642 		    sizeof(pfsyncr))))
1643 			return (error);
1644 
1645 		memset((char *)&status, 0, sizeof(struct pfsync_kstatus));
1646 		pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status);
1647 
1648 		error = pfsync_kstatus_to_softc(&status, sc);
1649 		return (error);
1650 	    }
1651 	case SIOCSETPFSYNCNV:
1652 	    {
1653 		struct pfsync_kstatus status;
1654 		void *data;
1655 		nvlist_t *nvl;
1656 
1657 		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1658 			return (error);
1659 		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
1660 			return (EINVAL);
1661 
1662 		data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
1663 
1664 		if ((error = copyin(ifr->ifr_cap_nv.buffer, data,
1665 		    ifr->ifr_cap_nv.length)) != 0) {
1666 			free(data, M_TEMP);
1667 			return (error);
1668 		}
1669 
1670 		if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) {
1671 			free(data, M_TEMP);
1672 			return (EINVAL);
1673 		}
1674 
1675 		memset((char *)&status, 0, sizeof(struct pfsync_kstatus));
1676 		pfsync_nvstatus_to_kstatus(nvl, &status);
1677 
1678 		nvlist_destroy(nvl);
1679 		free(data, M_TEMP);
1680 
1681 		error = pfsync_kstatus_to_softc(&status, sc);
1682 		return (error);
1683 	    }
1684 	default:
1685 		return (ENOTTY);
1686 	}
1687 
1688 	return (0);
1689 }
1690 
1691 static void
1692 pfsync_out_state_1301(struct pf_kstate *st, void *buf)
1693 {
1694 	union pfsync_state_union *sp = buf;
1695 
1696 	pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301);
1697 }
1698 
1699 static void
1700 pfsync_out_state_1400(struct pf_kstate *st, void *buf)
1701 {
1702 	union pfsync_state_union *sp = buf;
1703 
1704 	pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400);
1705 }
1706 
1707 static void
1708 pfsync_out_iack(struct pf_kstate *st, void *buf)
1709 {
1710 	struct pfsync_ins_ack *iack = buf;
1711 
1712 	iack->id = st->id;
1713 	iack->creatorid = st->creatorid;
1714 }
1715 
1716 static void
1717 pfsync_out_upd_c(struct pf_kstate *st, void *buf)
1718 {
1719 	struct pfsync_upd_c *up = buf;
1720 
1721 	bzero(up, sizeof(*up));
1722 	up->id = st->id;
1723 	pf_state_peer_hton(&st->src, &up->src);
1724 	pf_state_peer_hton(&st->dst, &up->dst);
1725 	up->creatorid = st->creatorid;
1726 	up->timeout = st->timeout;
1727 }
1728 
1729 static void
1730 pfsync_out_del_c(struct pf_kstate *st, void *buf)
1731 {
1732 	struct pfsync_del_c *dp = buf;
1733 
1734 	dp->id = st->id;
1735 	dp->creatorid = st->creatorid;
1736 	st->state_flags |= PFSTATE_NOSYNC;
1737 }
1738 
1739 static void
1740 pfsync_drop_all(struct pfsync_softc *sc)
1741 {
1742 	struct pfsync_bucket *b;
1743 	int c;
1744 
1745 	for (c = 0; c < pfsync_buckets; c++) {
1746 		b = &sc->sc_buckets[c];
1747 
1748 		PFSYNC_BUCKET_LOCK(b);
1749 		pfsync_drop(sc, c);
1750 		PFSYNC_BUCKET_UNLOCK(b);
1751 	}
1752 }
1753 
1754 static void
1755 pfsync_drop(struct pfsync_softc *sc, int c)
1756 {
1757 	struct pf_kstate *st, *next;
1758 	struct pfsync_upd_req_item *ur;
1759 	struct pfsync_bucket *b;
1760 	enum pfsync_q_id q;
1761 
1762 	b = &sc->sc_buckets[c];
1763 	PFSYNC_BUCKET_LOCK_ASSERT(b);
1764 
1765 	for (q = 0; q < PFSYNC_Q_COUNT; q++) {
1766 		if (TAILQ_EMPTY(&b->b_qs[q]))
1767 			continue;
1768 
1769 		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
1770 			KASSERT(st->sync_state == pfsync_qid_sstate[q],
1771 				("%s: st->sync_state %d == q %d",
1772 					__func__, st->sync_state, q));
1773 			st->sync_state = PFSYNC_S_NONE;
1774 			pf_release_state(st);
1775 		}
1776 		TAILQ_INIT(&b->b_qs[q]);
1777 	}
1778 
1779 	while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
1780 		TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
1781 		free(ur, M_PFSYNC);
1782 	}
1783 
1784 	b->b_len = PFSYNC_MINPKT;
1785 	free(b->b_plus, M_PFSYNC);
1786 	b->b_plus = NULL;
1787 	b->b_pluslen = 0;
1788 }
1789 
1790 static void
1791 pfsync_sendout(int schedswi, int c)
1792 {
1793 	struct pfsync_softc *sc = V_pfsyncif;
1794 	struct ifnet *ifp = sc->sc_ifp;
1795 	struct mbuf *m;
1796 	struct pfsync_header *ph;
1797 	struct pfsync_subheader *subh;
1798 	struct pf_kstate *st, *st_next;
1799 	struct pfsync_upd_req_item *ur;
1800 	struct pfsync_bucket *b = &sc->sc_buckets[c];
1801 	size_t len;
1802 	int aflen, offset, count = 0;
1803 	enum pfsync_q_id q;
1804 
1805 	KASSERT(sc != NULL, ("%s: null sc", __func__));
1806 	KASSERT(b->b_len > PFSYNC_MINPKT,
1807 	    ("%s: sc_len %zu", __func__, b->b_len));
1808 	PFSYNC_BUCKET_LOCK_ASSERT(b);
1809 
1810 	if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) {
1811 		pfsync_drop(sc, c);
1812 		return;
1813 	}
1814 
1815 	m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
1816 	if (m == NULL) {
1817 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
1818 		V_pfsyncstats.pfsyncs_onomem++;
1819 		return;
1820 	}
1821 	m->m_data += max_linkhdr;
1822 	bzero(m->m_data, b->b_len);
1823 
1824 	len = b->b_len;
1825 
1826 	/* build the ip header */
1827 	switch (sc->sc_sync_peer.ss_family) {
1828 #ifdef INET
1829 	case AF_INET:
1830 	    {
1831 		struct ip *ip;
1832 
1833 		ip = mtod(m, struct ip *);
1834 		bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip));
1835 		aflen = offset = sizeof(*ip);
1836 
1837 		len -= sizeof(union inet_template) - sizeof(struct ip);
1838 		ip->ip_len = htons(len);
1839 		ip_fillid(ip);
1840 		break;
1841 	    }
1842 #endif
1843 #ifdef INET6
1844 	case AF_INET6:
1845 		{
1846 		struct ip6_hdr *ip6;
1847 
1848 		ip6 = mtod(m, struct ip6_hdr *);
1849 		bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6));
1850 		aflen = offset = sizeof(*ip6);
1851 
1852 		len -= sizeof(union inet_template) - sizeof(struct ip6_hdr);
1853 		ip6->ip6_plen = htons(len);
1854 		break;
1855 		}
1856 #endif
1857 	default:
1858 		m_freem(m);
1859 		pfsync_drop(sc, c);
1860 		return;
1861 	}
1862 	m->m_len = m->m_pkthdr.len = len;
1863 
1864 	/* build the pfsync header */
1865 	ph = (struct pfsync_header *)(m->m_data + offset);
1866 	offset += sizeof(*ph);
1867 
1868 	ph->version = PFSYNC_VERSION;
1869 	ph->len = htons(len - aflen);
1870 	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1871 
1872 	/* walk the queues */
1873 	for (q = 0; q < PFSYNC_Q_COUNT; q++) {
1874 		if (TAILQ_EMPTY(&b->b_qs[q]))
1875 			continue;
1876 
1877 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1878 		offset += sizeof(*subh);
1879 
1880 		count = 0;
1881 		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
1882 			KASSERT(st->sync_state == pfsync_qid_sstate[q],
1883 				("%s: st->sync_state == q",
1884 					__func__));
1885 			/*
1886 			 * XXXGL: some of write methods do unlocked reads
1887 			 * of state data :(
1888 			 */
1889 			pfsync_qs[q].write(st, m->m_data + offset);
1890 			offset += pfsync_qs[q].len;
1891 			st->sync_state = PFSYNC_S_NONE;
1892 			pf_release_state(st);
1893 			count++;
1894 		}
1895 		TAILQ_INIT(&b->b_qs[q]);
1896 
1897 		subh->action = pfsync_qs[q].action;
1898 		subh->count = htons(count);
1899 		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
1900 	}
1901 
1902 	if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
1903 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1904 		offset += sizeof(*subh);
1905 
1906 		count = 0;
1907 		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
1908 			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
1909 
1910 			bcopy(&ur->ur_msg, m->m_data + offset,
1911 			    sizeof(ur->ur_msg));
1912 			offset += sizeof(ur->ur_msg);
1913 			free(ur, M_PFSYNC);
1914 			count++;
1915 		}
1916 
1917 		subh->action = PFSYNC_ACT_UPD_REQ;
1918 		subh->count = htons(count);
1919 		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
1920 	}
1921 
1922 	/* has someone built a custom region for us to add? */
1923 	if (b->b_plus != NULL) {
1924 		bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
1925 		offset += b->b_pluslen;
1926 
1927 		free(b->b_plus, M_PFSYNC);
1928 		b->b_plus = NULL;
1929 		b->b_pluslen = 0;
1930 	}
1931 
1932 	subh = (struct pfsync_subheader *)(m->m_data + offset);
1933 	offset += sizeof(*subh);
1934 
1935 	subh->action = PFSYNC_ACT_EOF;
1936 	subh->count = htons(1);
1937 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;
1938 
1939 	/* we're done, let's put it on the wire */
1940 	if (bpf_peers_present(ifp->if_bpf)) {
1941 		m->m_data += aflen;
1942 		m->m_len = m->m_pkthdr.len = len - aflen;
1943 		bpf_mtap(ifp->if_bpf, m);
1944 		m->m_data -= aflen;
1945 		m->m_len = m->m_pkthdr.len = len;
1946 	}
1947 
1948 	if (sc->sc_sync_if == NULL) {
1949 		b->b_len = PFSYNC_MINPKT;
1950 		m_freem(m);
1951 		return;
1952 	}
1953 
1954 	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
1955 	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
1956 	b->b_len = PFSYNC_MINPKT;
1957 
1958 	if (!_IF_QFULL(&b->b_snd))
1959 		_IF_ENQUEUE(&b->b_snd, m);
1960 	else {
1961 		m_freem(m);
1962 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
1963 	}
1964 	if (schedswi)
1965 		swi_sched(V_pfsync_swi_cookie, 0);
1966 }
1967 
1968 static void
1969 pfsync_insert_state(struct pf_kstate *st)
1970 {
1971 	struct pfsync_softc *sc = V_pfsyncif;
1972 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
1973 
1974 	if (st->state_flags & PFSTATE_NOSYNC)
1975 		return;
1976 
1977 	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
1978 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1979 		st->state_flags |= PFSTATE_NOSYNC;
1980 		return;
1981 	}
1982 
1983 	KASSERT(st->sync_state == PFSYNC_S_NONE,
1984 		("%s: st->sync_state %u", __func__, st->sync_state));
1985 
1986 	PFSYNC_BUCKET_LOCK(b);
1987 	if (b->b_len == PFSYNC_MINPKT)
1988 		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
1989 
1990 	pfsync_q_ins(st, PFSYNC_S_INS, true);
1991 	PFSYNC_BUCKET_UNLOCK(b);
1992 
1993 	st->sync_updates = 0;
1994 }
1995 
1996 static int
1997 pfsync_defer(struct pf_kstate *st, struct mbuf *m)
1998 {
1999 	struct pfsync_softc *sc = V_pfsyncif;
2000 	struct pfsync_deferral *pd;
2001 	struct pfsync_bucket *b;
2002 
2003 	if (m->m_flags & (M_BCAST|M_MCAST))
2004 		return (0);
2005 
2006 	if (sc == NULL)
2007 		return (0);
2008 
2009 	b = pfsync_get_bucket(sc, st);
2010 
2011 	PFSYNC_LOCK(sc);
2012 
2013 	if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
2014 	    !(sc->sc_flags & PFSYNCF_DEFER)) {
2015 		PFSYNC_UNLOCK(sc);
2016 		return (0);
2017 	}
2018 
2019 	PFSYNC_BUCKET_LOCK(b);
2020 	PFSYNC_UNLOCK(sc);
2021 
2022 	if (b->b_deferred >= 128)
2023 		pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);
2024 
2025 	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
2026 	if (pd == NULL) {
2027 		PFSYNC_BUCKET_UNLOCK(b);
2028 		return (0);
2029 	}
2030 	b->b_deferred++;
2031 
2032 	m->m_flags |= M_SKIP_FIREWALL;
2033 	st->state_flags |= PFSTATE_ACK;
2034 
2035 	pd->pd_sc = sc;
2036 	pd->pd_st = st;
2037 	pf_ref_state(st);
2038 	pd->pd_m = m;
2039 
2040 	TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
2041 	callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
2042 	callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000,
2043 	    pfsync_defer_tmo, pd);
2044 
2045 	pfsync_push(b);
2046 	PFSYNC_BUCKET_UNLOCK(b);
2047 
2048 	return (1);
2049 }
2050 
2051 static void
2052 pfsync_undefer(struct pfsync_deferral *pd, int drop)
2053 {
2054 	struct pfsync_softc *sc = pd->pd_sc;
2055 	struct mbuf *m = pd->pd_m;
2056 	struct pf_kstate *st = pd->pd_st;
2057 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2058 
2059 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2060 
2061 	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
2062 	b->b_deferred--;
2063 	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
2064 	free(pd, M_PFSYNC);
2065 	pf_release_state(st);
2066 
2067 	if (drop)
2068 		m_freem(m);
2069 	else {
2070 		_IF_ENQUEUE(&b->b_snd, m);
2071 		pfsync_push(b);
2072 	}
2073 }
2074 
/*
 * Callout handler run when a deferral's timer expires.  The deferral is
 * unlinked from its bucket, PFSTATE_ACK is cleared on the state, and the
 * held packet is transmitted through pfsync_tx(), or freed when no sync
 * interface is configured.
 *
 * pfsync_defer() initialises this callout with the bucket mutex and
 * CALLOUT_RETURNUNLOCKED, so the handler is entered with the bucket lock
 * held and has to drop it itself.
 */
static void
pfsync_defer_tmo(void *arg)
{
	struct epoch_tracker et;
	struct pfsync_deferral *pd = arg;
	/* Copy out everything needed from pd; it is freed further down. */
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_kstate *st = pd->pd_st;
	struct pfsync_bucket *b;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
	b->b_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	/* Release the bucket lock before freeing and transmitting. */
	PFSYNC_BUCKET_UNLOCK(b);
	free(pd, M_PFSYNC);

	/* Without a sync interface the packet is discarded, not sent. */
	if (sc->sc_sync_if == NULL) {
		pf_release_state(st);
		m_freem(m);
		CURVNET_RESTORE();
		return;
	}

	NET_EPOCH_ENTER(et);

	pfsync_tx(sc, m);

	/* Drop the state reference taken when the deferral was created. */
	pf_release_state(st);

	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}
2113 
2114 static void
2115 pfsync_undefer_state_locked(struct pf_kstate *st, int drop)
2116 {
2117 	struct pfsync_softc *sc = V_pfsyncif;
2118 	struct pfsync_deferral *pd;
2119 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2120 
2121 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2122 
2123 	TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
2124 		 if (pd->pd_st == st) {
2125 			if (callout_stop(&pd->pd_tmo) > 0)
2126 				pfsync_undefer(pd, drop);
2127 
2128 			return;
2129 		}
2130 	}
2131 
2132 	panic("%s: unable to find deferred state", __func__);
2133 }
2134 
2135 static void
2136 pfsync_undefer_state(struct pf_kstate *st, int drop)
2137 {
2138 	struct pfsync_softc *sc = V_pfsyncif;
2139 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2140 
2141 	PFSYNC_BUCKET_LOCK(b);
2142 	pfsync_undefer_state_locked(st, drop);
2143 	PFSYNC_BUCKET_UNLOCK(b);
2144 }
2145 
2146 static struct pfsync_bucket*
2147 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st)
2148 {
2149 	int c = PF_IDHASH(st) % pfsync_buckets;
2150 	return &sc->sc_buckets[c];
2151 }
2152 
2153 static void
2154 pfsync_update_state(struct pf_kstate *st)
2155 {
2156 	struct pfsync_softc *sc = V_pfsyncif;
2157 	bool sync = false, ref = true;
2158 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2159 
2160 	PF_STATE_LOCK_ASSERT(st);
2161 	PFSYNC_BUCKET_LOCK(b);
2162 
2163 	if (st->state_flags & PFSTATE_ACK)
2164 		pfsync_undefer_state_locked(st, 0);
2165 	if (st->state_flags & PFSTATE_NOSYNC) {
2166 		if (st->sync_state != PFSYNC_S_NONE)
2167 			pfsync_q_del(st, true, b);
2168 		PFSYNC_BUCKET_UNLOCK(b);
2169 		return;
2170 	}
2171 
2172 	if (b->b_len == PFSYNC_MINPKT)
2173 		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
2174 
2175 	switch (st->sync_state) {
2176 	case PFSYNC_S_UPD_C:
2177 	case PFSYNC_S_UPD:
2178 	case PFSYNC_S_INS:
2179 		/* we're already handling it */
2180 
2181 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
2182 			st->sync_updates++;
2183 			if (st->sync_updates >= sc->sc_maxupdates)
2184 				sync = true;
2185 		}
2186 		break;
2187 
2188 	case PFSYNC_S_IACK:
2189 		pfsync_q_del(st, false, b);
2190 		ref = false;
2191 		/* FALLTHROUGH */
2192 
2193 	case PFSYNC_S_NONE:
2194 		pfsync_q_ins(st, PFSYNC_S_UPD_C, ref);
2195 		st->sync_updates = 0;
2196 		break;
2197 
2198 	default:
2199 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
2200 	}
2201 
2202 	if (sync || (time_uptime - st->pfsync_time) < 2)
2203 		pfsync_push(b);
2204 
2205 	PFSYNC_BUCKET_UNLOCK(b);
2206 }
2207 
2208 static void
2209 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
2210 {
2211 	struct pfsync_softc *sc = V_pfsyncif;
2212 	struct pfsync_bucket *b = &sc->sc_buckets[0];
2213 	struct pfsync_upd_req_item *item;
2214 	size_t nlen = sizeof(struct pfsync_upd_req);
2215 
2216 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2217 
2218 	/*
2219 	 * This code does a bit to prevent multiple update requests for the
2220 	 * same state being generated. It searches current subheader queue,
2221 	 * but it doesn't lookup into queue of already packed datagrams.
2222 	 */
2223 	TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
2224 		if (item->ur_msg.id == id &&
2225 		    item->ur_msg.creatorid == creatorid)
2226 			return;
2227 
2228 	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
2229 	if (item == NULL)
2230 		return; /* XXX stats */
2231 
2232 	item->ur_msg.id = id;
2233 	item->ur_msg.creatorid = creatorid;
2234 
2235 	if (TAILQ_EMPTY(&b->b_upd_req_list))
2236 		nlen += sizeof(struct pfsync_subheader);
2237 
2238 	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
2239 		pfsync_sendout(0, 0);
2240 
2241 		nlen = sizeof(struct pfsync_subheader) +
2242 		    sizeof(struct pfsync_upd_req);
2243 	}
2244 
2245 	TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
2246 	b->b_len += nlen;
2247 
2248 	pfsync_push(b);
2249 }
2250 
2251 static bool
2252 pfsync_update_state_req(struct pf_kstate *st)
2253 {
2254 	struct pfsync_softc *sc = V_pfsyncif;
2255 	bool ref = true, full = false;
2256 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2257 
2258 	PF_STATE_LOCK_ASSERT(st);
2259 	PFSYNC_BUCKET_LOCK(b);
2260 
2261 	if (st->state_flags & PFSTATE_NOSYNC) {
2262 		if (st->sync_state != PFSYNC_S_NONE)
2263 			pfsync_q_del(st, true, b);
2264 		PFSYNC_BUCKET_UNLOCK(b);
2265 		return (full);
2266 	}
2267 
2268 	switch (st->sync_state) {
2269 	case PFSYNC_S_UPD_C:
2270 	case PFSYNC_S_IACK:
2271 		pfsync_q_del(st, false, b);
2272 		ref = false;
2273 		/* FALLTHROUGH */
2274 
2275 	case PFSYNC_S_NONE:
2276 		pfsync_q_ins(st, PFSYNC_S_UPD, ref);
2277 		pfsync_push(b);
2278 		break;
2279 
2280 	case PFSYNC_S_INS:
2281 	case PFSYNC_S_UPD:
2282 	case PFSYNC_S_DEL_C:
2283 		/* we're already handling it */
2284 		break;
2285 
2286 	default:
2287 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
2288 	}
2289 
2290 	if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union))
2291 		full = true;
2292 
2293 	PFSYNC_BUCKET_UNLOCK(b);
2294 
2295 	return (full);
2296 }
2297 
2298 static void
2299 pfsync_delete_state(struct pf_kstate *st)
2300 {
2301 	struct pfsync_softc *sc = V_pfsyncif;
2302 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2303 	bool ref = true;
2304 
2305 	PFSYNC_BUCKET_LOCK(b);
2306 	if (st->state_flags & PFSTATE_ACK)
2307 		pfsync_undefer_state_locked(st, 1);
2308 	if (st->state_flags & PFSTATE_NOSYNC) {
2309 		if (st->sync_state != PFSYNC_S_NONE)
2310 			pfsync_q_del(st, true, b);
2311 		PFSYNC_BUCKET_UNLOCK(b);
2312 		return;
2313 	}
2314 
2315 	if (b->b_len == PFSYNC_MINPKT)
2316 		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
2317 
2318 	switch (st->sync_state) {
2319 	case PFSYNC_S_INS:
2320 		/* We never got to tell the world so just forget about it. */
2321 		pfsync_q_del(st, true, b);
2322 		break;
2323 
2324 	case PFSYNC_S_UPD_C:
2325 	case PFSYNC_S_UPD:
2326 	case PFSYNC_S_IACK:
2327 		pfsync_q_del(st, false, b);
2328 		ref = false;
2329 		/* FALLTHROUGH */
2330 
2331 	case PFSYNC_S_NONE:
2332 		pfsync_q_ins(st, PFSYNC_S_DEL_C, ref);
2333 		break;
2334 
2335 	default:
2336 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
2337 	}
2338 
2339 	PFSYNC_BUCKET_UNLOCK(b);
2340 }
2341 
2342 static void
2343 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2344 {
2345 	struct {
2346 		struct pfsync_subheader subh;
2347 		struct pfsync_clr clr;
2348 	} __packed r;
2349 
2350 	bzero(&r, sizeof(r));
2351 
2352 	r.subh.action = PFSYNC_ACT_CLR;
2353 	r.subh.count = htons(1);
2354 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
2355 
2356 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2357 	r.clr.creatorid = creatorid;
2358 
2359 	pfsync_send_plus(&r, sizeof(r));
2360 }
2361 
2362 static enum pfsync_q_id
2363 pfsync_sstate_to_qid(u_int8_t sync_state)
2364 {
2365 	struct pfsync_softc *sc = V_pfsyncif;
2366 
2367 	switch (sync_state) {
2368 		case PFSYNC_S_INS:
2369 			switch (sc->sc_version) {
2370 				case PFSYNC_MSG_VERSION_1301:
2371 					return PFSYNC_Q_INS_1301;
2372 				case PFSYNC_MSG_VERSION_1400:
2373 					return PFSYNC_Q_INS_1400;
2374 			}
2375 			break;
2376 		case PFSYNC_S_IACK:
2377 			return PFSYNC_Q_IACK;
2378 		case PFSYNC_S_UPD:
2379 			switch (sc->sc_version) {
2380 				case PFSYNC_MSG_VERSION_1301:
2381 					return PFSYNC_Q_UPD_1301;
2382 				case PFSYNC_MSG_VERSION_1400:
2383 					return PFSYNC_Q_UPD_1400;
2384 			}
2385 			break;
2386 		case PFSYNC_S_UPD_C:
2387 			return PFSYNC_Q_UPD_C;
2388 		case PFSYNC_S_DEL_C:
2389 			return PFSYNC_Q_DEL_C;
2390 		default:
2391 			panic("%s: Unsupported st->sync_state 0x%02x",
2392 			__func__, sync_state);
2393 	}
2394 
2395 	panic("%s: Unsupported pfsync_msg_version %d",
2396 	    __func__, sc->sc_version);
2397 }
2398 
2399 static void
2400 pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref)
2401 {
2402 	enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state);
2403 	struct pfsync_softc *sc = V_pfsyncif;
2404 	size_t nlen = pfsync_qs[q].len;
2405 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2406 
2407 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2408 
2409 	KASSERT(st->sync_state == PFSYNC_S_NONE,
2410 		("%s: st->sync_state %u", __func__, st->sync_state));
2411 	KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
2412 	    b->b_len));
2413 
2414 	if (TAILQ_EMPTY(&b->b_qs[q]))
2415 		nlen += sizeof(struct pfsync_subheader);
2416 
2417 	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
2418 		pfsync_sendout(1, b->b_id);
2419 
2420 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2421 	}
2422 
2423 	b->b_len += nlen;
2424 	st->sync_state = pfsync_qid_sstate[q];
2425 	TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
2426 	if (ref)
2427 		pf_ref_state(st);
2428 }
2429 
2430 static void
2431 pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b)
2432 {
2433 	enum pfsync_q_id q;
2434 
2435 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2436 	KASSERT(st->sync_state != PFSYNC_S_NONE,
2437 		("%s: st->sync_state != PFSYNC_S_NONE", __func__));
2438 
2439 	q =  pfsync_sstate_to_qid(st->sync_state);
2440 	b->b_len -= pfsync_qs[q].len;
2441 	TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
2442 	st->sync_state = PFSYNC_S_NONE;
2443 	if (unref)
2444 		pf_release_state(st);
2445 
2446 	if (TAILQ_EMPTY(&b->b_qs[q]))
2447 		b->b_len -= sizeof(struct pfsync_subheader);
2448 }
2449 
2450 static void
2451 pfsync_bulk_start(void)
2452 {
2453 	struct pfsync_softc *sc = V_pfsyncif;
2454 
2455 	if (V_pf_status.debug >= PF_DEBUG_MISC)
2456 		printf("pfsync: received bulk update request\n");
2457 
2458 	PFSYNC_BLOCK(sc);
2459 
2460 	sc->sc_ureq_received = time_uptime;
2461 	sc->sc_bulk_hashid = 0;
2462 	sc->sc_bulk_stateid = 0;
2463 	pfsync_bulk_status(PFSYNC_BUS_START);
2464 	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
2465 	PFSYNC_BUNLOCK(sc);
2466 }
2467 
/*
 * Callout handler that performs one step of a bulk update.  It walks the
 * state ID hash and calls pfsync_update_state_req() for every state that
 * has no queued sync action, has a timeout below PFTM_MAX and whose
 * pfsync_time is not newer than the time the bulk request was received.
 *
 * When pfsync_update_state_req() reports that the packet is full, the
 * current position is saved in the softc and the callout is re-armed for
 * the next tick.  Once the whole table has been walked a PFSYNC_BUS_END
 * status is sent.
 *
 * Runs with the bulk lock held (asserted below).
 */
static void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_kstate *s;
	int i;

	PFSYNC_BLOCK_ASSERT(sc);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	/*
	 * Start with the last state from the previous invocation.
	 * It may have gone away; in that case start from the
	 * saved hash slot.
	 */
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= V_pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		/*
		 * NOTE(review): a non-NULL 's' is only possible on the first
		 * iteration; the assert implies pf_find_state_byid() returns
		 * with the state's hash row locked -- confirm.
		 */
		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		/* 's' is NULL when this loop ends, so the next row is locked. */
		for (; s; s = LIST_NEXT(s, entry)) {
			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				if (pfsync_update_state_req(s)) {
					/* We've filled a packet. */
					sc->sc_bulk_hashid = i;
					sc->sc_bulk_stateid = s->id;
					sc->sc_bulk_creatorid = s->creatorid;
					PF_HASHROW_UNLOCK(ih);
					callout_reset(&sc->sc_bulk_tmo, 1,
					    pfsync_bulk_update, sc);
					goto full;
				}
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);
full:
	CURVNET_RESTORE();
}
2524 
2525 static void
2526 pfsync_bulk_status(u_int8_t status)
2527 {
2528 	struct {
2529 		struct pfsync_subheader subh;
2530 		struct pfsync_bus bus;
2531 	} __packed r;
2532 
2533 	struct pfsync_softc *sc = V_pfsyncif;
2534 
2535 	bzero(&r, sizeof(r));
2536 
2537 	r.subh.action = PFSYNC_ACT_BUS;
2538 	r.subh.count = htons(1);
2539 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;
2540 
2541 	r.bus.creatorid = V_pf_status.hostid;
2542 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2543 	r.bus.status = status;
2544 
2545 	pfsync_send_plus(&r, sizeof(r));
2546 }
2547 
2548 static void
2549 pfsync_bulk_fail(void *arg)
2550 {
2551 	struct pfsync_softc *sc = arg;
2552 	struct pfsync_bucket *b = &sc->sc_buckets[0];
2553 
2554 	CURVNET_SET(sc->sc_ifp->if_vnet);
2555 
2556 	PFSYNC_BLOCK_ASSERT(sc);
2557 
2558 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2559 		/* Try again */
2560 		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
2561 		    pfsync_bulk_fail, V_pfsyncif);
2562 		PFSYNC_BUCKET_LOCK(b);
2563 		pfsync_request_update(0, 0);
2564 		PFSYNC_BUCKET_UNLOCK(b);
2565 	} else {
2566 		/* Pretend like the transfer was ok. */
2567 		sc->sc_ureq_sent = 0;
2568 		sc->sc_bulk_tries = 0;
2569 		PFSYNC_LOCK(sc);
2570 		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
2571 			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
2572 			    "pfsync bulk fail");
2573 		sc->sc_flags |= PFSYNCF_OK;
2574 		PFSYNC_UNLOCK(sc);
2575 		if (V_pf_status.debug >= PF_DEBUG_MISC)
2576 			printf("pfsync: failed to receive bulk update\n");
2577 	}
2578 
2579 	CURVNET_RESTORE();
2580 }
2581 
2582 static void
2583 pfsync_send_plus(void *plus, size_t pluslen)
2584 {
2585 	struct pfsync_softc *sc = V_pfsyncif;
2586 	struct pfsync_bucket *b = &sc->sc_buckets[0];
2587 	uint8_t *newplus;
2588 
2589 	PFSYNC_BUCKET_LOCK(b);
2590 
2591 	if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
2592 		pfsync_sendout(1, b->b_id);
2593 
2594 	newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT);
2595 	if (newplus == NULL)
2596 		goto out;
2597 
2598 	if (b->b_plus != NULL) {
2599 		memcpy(newplus, b->b_plus, b->b_pluslen);
2600 		free(b->b_plus, M_PFSYNC);
2601 	} else {
2602 		MPASS(b->b_pluslen == 0);
2603 	}
2604 	memcpy(newplus + b->b_pluslen, plus, pluslen);
2605 
2606 	b->b_plus = newplus;
2607 	b->b_pluslen += pluslen;
2608 	b->b_len += pluslen;
2609 
2610 	pfsync_sendout(1, b->b_id);
2611 
2612 out:
2613 	PFSYNC_BUCKET_UNLOCK(b);
2614 }
2615 
2616 static void
2617 pfsync_timeout(void *arg)
2618 {
2619 	struct pfsync_bucket *b = arg;
2620 
2621 	CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
2622 	PFSYNC_BUCKET_LOCK(b);
2623 	pfsync_push(b);
2624 	PFSYNC_BUCKET_UNLOCK(b);
2625 	CURVNET_RESTORE();
2626 }
2627 
2628 static void
2629 pfsync_push(struct pfsync_bucket *b)
2630 {
2631 
2632 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2633 
2634 	b->b_flags |= PFSYNCF_BUCKET_PUSH;
2635 	swi_sched(V_pfsync_swi_cookie, 0);
2636 }
2637 
2638 static void
2639 pfsync_push_all(struct pfsync_softc *sc)
2640 {
2641 	int c;
2642 	struct pfsync_bucket *b;
2643 
2644 	for (c = 0; c < pfsync_buckets; c++) {
2645 		b = &sc->sc_buckets[c];
2646 
2647 		PFSYNC_BUCKET_LOCK(b);
2648 		pfsync_push(b);
2649 		PFSYNC_BUCKET_UNLOCK(b);
2650 	}
2651 }
2652 
2653 static void
2654 pfsync_tx(struct pfsync_softc *sc, struct mbuf *m)
2655 {
2656 	struct ip *ip;
2657 	int af, error = 0;
2658 
2659 	ip = mtod(m, struct ip *);
2660 	MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4));
2661 
2662 	af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6;
2663 
2664 	/*
2665 	 * We distinguish between a deferral packet and our
2666 	 * own pfsync packet based on M_SKIP_FIREWALL
2667 	 * flag. This is XXX.
2668 	 */
2669 	switch (af) {
2670 #ifdef INET
2671 	case AF_INET:
2672 		if (m->m_flags & M_SKIP_FIREWALL) {
2673 			error = ip_output(m, NULL, NULL, 0,
2674 			    NULL, NULL);
2675 		} else {
2676 			error = ip_output(m, NULL, NULL,
2677 			    IP_RAWOUTPUT, &sc->sc_imo, NULL);
2678 		}
2679 		break;
2680 #endif
2681 #ifdef INET6
2682 	case AF_INET6:
2683 		if (m->m_flags & M_SKIP_FIREWALL) {
2684 			error = ip6_output(m, NULL, NULL, 0,
2685 			    NULL, NULL, NULL);
2686 		} else {
2687 			error = ip6_output(m, NULL, NULL, 0,
2688 				&sc->sc_im6o, NULL, NULL);
2689 		}
2690 		break;
2691 #endif
2692 	}
2693 
2694 	if (error == 0)
2695 		V_pfsyncstats.pfsyncs_opackets++;
2696 	else
2697 		V_pfsyncstats.pfsyncs_oerrors++;
2698 
2699 }
2700 
2701 static void
2702 pfsyncintr(void *arg)
2703 {
2704 	struct epoch_tracker et;
2705 	struct pfsync_softc *sc = arg;
2706 	struct pfsync_bucket *b;
2707 	struct mbuf *m, *n;
2708 	int c;
2709 
2710 	NET_EPOCH_ENTER(et);
2711 	CURVNET_SET(sc->sc_ifp->if_vnet);
2712 
2713 	for (c = 0; c < pfsync_buckets; c++) {
2714 		b = &sc->sc_buckets[c];
2715 
2716 		PFSYNC_BUCKET_LOCK(b);
2717 		if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
2718 			pfsync_sendout(0, b->b_id);
2719 			b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
2720 		}
2721 		_IF_DEQUEUE_ALL(&b->b_snd, m);
2722 		PFSYNC_BUCKET_UNLOCK(b);
2723 
2724 		for (; m != NULL; m = n) {
2725 			n = m->m_nextpkt;
2726 			m->m_nextpkt = NULL;
2727 
2728 			pfsync_tx(sc, m);
2729 		}
2730 	}
2731 	CURVNET_RESTORE();
2732 	NET_EPOCH_EXIT(et);
2733 }
2734 
2735 static int
2736 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp,
2737     struct in_mfilter* imf, struct in6_mfilter* im6f)
2738 {
2739 #ifdef  INET
2740 	struct ip_moptions *imo = &sc->sc_imo;
2741 #endif
2742 #ifdef INET6
2743 	struct ip6_moptions *im6o = &sc->sc_im6o;
2744 	struct sockaddr_in6 *syncpeer_sa6 = NULL;
2745 #endif
2746 
2747 	if (!(ifp->if_flags & IFF_MULTICAST))
2748 		return (EADDRNOTAVAIL);
2749 
2750 	switch (sc->sc_sync_peer.ss_family) {
2751 #ifdef INET
2752 	case AF_INET:
2753 	{
2754 		int error;
2755 
2756 		ip_mfilter_init(&imo->imo_head);
2757 		imo->imo_multicast_vif = -1;
2758 		if ((error = in_joingroup(ifp,
2759 		    &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL,
2760 		    &imf->imf_inm)) != 0)
2761 			return (error);
2762 
2763 		ip_mfilter_insert(&imo->imo_head, imf);
2764 		imo->imo_multicast_ifp = ifp;
2765 		imo->imo_multicast_ttl = PFSYNC_DFLTTL;
2766 		imo->imo_multicast_loop = 0;
2767 		break;
2768 	}
2769 #endif
2770 #ifdef INET6
2771 	case AF_INET6:
2772 	{
2773 		int error;
2774 
2775 		syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer;
2776 		if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL)))
2777 			return (error);
2778 
2779 		ip6_mfilter_init(&im6o->im6o_head);
2780 		if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL,
2781 		    &(im6f->im6f_in6m), 0)) != 0)
2782 			return (error);
2783 
2784 		ip6_mfilter_insert(&im6o->im6o_head, im6f);
2785 		im6o->im6o_multicast_ifp = ifp;
2786 		im6o->im6o_multicast_hlim = PFSYNC_DFLTTL;
2787 		im6o->im6o_multicast_loop = 0;
2788 		break;
2789 	}
2790 #endif
2791 	}
2792 
2793 	return (0);
2794 }
2795 
/*
 * Undo pfsync_multicast_setup(): for each address family, remove every
 * filter from the softc's multicast option list, leave its group and free
 * it, then clear the multicast interface pointer.
 */
static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
#ifdef INET
	struct ip_moptions *opts4 = &sc->sc_imo;
	struct in_mfilter *flt4;

	for (;;) {
		flt4 = ip_mfilter_first(&opts4->imo_head);
		if (flt4 == NULL)
			break;

		ip_mfilter_remove(&opts4->imo_head, flt4);
		in_leavegroup(flt4->imf_inm, NULL);
		ip_mfilter_free(flt4);
	}
	opts4->imo_multicast_ifp = NULL;
#endif

#ifdef INET6
	struct ip6_moptions *opts6 = &sc->sc_im6o;
	struct in6_mfilter *flt6;

	for (;;) {
		flt6 = ip6_mfilter_first(&opts6->im6o_head);
		if (flt6 == NULL)
			break;

		ip6_mfilter_remove(&opts6->im6o_head, flt6);
		in6_leavegroup(flt6->im6f_in6m, NULL);
		ip6_mfilter_free(flt6);
	}
	opts6->im6o_multicast_ifp = NULL;
#endif
}
2823 
2824 void
2825 pfsync_detach_ifnet(struct ifnet *ifp)
2826 {
2827 	struct pfsync_softc *sc = V_pfsyncif;
2828 
2829 	if (sc == NULL)
2830 		return;
2831 
2832 	PFSYNC_LOCK(sc);
2833 
2834 	if (sc->sc_sync_if == ifp) {
2835 		/* We don't need mutlicast cleanup here, because the interface
2836 		 * is going away. We do need to ensure we don't try to do
2837 		 * cleanup later.
2838 		 */
2839 		ip_mfilter_init(&sc->sc_imo.imo_head);
2840 		sc->sc_imo.imo_multicast_ifp = NULL;
2841 		sc->sc_im6o.im6o_multicast_ifp = NULL;
2842 		sc->sc_sync_if = NULL;
2843 	}
2844 
2845 	PFSYNC_UNLOCK(sc);
2846 }
2847 
2848 static int
2849 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status)
2850 {
2851 	struct sockaddr_storage sa;
2852 	status->maxupdates = pfsyncr->pfsyncr_maxupdates;
2853 	status->flags = pfsyncr->pfsyncr_defer;
2854 
2855 	strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ);
2856 
2857 	memset(&sa, 0, sizeof(sa));
2858 	if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) {
2859 		struct sockaddr_in *in = (struct sockaddr_in *)&sa;
2860 		in->sin_family = AF_INET;
2861 		in->sin_len = sizeof(*in);
2862 		in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr;
2863 	}
2864 	status->syncpeer = sa;
2865 
2866 	return 0;
2867 }
2868 
2869 static int
2870 pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc)
2871 {
2872 	struct ifnet *sifp;
2873 	struct in_mfilter *imf = NULL;
2874 	struct in6_mfilter *im6f = NULL;
2875 	int error;
2876 	int c;
2877 
2878 	if ((status->maxupdates < 0) || (status->maxupdates > 255))
2879 		return (EINVAL);
2880 
2881 	if (status->syncdev[0] == '\0')
2882 		sifp = NULL;
2883 	else if ((sifp = ifunit_ref(status->syncdev)) == NULL)
2884 		return (EINVAL);
2885 
2886 	switch (status->syncpeer.ss_family) {
2887 #ifdef INET
2888 	case AF_UNSPEC:
2889 	case AF_INET: {
2890 		struct sockaddr_in *status_sin;
2891 		status_sin = (struct sockaddr_in *)&(status->syncpeer);
2892 		if (sifp != NULL) {
2893 			if (status_sin->sin_addr.s_addr == 0 ||
2894 			    status_sin->sin_addr.s_addr ==
2895 			    htonl(INADDR_PFSYNC_GROUP)) {
2896 				status_sin->sin_family = AF_INET;
2897 				status_sin->sin_len = sizeof(*status_sin);
2898 				status_sin->sin_addr.s_addr =
2899 				    htonl(INADDR_PFSYNC_GROUP);
2900 			}
2901 
2902 			if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) {
2903 				imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
2904 			}
2905 		}
2906 		break;
2907 	}
2908 #endif
2909 #ifdef INET6
2910 	case AF_INET6: {
2911 		struct sockaddr_in6 *status_sin6;
2912 		status_sin6 = (struct sockaddr_in6*)&(status->syncpeer);
2913 		if (sifp != NULL) {
2914 			if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) ||
2915 			    IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr,
2916 				&in6addr_linklocal_pfsync_group)) {
2917 				status_sin6->sin6_family = AF_INET6;
2918 				status_sin6->sin6_len = sizeof(*status_sin6);
2919 				status_sin6->sin6_addr =
2920 				    in6addr_linklocal_pfsync_group;
2921 			}
2922 
2923 			if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) {
2924 				im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0);
2925 			}
2926 		}
2927 		break;
2928 	}
2929 #endif
2930 	}
2931 
2932 	PFSYNC_LOCK(sc);
2933 
2934 	switch (status->version) {
2935 		case PFSYNC_MSG_VERSION_UNSPECIFIED:
2936 			sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT;
2937 			break;
2938 		case PFSYNC_MSG_VERSION_1301:
2939 		case PFSYNC_MSG_VERSION_1400:
2940 			sc->sc_version = status->version;
2941 			break;
2942 		default:
2943 			PFSYNC_UNLOCK(sc);
2944 			return (EINVAL);
2945 	}
2946 
2947 	switch (status->syncpeer.ss_family) {
2948 	case AF_INET: {
2949 		struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer);
2950 		struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer;
2951 		sc_sin->sin_family = AF_INET;
2952 		sc_sin->sin_len = sizeof(*sc_sin);
2953 		if (status_sin->sin_addr.s_addr == 0) {
2954 			sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
2955 		} else {
2956 			sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr;
2957 		}
2958 		break;
2959 	}
2960 	case AF_INET6: {
2961 		struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer);
2962 		struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer;
2963 		sc_sin->sin6_family = AF_INET6;
2964 		sc_sin->sin6_len = sizeof(*sc_sin);
2965 		if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) {
2966 			sc_sin->sin6_addr = in6addr_linklocal_pfsync_group;
2967 		} else {
2968 			sc_sin->sin6_addr = status_sin->sin6_addr;
2969 		}
2970 		break;
2971 	}
2972 	}
2973 
2974 	sc->sc_maxupdates = status->maxupdates;
2975 	if (status->flags & PFSYNCF_DEFER) {
2976 		sc->sc_flags |= PFSYNCF_DEFER;
2977 		V_pfsync_defer_ptr = pfsync_defer;
2978 	} else {
2979 		sc->sc_flags &= ~PFSYNCF_DEFER;
2980 		V_pfsync_defer_ptr = NULL;
2981 	}
2982 
2983 	if (sifp == NULL) {
2984 		if (sc->sc_sync_if)
2985 			if_rele(sc->sc_sync_if);
2986 		sc->sc_sync_if = NULL;
2987 		pfsync_multicast_cleanup(sc);
2988 		PFSYNC_UNLOCK(sc);
2989 		return (0);
2990 	}
2991 
2992 	for (c = 0; c < pfsync_buckets; c++) {
2993 		PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
2994 		if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
2995 		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
2996 			(sc->sc_sync_if != NULL &&
2997 			    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
2998 			sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
2999 			pfsync_sendout(1, c);
3000 		PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
3001 	}
3002 
3003 	pfsync_multicast_cleanup(sc);
3004 
3005 	if (((sc->sc_sync_peer.ss_family == AF_INET) &&
3006 	    IN_MULTICAST(ntohl(((struct sockaddr_in *)
3007 	        &sc->sc_sync_peer)->sin_addr.s_addr))) ||
3008 	    ((sc->sc_sync_peer.ss_family == AF_INET6) &&
3009 	    IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*)
3010 	        &sc->sc_sync_peer)->sin6_addr))) {
3011 		error = pfsync_multicast_setup(sc, sifp, imf, im6f);
3012 		if (error) {
3013 			if_rele(sifp);
3014 			PFSYNC_UNLOCK(sc);
3015 #ifdef INET
3016 			if (imf != NULL)
3017 				ip_mfilter_free(imf);
3018 #endif
3019 #ifdef INET6
3020 			if (im6f != NULL)
3021 				ip6_mfilter_free(im6f);
3022 #endif
3023 			return (error);
3024 		}
3025 	}
3026 	if (sc->sc_sync_if)
3027 		if_rele(sc->sc_sync_if);
3028 	sc->sc_sync_if = sifp;
3029 
3030 	switch (sc->sc_sync_peer.ss_family) {
3031 #ifdef INET
3032 	case AF_INET: {
3033 		struct ip *ip;
3034 		ip = &sc->sc_template.ipv4;
3035 		bzero(ip, sizeof(*ip));
3036 		ip->ip_v = IPVERSION;
3037 		ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2;
3038 		ip->ip_tos = IPTOS_LOWDELAY;
3039 		/* len and id are set later. */
3040 		ip->ip_off = htons(IP_DF);
3041 		ip->ip_ttl = PFSYNC_DFLTTL;
3042 		ip->ip_p = IPPROTO_PFSYNC;
3043 		ip->ip_src.s_addr = INADDR_ANY;
3044 		ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr;
3045 		break;
3046 	}
3047 #endif
3048 #ifdef INET6
3049 	case AF_INET6: {
3050 		struct ip6_hdr *ip6;
3051 		ip6 = &sc->sc_template.ipv6;
3052 		bzero(ip6, sizeof(*ip6));
3053 		ip6->ip6_vfc = IPV6_VERSION;
3054 		ip6->ip6_hlim = PFSYNC_DFLTTL;
3055 		ip6->ip6_nxt = IPPROTO_PFSYNC;
3056 		ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr;
3057 
3058 		struct epoch_tracker et;
3059 		NET_EPOCH_ENTER(et);
3060 		in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0,
3061 		    sc->sc_sync_if, &ip6->ip6_src, NULL);
3062 		NET_EPOCH_EXIT(et);
3063 		break;
3064 	}
3065 #endif
3066 	}
3067 
3068 	/* Request a full state table update. */
3069 	if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
3070 		(*carp_demote_adj_p)(V_pfsync_carp_adj,
3071 		    "pfsync bulk start");
3072 	sc->sc_flags &= ~PFSYNCF_OK;
3073 	if (V_pf_status.debug >= PF_DEBUG_MISC)
3074 		printf("pfsync: requesting bulk update\n");
3075 	PFSYNC_UNLOCK(sc);
3076 	PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
3077 	pfsync_request_update(0, 0);
3078 	PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
3079 	PFSYNC_BLOCK(sc);
3080 	sc->sc_ureq_sent = time_uptime;
3081 	callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc);
3082 	PFSYNC_BUNLOCK(sc);
3083 	return (0);
3084 }
3085 
3086 static void
3087 pfsync_pointers_init(void)
3088 {
3089 
3090 	PF_RULES_WLOCK();
3091 	V_pfsync_state_import_ptr = pfsync_state_import;
3092 	V_pfsync_insert_state_ptr = pfsync_insert_state;
3093 	V_pfsync_update_state_ptr = pfsync_update_state;
3094 	V_pfsync_delete_state_ptr = pfsync_delete_state;
3095 	V_pfsync_clear_states_ptr = pfsync_clear_states;
3096 	V_pfsync_defer_ptr = pfsync_defer;
3097 	PF_RULES_WUNLOCK();
3098 }
3099 
3100 static void
3101 pfsync_pointers_uninit(void)
3102 {
3103 
3104 	PF_RULES_WLOCK();
3105 	V_pfsync_state_import_ptr = NULL;
3106 	V_pfsync_insert_state_ptr = NULL;
3107 	V_pfsync_update_state_ptr = NULL;
3108 	V_pfsync_delete_state_ptr = NULL;
3109 	V_pfsync_clear_states_ptr = NULL;
3110 	V_pfsync_defer_ptr = NULL;
3111 	PF_RULES_WUNLOCK();
3112 }
3113 
3114 static void
3115 vnet_pfsync_init(const void *unused __unused)
3116 {
3117 	int error;
3118 
3119 	V_pfsync_cloner = if_clone_simple(pfsyncname,
3120 	    pfsync_clone_create, pfsync_clone_destroy, 1);
3121 	error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif,
3122 	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
3123 	if (error) {
3124 		if_clone_detach(V_pfsync_cloner);
3125 		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
3126 	}
3127 
3128 	pfsync_pointers_init();
3129 }
3130 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
3131     vnet_pfsync_init, NULL);
3132 
3133 static void
3134 vnet_pfsync_uninit(const void *unused __unused)
3135 {
3136 	int ret __diagused;
3137 
3138 	pfsync_pointers_uninit();
3139 
3140 	if_clone_detach(V_pfsync_cloner);
3141 	ret = swi_remove(V_pfsync_swi_cookie);
3142 	MPASS(ret == 0);
3143 	ret = intr_event_destroy(V_pfsync_swi_ie);
3144 	MPASS(ret == 0);
3145 }
3146 
3147 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
3148     vnet_pfsync_uninit, NULL);
3149 
3150 static int
3151 pfsync_init(void)
3152 {
3153 	int error;
3154 
3155 	pfsync_detach_ifnet_ptr = pfsync_detach_ifnet;
3156 
3157 #ifdef INET
3158 	error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL);
3159 	if (error)
3160 		return (error);
3161 #endif
3162 #ifdef INET6
3163 	error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL);
3164 	if (error) {
3165 		ipproto_unregister(IPPROTO_PFSYNC);
3166 		return (error);
3167 	}
3168 #endif
3169 
3170 	return (0);
3171 }
3172 
3173 static void
3174 pfsync_uninit(void)
3175 {
3176 	pfsync_detach_ifnet_ptr = NULL;
3177 
3178 #ifdef INET
3179 	ipproto_unregister(IPPROTO_PFSYNC);
3180 #endif
3181 #ifdef INET6
3182 	ip6proto_unregister(IPPROTO_PFSYNC);
3183 #endif
3184 }
3185 
3186 static int
3187 pfsync_modevent(module_t mod, int type, void *data)
3188 {
3189 	int error = 0;
3190 
3191 	switch (type) {
3192 	case MOD_LOAD:
3193 		error = pfsync_init();
3194 		break;
3195 	case MOD_UNLOAD:
3196 		pfsync_uninit();
3197 		break;
3198 	default:
3199 		error = EINVAL;
3200 		break;
3201 	}
3202 
3203 	return (error);
3204 }
3205 
3206 static moduledata_t pfsync_mod = {
3207 	pfsyncname,
3208 	pfsync_modevent,
3209 	0
3210 };
3211 
3212 #define PFSYNC_MODVER 1
3213 
3214 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
3215 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
3216 MODULE_VERSION(pfsync, PFSYNC_MODVER);
3217 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
3218