xref: /freebsd/sys/netpfil/pf/if_pfsync.c (revision e9ac41698b2f322d55ccf9da50a3596edb2c1800)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause AND ISC)
3  *
4  * Copyright (c) 2002 Michael Shalayeff
5  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*-
31  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
32  *
33  * Permission to use, copy, modify, and distribute this software for any
34  * purpose with or without fee is hereby granted, provided that the above
35  * copyright notice and this permission notice appear in all copies.
36  *
37  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
38  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
39  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
40  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
41  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
42  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
43  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44  */
45 
46 /*
47  * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
48  *
49  * Revisions picked from OpenBSD after revision 1.110 import:
50  * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
51  * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
52  * 1.120, 1.175 - use monotonic time_uptime
53  * 1.122 - reduce number of updates for non-TCP sessions
54  * 1.125, 1.127 - rewrite merge or stale processing
55  * 1.128 - cleanups
56  * 1.146 - bzero() mbuf before sparsely filling it with data
57  * 1.170 - SIOCSIFMTU checks
58  * 1.126, 1.142 - deferred packets processing
59  * 1.173 - correct expire time processing
60  */
61 
62 #include <sys/cdefs.h>
63 #include "opt_inet.h"
64 #include "opt_inet6.h"
65 #include "opt_pf.h"
66 
67 #include <sys/param.h>
68 #include <sys/bus.h>
69 #include <sys/endian.h>
70 #include <sys/interrupt.h>
71 #include <sys/kernel.h>
72 #include <sys/lock.h>
73 #include <sys/mbuf.h>
74 #include <sys/module.h>
75 #include <sys/mutex.h>
76 #include <sys/nv.h>
77 #include <sys/priv.h>
78 #include <sys/smp.h>
79 #include <sys/socket.h>
80 #include <sys/sockio.h>
81 #include <sys/sysctl.h>
82 #include <sys/syslog.h>
83 
84 #include <net/bpf.h>
85 #include <net/if.h>
86 #include <net/if_var.h>
87 #include <net/if_clone.h>
88 #include <net/if_private.h>
89 #include <net/if_types.h>
90 #include <net/vnet.h>
91 #include <net/pfvar.h>
92 #include <net/route.h>
93 #include <net/if_pfsync.h>
94 
95 #include <netinet/if_ether.h>
96 #include <netinet/in.h>
97 #include <netinet/in_var.h>
98 #include <netinet6/in6_var.h>
99 #include <netinet/ip.h>
100 #include <netinet/ip6.h>
101 #include <netinet/ip_carp.h>
102 #include <netinet/ip_var.h>
103 #include <netinet/tcp.h>
104 #include <netinet/tcp_fsm.h>
105 #include <netinet/tcp_seq.h>
106 
107 #include <netinet/ip6.h>
108 #include <netinet6/ip6_var.h>
109 #include <netinet6/scope6_var.h>
110 
111 #include <netpfil/pf/pfsync_nv.h>
112 
113 struct pfsync_bucket;
114 struct pfsync_softc;
115 
/*
 * Space for either an IPv4 or an IPv6 header.  Used to size the
 * network-layer portion of a pfsync packet (see PFSYNC_MINPKT) and as
 * the type of the softc's sc_template field.
 */
union inet_template {
	struct ip	ipv4;
	struct ip6_hdr	ipv6;
};

/* Smallest valid pfsync packet: IP header + pfsync header + one subheader. */
#define PFSYNC_MINPKT ( \
	sizeof(union inet_template) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader) )
125 
126 static int	pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *,
127 		    struct pfsync_state_peer *);
128 static int	pfsync_in_clr(struct mbuf *, int, int, int, int);
129 static int	pfsync_in_ins(struct mbuf *, int, int, int, int);
130 static int	pfsync_in_iack(struct mbuf *, int, int, int, int);
131 static int	pfsync_in_upd(struct mbuf *, int, int, int, int);
132 static int	pfsync_in_upd_c(struct mbuf *, int, int, int, int);
133 static int	pfsync_in_ureq(struct mbuf *, int, int, int, int);
134 static int	pfsync_in_del_c(struct mbuf *, int, int, int, int);
135 static int	pfsync_in_bus(struct mbuf *, int, int, int, int);
136 static int	pfsync_in_tdb(struct mbuf *, int, int, int, int);
137 static int	pfsync_in_eof(struct mbuf *, int, int, int, int);
138 static int	pfsync_in_error(struct mbuf *, int, int, int, int);
139 
/*
 * Input dispatch table, indexed by the PFSYNC_ACT_* value from the
 * packet subheader.  Actions we do not support map to pfsync_in_error().
 * Each handler returns the number of bytes it consumed, or -1 on error.
 */
static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = {
	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
	pfsync_in_ins,			/* PFSYNC_ACT_INS_1301 */
	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1301 */
	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_error,		/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
	pfsync_in_eof,			/* PFSYNC_ACT_EOF */
	pfsync_in_ins,			/* PFSYNC_ACT_INS_1400 */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD_1400 */
};
157 
/*
 * Descriptor for one output queue: how to serialize a state into a
 * wire message, how large that message is, and which action code it
 * carries (see the pfsync_qs table below).
 */
struct pfsync_q {
	void		(*write)(struct pf_kstate *, void *);	/* fill message from state */
	size_t		len;		/* size of one wire message */
	u_int8_t	action;		/* PFSYNC_ACT_* for the subheader */
};
163 
164 /* We have the following sync queues */
165 enum pfsync_q_id {
166 	PFSYNC_Q_INS_1301,
167 	PFSYNC_Q_INS_1400,
168 	PFSYNC_Q_IACK,
169 	PFSYNC_Q_UPD_1301,
170 	PFSYNC_Q_UPD_1400,
171 	PFSYNC_Q_UPD_C,
172 	PFSYNC_Q_DEL_C,
173 	PFSYNC_Q_COUNT,
174 };
175 
176 /* Functions for building messages for given queue */
177 static void	pfsync_out_state_1301(struct pf_kstate *, void *);
178 static void	pfsync_out_state_1400(struct pf_kstate *, void *);
179 static void	pfsync_out_iack(struct pf_kstate *, void *);
180 static void	pfsync_out_upd_c(struct pf_kstate *, void *);
181 static void	pfsync_out_del_c(struct pf_kstate *, void *);
182 
183 /* Attach those functions to queue */
184 static struct pfsync_q pfsync_qs[] = {
185 	{ pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 },
186 	{ pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 },
187 	{ pfsync_out_iack,       sizeof(struct pfsync_ins_ack),    PFSYNC_ACT_INS_ACK },
188 	{ pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 },
189 	{ pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 },
190 	{ pfsync_out_upd_c,      sizeof(struct pfsync_upd_c),      PFSYNC_ACT_UPD_C },
191 	{ pfsync_out_del_c,      sizeof(struct pfsync_del_c),      PFSYNC_ACT_DEL_C }
192 };
193 
194 /* Map queue to pf_kstate->sync_state */
195 static u_int8_t pfsync_qid_sstate[] = {
196 	PFSYNC_S_INS,   /* PFSYNC_Q_INS_1301 */
197 	PFSYNC_S_INS,   /* PFSYNC_Q_INS_1400 */
198 	PFSYNC_S_IACK,  /* PFSYNC_Q_IACK */
199 	PFSYNC_S_UPD,   /* PFSYNC_Q_UPD_1301 */
200 	PFSYNC_S_UPD,   /* PFSYNC_Q_UPD_1400 */
201 	PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */
202 	PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */
203 };
204 
205 /* Map pf_kstate->sync_state to queue */
206 static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t);
207 
208 static void	pfsync_q_ins(struct pf_kstate *, int sync_state, bool);
209 static void	pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *);
210 
211 static void	pfsync_update_state(struct pf_kstate *);
212 static void	pfsync_tx(struct pfsync_softc *, struct mbuf *);
213 
/* A queued update-request message, linked on a bucket's b_upd_req_list. */
struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;	/* list linkage */
	struct pfsync_upd_req			ur_msg;		/* wire-format request */
};
218 
/*
 * A deferred packet: the mbuf is held back together with the state it
 * created, with a callout to time the deferral out (see
 * pfsync_defer()/pfsync_undefer()/pfsync_defer_tmo()).
 */
struct pfsync_deferral {
	struct pfsync_softc		*pd_sc;		/* back pointer to softc */
	TAILQ_ENTRY(pfsync_deferral)	pd_entry;	/* b_deferrals linkage */
	struct callout			pd_tmo;		/* deferral timeout */

	struct pf_kstate		*pd_st;		/* state the packet belongs to */
	struct mbuf			*pd_m;		/* the held packet */
};
227 
/*
 * Per-bucket queueing state.  Work is spread over pfsync_buckets
 * buckets (default mp_ncpus * 2, see pfsync_clone_create()); each has
 * its own mutex, timeout callout, and queues of pending messages.
 */
struct pfsync_bucket
{
	int			b_id;		/* bucket index */
	struct pfsync_softc	*b_sc;		/* back pointer to the softc */
	struct mtx		b_mtx;		/* protects this bucket */
	struct callout		b_tmo;		/* periodic push timeout */
	int			b_flags;
#define	PFSYNCF_BUCKET_PUSH	0x00000001

	size_t			b_len;		/* pending packet length; starts at PFSYNC_MINPKT */
	TAILQ_HEAD(, pf_kstate)			b_qs[PFSYNC_Q_COUNT];	/* states per queue */
	TAILQ_HEAD(, pfsync_upd_req_item)	b_upd_req_list;		/* pending update requests */
	TAILQ_HEAD(, pfsync_deferral)		b_deferrals;		/* deferred packets */
	u_int			b_deferred;	/* count of entries on b_deferrals */
	uint8_t			*b_plus;	/* extra payload appended to next packet */
	size_t			b_pluslen;	/* length of b_plus */

	struct  ifaltq b_snd;	/* per-bucket send queue */
};
247 
/*
 * Software state for the (single) pfsync interface.  There is at most
 * one instance per vnet (V_pfsyncif); unit numbers other than 0 are
 * rejected in pfsync_clone_create().
 */
struct pfsync_softc {
	/* Configuration */
	struct ifnet		*sc_ifp;	/* our pseudo-interface */
	struct ifnet		*sc_sync_if;	/* interface we sync over */
	struct ip_moptions	sc_imo;		/* IPv4 multicast options */
	struct ip6_moptions	sc_im6o;	/* IPv6 multicast options */
	struct sockaddr_storage	sc_sync_peer;	/* configured peer address */
	uint32_t		sc_flags;	/* PFSYNCF_* */
	uint8_t			sc_maxupdates;	/* max updates per state (default 128) */
	union inet_template     sc_template;	/* prebuilt IP header */
	struct mtx		sc_mtx;		/* protects configuration */
	uint32_t		sc_version;	/* wire message version (PFSYNC_MSG_VERSION_*) */

	/* Queued data */
	struct pfsync_bucket	*sc_buckets;	/* array of pfsync_buckets buckets */

	/* Bulk update info */
	struct mtx		sc_bulk_mtx;	/* protects the fields below */
	uint32_t		sc_ureq_sent;
	int			sc_bulk_tries;
	uint32_t		sc_ureq_received;
	int			sc_bulk_hashid;
	uint64_t		sc_bulk_stateid;
	uint32_t		sc_bulk_creatorid;
	struct callout		sc_bulk_tmo;
	struct callout		sc_bulkfail_tmo;
};
275 
276 #define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
277 #define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
278 #define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
279 
280 #define PFSYNC_BUCKET_LOCK(b)		mtx_lock(&(b)->b_mtx)
281 #define PFSYNC_BUCKET_UNLOCK(b)		mtx_unlock(&(b)->b_mtx)
282 #define PFSYNC_BUCKET_LOCK_ASSERT(b)	mtx_assert(&(b)->b_mtx, MA_OWNED)
283 
284 #define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
285 #define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
286 #define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
287 
288 #define PFSYNC_DEFER_TIMEOUT	20
289 
290 static const char pfsyncname[] = "pfsync";
291 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
292 VNET_DEFINE_STATIC(struct pfsync_softc	*, pfsyncif) = NULL;
293 #define	V_pfsyncif		VNET(pfsyncif)
294 VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL;
295 #define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
296 VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie);
297 #define	V_pfsync_swi_ie		VNET(pfsync_swi_ie)
298 VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats);
299 #define	V_pfsyncstats		VNET(pfsyncstats)
300 VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW;
301 #define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
302 VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT;
303 #define	V_pfsync_defer_timeout	VNET(pfsync_defer_timeout)
304 
305 static void	pfsync_timeout(void *);
306 static void	pfsync_push(struct pfsync_bucket *);
307 static void	pfsync_push_all(struct pfsync_softc *);
308 static void	pfsyncintr(void *);
309 static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
310 		    struct in_mfilter *, struct in6_mfilter *);
311 static void	pfsync_multicast_cleanup(struct pfsync_softc *);
312 static void	pfsync_pointers_init(void);
313 static void	pfsync_pointers_uninit(void);
314 static int	pfsync_init(void);
315 static void	pfsync_uninit(void);
316 
317 static unsigned long pfsync_buckets;
318 
319 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
320     "PFSYNC");
321 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
322     &VNET_NAME(pfsyncstats), pfsyncstats,
323     "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
324 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW,
325     &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
326 SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
327     &pfsync_buckets, 0, "Number of pfsync hash buckets");
328 SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW,
329     &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)");
330 
331 static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
332 static void	pfsync_clone_destroy(struct ifnet *);
333 static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
334 		    struct pf_state_peer *);
335 static int	pfsyncoutput(struct ifnet *, struct mbuf *,
336 		    const struct sockaddr *, struct route *);
337 static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
338 
339 static int	pfsync_defer(struct pf_kstate *, struct mbuf *);
340 static void	pfsync_undefer(struct pfsync_deferral *, int);
341 static void	pfsync_undefer_state_locked(struct pf_kstate *, int);
342 static void	pfsync_undefer_state(struct pf_kstate *, int);
343 static void	pfsync_defer_tmo(void *);
344 
345 static void	pfsync_request_update(u_int32_t, u_int64_t);
346 static bool	pfsync_update_state_req(struct pf_kstate *);
347 
348 static void	pfsync_drop_all(struct pfsync_softc *);
349 static void	pfsync_drop(struct pfsync_softc *, int);
350 static void	pfsync_sendout(int, int);
351 static void	pfsync_send_plus(void *, size_t);
352 
353 static void	pfsync_bulk_start(void);
354 static void	pfsync_bulk_status(u_int8_t);
355 static void	pfsync_bulk_update(void *);
356 static void	pfsync_bulk_fail(void *);
357 
358 static void	pfsync_detach_ifnet(struct ifnet *);
359 
360 static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *,
361     struct pfsync_kstatus *);
362 static int pfsync_kstatus_to_softc(struct pfsync_kstatus *,
363     struct pfsync_softc *);
364 
365 #ifdef IPSEC
366 static void	pfsync_update_net_tdb(struct pfsync_tdb *);
367 #endif
368 static struct pfsync_bucket	*pfsync_get_bucket(struct pfsync_softc *,
369 		    struct pf_kstate *);
370 
371 #define PFSYNC_MAX_BULKTRIES	12
372 
373 VNET_DEFINE(struct if_clone *, pfsync_cloner);
374 #define	V_pfsync_cloner	VNET(pfsync_cloner)
375 
/* IPv6 multicast group used by pfsync: ff12::f0 (link-local scope). */
const struct in6_addr in6addr_linklocal_pfsync_group =
	{{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0 }}};
/*
 * if_clone create handler: instantiate the pfsync0 pseudo-interface.
 * Only unit 0 is allowed.  Allocates the softc and its per-CPU-scaled
 * bucket array, initializes all locks, queues and callouts, and
 * publishes the softc in V_pfsyncif.  Returns 0 or EINVAL.
 */
static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	struct pfsync_bucket *b;
	int c;
	enum pfsync_q_id q;

	if (unit != 0)
		return (EINVAL);

	/* Tunable net.pfsync.pfsync_buckets; default scales with CPUs. */
	if (! pfsync_buckets)
		pfsync_buckets = mp_ncpus * 2;

	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
	sc->sc_flags |= PFSYNCF_OK;
	sc->sc_maxupdates = 128;
	sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT;

	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
	if_initname(ifp, pfsyncname, unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
	/* Bulk-update callouts run with sc_bulk_mtx held. */
	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);

	if_attach(ifp);

	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);

	sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
	    M_PFSYNC, M_ZERO | M_WAITOK);
	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];
		mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF);

		b->b_id = c;
		b->b_sc = sc;
		b->b_len = PFSYNC_MINPKT;

		for (q = 0; q < PFSYNC_Q_COUNT; q++)
			TAILQ_INIT(&b->b_qs[q]);

		TAILQ_INIT(&b->b_upd_req_list);
		TAILQ_INIT(&b->b_deferrals);

		callout_init(&b->b_tmo, 1);

		b->b_snd.ifq_maxlen = ifqmaxlen;
	}

	V_pfsyncif = sc;

	return (0);
}
441 
/*
 * if_clone destroy handler: tear down the pfsync interface.  Flushes
 * any remaining deferrals, drains all callouts, detaches from bpf and
 * the ifnet layer, drops queued data and frees everything.  Clears
 * V_pfsyncif last.
 */
static void
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_bucket *b;
	int c, ret;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];
		/*
		 * At this stage, everything should have already been
		 * cleared by pfsync_uninit(), and we have only to
		 * drain callouts.
		 */
		PFSYNC_BUCKET_LOCK(b);
		while (b->b_deferred > 0) {
			struct pfsync_deferral *pd =
			    TAILQ_FIRST(&b->b_deferrals);

			ret = callout_stop(&pd->pd_tmo);
			PFSYNC_BUCKET_UNLOCK(b);
			if (ret > 0) {
				/*
				 * Callout was pending and we cancelled
				 * it, so the deferral is ours to undo.
				 */
				pfsync_undefer(pd, 1);
			} else {
				/*
				 * Callout is running (or already ran);
				 * wait for it to finish cleanup itself.
				 */
				callout_drain(&pd->pd_tmo);
			}
			PFSYNC_BUCKET_LOCK(b);
		}
		MPASS(b->b_deferred == 0);
		MPASS(TAILQ_EMPTY(&b->b_deferrals));
		PFSYNC_BUCKET_UNLOCK(b);

		free(b->b_plus, M_PFSYNC);
		b->b_plus = NULL;
		b->b_pluslen = 0;

		callout_drain(&b->b_tmo);
	}

	callout_drain(&sc->sc_bulkfail_tmo);
	callout_drain(&sc->sc_bulk_tmo);

	/* Undo a pending CARP demotion if a bulk update never completed. */
	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
	bpfdetach(ifp);
	if_detach(ifp);

	pfsync_drop_all(sc);

	if_free(ifp);
	pfsync_multicast_cleanup(sc);
	mtx_destroy(&sc->sc_mtx);
	mtx_destroy(&sc->sc_bulk_mtx);

	free(sc->sc_buckets, M_PFSYNC);
	free(sc, M_PFSYNC);

	V_pfsyncif = NULL;
}
501 
502 static int
503 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
504     struct pf_state_peer *d)
505 {
506 	if (s->scrub.scrub_flag && d->scrub == NULL) {
507 		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
508 		if (d->scrub == NULL)
509 			return (ENOMEM);
510 	}
511 
512 	return (0);
513 }
514 
/*
 * Import a state received from a pfsync peer (or via ioctl) into the
 * local state table.
 *
 * sp is the wire-format state; msg_version selects which union member
 * carries version-specific fields (1301 vs 1400 layout).  flags is a
 * mask of PFSYNC_SI_* (IOCTL origin, matching ruleset checksum).
 *
 * Returns 0 on success or when the state is deliberately skipped,
 * EINVAL on bad input, ENOMEM on allocation failure, or the error from
 * pf_state_insert().  Caller must hold the rules read lock.
 */
static int
pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
{
	struct pfsync_softc *sc = V_pfsyncif;
#ifndef	__NO_STRICT_ALIGNMENT
	struct pfsync_state_key key[2];
#endif
	struct pfsync_state_key *kw, *ks;
	struct pf_kstate	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_krule *r = NULL;
	struct pfi_kkif	*kif;
	int error;

	PF_RULES_RASSERT();

	if (sp->pfs_1301.creatorid == 0) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: invalid creator id: %08x\n", __func__,
			    ntohl(sp->pfs_1301.creatorid));
		return (EINVAL);
	}

	/* Unknown interface: hard error from ioctl, silently skipped on wire. */
	if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: unknown interface: %s\n", __func__,
			    sp->pfs_1301.ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->pfs_1301.rule)];
	else
		r = &V_pf_default_rule;

	/* Respect the matched rule's state limit. */
	if ((r->max_states &&
	    counter_u64_fetch(r->states_cur) >= r->max_states))
		goto cleanup;

	/*
	 * XXXGL: consider M_WAITOK in ioctl path after.
	 */
	st = pf_alloc_state(M_NOWAIT);
	if (__predict_false(st == NULL))
		goto cleanup;

	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
		goto cleanup;

#ifndef	__NO_STRICT_ALIGNMENT
	/* Wire data may be misaligned; copy keys to an aligned buffer. */
	bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2);
	kw = &key[PF_SK_WIRE];
	ks = &key[PF_SK_STACK];
#else
	kw = &sp->pfs_1301.key[PF_SK_WIRE];
	ks = &sp->pfs_1301.key[PF_SK_STACK];
#endif

	/*
	 * Only allocate a second state key when wire and stack sides
	 * actually differ (i.e. the state was NATed); otherwise share.
	 */
	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) ||
	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) ||
	    kw->port[0] != ks->port[0] ||
	    kw->port[1] != ks->port[1]) {
		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
		if (sks == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst))
		goto cleanup;

	/* Copy to state key(s). */
	skw->addr[0] = kw->addr[0];
	skw->addr[1] = kw->addr[1];
	skw->port[0] = kw->port[0];
	skw->port[1] = kw->port[1];
	skw->proto = sp->pfs_1301.proto;
	skw->af = sp->pfs_1301.af;
	if (sks != skw) {
		sks->addr[0] = ks->addr[0];
		sks->addr[1] = ks->addr[1];
		sks->port[0] = ks->port[0];
		sks->port[1] = ks->port[1];
		sks->proto = sp->pfs_1301.proto;
		sks->af = sp->pfs_1301.af;
	}

	/* copy to state */
	bcopy(&sp->pfs_1301.rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	/* Wire creation is an age in seconds; local creation is in ms. */
	st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000;
	st->expire = pf_get_uptime();
	if (sp->pfs_1301.expire) {
		uint32_t timeout;

		timeout = r->timeout[sp->pfs_1301.timeout];
		if (!timeout)
			timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000;
	}

	st->direction = sp->pfs_1301.direction;
	st->act.log = sp->pfs_1301.log;
	st->timeout = sp->pfs_1301.timeout;

	switch (msg_version) {
		case PFSYNC_MSG_VERSION_1301:
			st->state_flags = sp->pfs_1301.state_flags;
			/*
			 * In FreeBSD 13 pfsync lacks many attributes. Copy them
			 * from the rule if possible. If rule can't be matched
			 * clear any set options as we can't recover their
			 * parameters.
			*/
			if (r == &V_pf_default_rule) {
				st->state_flags &= ~PFSTATE_SETMASK;
			} else {
				/*
				 * Similar to pf_rule_to_actions(). This code
				 * won't set the actions properly if they come
				 * from multiple "match" rules as only rule
				 * creating the state is send over pfsync.
				 */
				st->act.qid = r->qid;
				st->act.pqid = r->pqid;
				st->act.rtableid = r->rtableid;
				if (r->scrub_flags & PFSTATE_SETTOS)
					st->act.set_tos = r->set_tos;
				st->act.min_ttl = r->min_ttl;
				st->act.max_mss = r->max_mss;
				st->state_flags |= (r->scrub_flags &
				    (PFSTATE_NODF|PFSTATE_RANDOMID|
				    PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|
				    PFSTATE_SETPRIO));
				if (r->dnpipe || r->dnrpipe) {
					if (r->free_flags & PFRULE_DN_IS_PIPE)
						st->state_flags |= PFSTATE_DN_IS_PIPE;
					else
						st->state_flags &= ~PFSTATE_DN_IS_PIPE;
				}
				st->act.dnpipe = r->dnpipe;
				st->act.dnrpipe = r->dnrpipe;
			}
			break;
		case PFSYNC_MSG_VERSION_1400:
			/* 1400 carries the attributes on the wire directly. */
			st->state_flags = ntohs(sp->pfs_1400.state_flags);
			st->act.qid = ntohs(sp->pfs_1400.qid);
			st->act.pqid = ntohs(sp->pfs_1400.pqid);
			st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe);
			st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe);
			st->act.rtableid = ntohl(sp->pfs_1400.rtableid);
			st->act.min_ttl = sp->pfs_1400.min_ttl;
			st->act.set_tos = sp->pfs_1400.set_tos;
			st->act.max_mss = ntohs(sp->pfs_1400.max_mss);
			st->act.set_prio[0] = sp->pfs_1400.set_prio[0];
			st->act.set_prio[1] = sp->pfs_1400.set_prio[1];
			st->rt = sp->pfs_1400.rt;
			if (st->rt && (st->rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname)) == NULL) {
				if (V_pf_status.debug >= PF_DEBUG_MISC)
					printf("%s: unknown route interface: %s\n",
					    __func__, sp->pfs_1400.rt_ifname);
				if (flags & PFSYNC_SI_IOCTL)
					error = EINVAL;
				else
					error = 0;
				goto cleanup_keys;
			}
			break;
		default:
			panic("%s: Unsupported pfsync_msg_version %d",
			    __func__, msg_version);
	}

	st->id = sp->pfs_1301.id;
	st->creatorid = sp->pfs_1301.creatorid;
	pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src);
	pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* Don't re-announce a state we just learned from a peer. */
	if (!(flags & PFSYNC_SI_IOCTL))
		st->state_flags |= PFSTATE_NOSYNC;

	if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0)
		goto cleanup_state;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	counter_u64_add(r->states_cur, 1);
	counter_u64_add(r->states_tot, 1);

	if (!(flags & PFSYNC_SI_IOCTL)) {
		st->state_flags &= ~PFSTATE_NOSYNC;
		if (st->state_flags & PFSTATE_ACK) {
			/* Peer asked for an insert-ack; queue and push it. */
			struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
			PFSYNC_BUCKET_LOCK(b);
			pfsync_q_ins(st, PFSYNC_S_IACK, true);
			PFSYNC_BUCKET_UNLOCK(b);

			pfsync_push_all(sc);
		}
	}
	st->state_flags &= ~PFSTATE_ACK;
	PF_STATE_UNLOCK(st);

	return (0);

cleanup:
	error = ENOMEM;
cleanup_keys:
	if (skw == sks)
		sks = NULL;
	uma_zfree(V_pf_state_key_z, skw);
	uma_zfree(V_pf_state_key_z, sks);

cleanup_state:	/* pf_state_insert() frees the state keys. */
	if (st) {
		st->timeout = PFTM_UNLINKED; /* appease an assert */
		pf_free_state(st);
	}
	return (error);
}
753 
754 #ifdef INET
/*
 * IPv4 input path for protocol IPPROTO_PFSYNC.  Validates the packet
 * (interface, TTL, header, version, length), then walks the chain of
 * subheaders and dispatches each action through pfsync_acts[].
 * Consumes the mbuf in all cases and returns IPPROTO_DONE.
 */
static int
pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct mbuf *m = *mp;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len, flags = 0;
	int rv;
	uint16_t count;

	PF_RULES_RLOCK_TRACKER;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		/* m_pullup() frees the chain on failure. */
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in interface name tree, require holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	/* Walk the subheaders; each handler returns bytes consumed or -1. */
	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action);
		if (rv == -1) {
			/* Handler already freed the mbuf (m_pulldown). */
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}
854 #endif
855 
856 #ifdef INET6
/*
 * IPv6 input path for protocol IPPROTO_PFSYNC.  Mirrors pfsync_input()
 * with IPv6 header handling (fixed header length, hop limit instead of
 * TTL).  Consumes the mbuf in all cases and returns IPPROTO_DONE.
 */
static int
pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len, flags = 0;
	int rv;
	uint16_t count;

	PF_RULES_RLOCK_TRACKER;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IP TTL is 255. */
	if (ip6->ip6_hlim != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}


	offset = sizeof(*ip6);
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		/* m_pullup() frees the chain on failure. */
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip6 = mtod(m, struct ip6_hdr *);
	}
	ph = (struct pfsync_header *)((char *)ip6 + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in interface name tree, require holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	/* Walk the subheaders; each handler returns bytes consumed or -1. */
	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action);
		if (rv == -1) {
			/* Handler already freed the mbuf (m_pulldown). */
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}
957 #endif
958 
959 static int
960 pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action)
961 {
962 	struct pfsync_clr *clr;
963 	struct mbuf *mp;
964 	int len = sizeof(*clr) * count;
965 	int i, offp;
966 	u_int32_t creatorid;
967 
968 	mp = m_pulldown(m, offset, len, &offp);
969 	if (mp == NULL) {
970 		V_pfsyncstats.pfsyncs_badlen++;
971 		return (-1);
972 	}
973 	clr = (struct pfsync_clr *)(mp->m_data + offp);
974 
975 	for (i = 0; i < count; i++) {
976 		creatorid = clr[i].creatorid;
977 
978 		if (clr[i].ifname[0] != '\0' &&
979 		    pfi_kkif_find(clr[i].ifname) == NULL)
980 			continue;
981 
982 		for (int i = 0; i <= pf_hashmask; i++) {
983 			struct pf_idhash *ih = &V_pf_idhash[i];
984 			struct pf_kstate *s;
985 relock:
986 			PF_HASHROW_LOCK(ih);
987 			LIST_FOREACH(s, &ih->states, entry) {
988 				if (s->creatorid == creatorid) {
989 					s->state_flags |= PFSTATE_NOSYNC;
990 					pf_unlink_state(s);
991 					goto relock;
992 				}
993 			}
994 			PF_HASHROW_UNLOCK(ih);
995 		}
996 	}
997 
998 	return (len);
999 }
1000 
/*
 * PFSYNC_ACT_INS_{1301,1400}: the peer inserted new states; import
 * them into our local state table.  Returns the number of bytes
 * consumed from the packet, or -1 to abort processing of the packet.
 */
static int
pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct mbuf *mp;
	union pfsync_state_union *sa, *sp;
	int i, offp, total_len, msg_version, msg_len;

	/* Per-record size depends on the wire protocol version. */
	switch (action) {
		case PFSYNC_ACT_INS_1301:
			msg_len = sizeof(struct pfsync_state_1301);
			total_len = msg_len * count;
			msg_version = PFSYNC_MSG_VERSION_1301;
			break;
		case PFSYNC_ACT_INS_1400:
			msg_len = sizeof(struct pfsync_state_1400);
			total_len = msg_len * count;
			msg_version = PFSYNC_MSG_VERSION_1400;
			break;
		default:
			V_pfsyncstats.pfsyncs_badact++;
			return (-1);
	}

	/* Make the whole run of state records contiguous. */
	mp = m_pulldown(m, offset, total_len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (union pfsync_state_union *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = (union pfsync_state_union *)((char *)sa + msg_len * i);

		/* Check for invalid values. */
		if (sp->pfs_1301.timeout >= PFTM_MAX ||
		    sp->pfs_1301.src.state > PF_TCPS_PROXY_DST ||
		    sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST ||
		    sp->pfs_1301.direction > PF_OUT ||
		    (sp->pfs_1301.af != AF_INET &&
		    sp->pfs_1301.af != AF_INET6)) {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("%s: invalid value\n", __func__);
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, flags, msg_version) == ENOMEM)
			/* Drop out, but process the rest of the actions. */
			break;
	}

	return (total_len);
}
1054 
1055 static int
1056 pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action)
1057 {
1058 	struct pfsync_ins_ack *ia, *iaa;
1059 	struct pf_kstate *st;
1060 
1061 	struct mbuf *mp;
1062 	int len = count * sizeof(*ia);
1063 	int offp, i;
1064 
1065 	mp = m_pulldown(m, offset, len, &offp);
1066 	if (mp == NULL) {
1067 		V_pfsyncstats.pfsyncs_badlen++;
1068 		return (-1);
1069 	}
1070 	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
1071 
1072 	for (i = 0; i < count; i++) {
1073 		ia = &iaa[i];
1074 
1075 		st = pf_find_state_byid(ia->id, ia->creatorid);
1076 		if (st == NULL)
1077 			continue;
1078 
1079 		if (st->state_flags & PFSTATE_ACK) {
1080 			pfsync_undefer_state(st, 0);
1081 		}
1082 		PF_STATE_UNLOCK(st);
1083 	}
1084 	/*
1085 	 * XXX this is not yet implemented, but we know the size of the
1086 	 * message so we can skip it.
1087 	 */
1088 
1089 	return (count * sizeof(struct pfsync_ins_ack));
1090 }
1091 
/*
 * Merge a peer's TCP state update into our local state 'st'.
 * Returns the number of directions (0-2) in which our local copy is
 * more advanced than the peer's; non-zero means the peer is stale and
 * the caller should send it a corrective update.
 */
static int
pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	PF_STATE_LOCK_ASSERT(st);

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}
1125 
/*
 * PFSYNC_ACT_UPD_{1301,1400}: full state updates from the peer.
 * Unknown states are imported as new; known states are merged, and if
 * our copy turns out to be newer the peer is sent an update of ours.
 * Returns bytes consumed, or -1 on a malformed packet.
 */
static int
pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_softc *sc = V_pfsyncif;
	union pfsync_state_union *sa, *sp;
	struct pf_kstate *st;
	struct mbuf *mp;
	int sync, offp, i, total_len, msg_len, msg_version;

	/* Per-record size depends on the wire protocol version. */
	switch (action) {
		case PFSYNC_ACT_UPD_1301:
			msg_len = sizeof(struct pfsync_state_1301);
			total_len = msg_len * count;
			msg_version = PFSYNC_MSG_VERSION_1301;
			break;
		case PFSYNC_ACT_UPD_1400:
			msg_len = sizeof(struct pfsync_state_1400);
			total_len = msg_len * count;
			msg_version = PFSYNC_MSG_VERSION_1400;
			break;
		default:
			V_pfsyncstats.pfsyncs_badact++;
			return (-1);
	}

	mp = m_pulldown(m, offset, total_len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (union pfsync_state_union *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = (union pfsync_state_union *)((char *)sa + msg_len * i);

		/* check for invalid values */
		if (sp->pfs_1301.timeout >= PFTM_MAX ||
		    sp->pfs_1301.src.state > PF_TCPS_PROXY_DST ||
		    sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, flags, msg_version))
				V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			/* An update from the peer also acks our insert. */
			pfsync_undefer_state(st, 1);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machine always go
			 * forwards
			 */
			if (st->src.state > sp->pfs_1301.src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src);
			if (st->dst.state > sp->pfs_1301.dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
		}
		if (sync < 2) {
			/* Peer is at least as current: adopt its view. */
			pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst);
			pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst);
			st->expire = pf_get_uptime();
			st->timeout = sp->pfs_1301.timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			/* Our copy is newer; push an update back. */
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			pfsync_push_all(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (total_len);
}
1224 
/*
 * PFSYNC_ACT_UPD_C: compressed state updates (id/creatorid plus peer
 * data only).  If we do not know the state we ask the peer for a full
 * update.  Returns bytes consumed, or -1 on a malformed packet.
 */
static int
pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_upd_c *ua, *up;
	struct pf_kstate *st;
	int len = count * sizeof(*up);
	int sync;
	struct mbuf *mp;
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(up->id, up->creatorid);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
			pfsync_request_update(up->creatorid, up->id);
			PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			/* The update doubles as an ack of our insert. */
			pfsync_undefer_state(st, 1);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machine always go
			 * forwards
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);
			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			/* Peer is at least as current: adopt its view. */
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = pf_get_uptime();
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			/* Our copy is newer; push an update back. */
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			pfsync_push_all(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}
1311 
/*
 * PFSYNC_ACT_UPD_REQ: the peer asks for updates.  A zero id/creatorid
 * pair requests a full bulk transfer; otherwise the named state is
 * scheduled for an update (unless it is marked NOSYNC).  Returns
 * bytes consumed, or -1 on a malformed packet.
 */
static int
pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_kstate *st;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ur = &ura[i];

		/* Wildcard request: kick off a bulk update. */
		if (ur->id == 0 && ur->creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(ur->id, ur->creatorid);
			if (st == NULL) {
				V_pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (st->state_flags & PFSTATE_NOSYNC) {
				PF_STATE_UNLOCK(st);
				continue;
			}

			pfsync_update_state_req(st);
			PF_STATE_UNLOCK(st);
		}
	}

	return (len);
}
1352 
/*
 * PFSYNC_ACT_DEL_C: compressed delete requests from the peer.  The
 * named states are unlinked locally.  Returns bytes consumed, or -1
 * on a malformed packet.
 */
static int
pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct mbuf *mp;
	struct pfsync_del_c *sa, *sp;
	struct pf_kstate *st;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_del_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		/* Mark NOSYNC so our local deletion is not echoed back. */
		st->state_flags |= PFSTATE_NOSYNC;
		pf_unlink_state(st);
	}

	return (len);
}
1384 
/*
 * PFSYNC_ACT_BUS: bulk update status messages.  Tracks the progress
 * of a bulk transfer we requested; on a valid end message the carp
 * demotion applied while out of sync is lifted.  Returns bytes
 * consumed, or -1 on a malformed packet.
 */
static int
pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bus *bus;
	struct mbuf *mp;
	int len = count * sizeof(*bus);
	int offp;

	PFSYNC_BLOCK(sc);

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0) {
		PFSYNC_BUNLOCK(sc);
		return (len);
	}

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		PFSYNC_BUNLOCK(sc);
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	bus = (struct pfsync_bus *)(mp->m_data + offp);

	switch (bus->status) {
	case PFSYNC_BUS_START:
		/* Scale the bulk-failure timeout with the state limit. */
		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
		    V_pf_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
		    sizeof(union pfsync_state_union)),
		    pfsync_bulk_fail, sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: received bulk update start\n");
		break;

	case PFSYNC_BUS_END:
		/* Only accept an end that postdates our request. */
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			callout_stop(&sc->sc_bulkfail_tmo);
			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
				    "pfsync bulk done");
			sc->sc_flags |= PFSYNCF_OK;
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received valid "
				    "bulk update end\n");
		} else {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received invalid "
				    "bulk update end: bad timestamp\n");
		}
		break;
	}
	PFSYNC_BUNLOCK(sc);

	return (len);
}
1446 
1447 static int
1448 pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action)
1449 {
1450 	int len = count * sizeof(struct pfsync_tdb);
1451 
1452 #if defined(IPSEC)
1453 	struct pfsync_tdb *tp;
1454 	struct mbuf *mp;
1455 	int offp;
1456 	int i;
1457 	int s;
1458 
1459 	mp = m_pulldown(m, offset, len, &offp);
1460 	if (mp == NULL) {
1461 		V_pfsyncstats.pfsyncs_badlen++;
1462 		return (-1);
1463 	}
1464 	tp = (struct pfsync_tdb *)(mp->m_data + offp);
1465 
1466 	for (i = 0; i < count; i++)
1467 		pfsync_update_net_tdb(&tp[i]);
1468 #endif
1469 
1470 	return (len);
1471 }
1472 
#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
static void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb		*tdb;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	return;

bad:
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
		    "invalid value\n");
	V_pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif
1511 
1512 static int
1513 pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action)
1514 {
1515 	/* check if we are at the right place in the packet */
1516 	if (offset != m->m_pkthdr.len)
1517 		V_pfsyncstats.pfsyncs_badlen++;
1518 
1519 	/* we're done. free and let the caller return */
1520 	m_freem(m);
1521 	return (-1);
1522 }
1523 
/*
 * Handler for action codes this build does not process: count it and
 * drop the whole packet.
 */
static int
pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action)
{
	V_pfsyncstats.pfsyncs_badact++;

	m_freem(m);
	return (-1);
}
1532 
/*
 * if_output method for pfsync interfaces.  pfsync never transmits
 * packets handed to it this way; they are silently discarded.
 */
static int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
	struct route *rt)
{
	m_freem(m);
	return (0);
}
1540 
1541 /* ARGSUSED */
1542 static int
1543 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1544 {
1545 	struct pfsync_softc *sc = ifp->if_softc;
1546 	struct ifreq *ifr = (struct ifreq *)data;
1547 	struct pfsyncreq pfsyncr;
1548 	size_t nvbuflen;
1549 	int error;
1550 	int c;
1551 
1552 	switch (cmd) {
1553 	case SIOCSIFFLAGS:
1554 		PFSYNC_LOCK(sc);
1555 		if (ifp->if_flags & IFF_UP) {
1556 			ifp->if_drv_flags |= IFF_DRV_RUNNING;
1557 			PFSYNC_UNLOCK(sc);
1558 			pfsync_pointers_init();
1559 		} else {
1560 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1561 			PFSYNC_UNLOCK(sc);
1562 			pfsync_pointers_uninit();
1563 		}
1564 		break;
1565 	case SIOCSIFMTU:
1566 		if (!sc->sc_sync_if ||
1567 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1568 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1569 			return (EINVAL);
1570 		if (ifr->ifr_mtu < ifp->if_mtu) {
1571 			for (c = 0; c < pfsync_buckets; c++) {
1572 				PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
1573 				if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
1574 					pfsync_sendout(1, c);
1575 				PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
1576 			}
1577 		}
1578 		ifp->if_mtu = ifr->ifr_mtu;
1579 		break;
1580 	case SIOCGETPFSYNC:
1581 		bzero(&pfsyncr, sizeof(pfsyncr));
1582 		PFSYNC_LOCK(sc);
1583 		if (sc->sc_sync_if) {
1584 			strlcpy(pfsyncr.pfsyncr_syncdev,
1585 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1586 		}
1587 		pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr;
1588 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1589 		pfsyncr.pfsyncr_defer = sc->sc_flags;
1590 		PFSYNC_UNLOCK(sc);
1591 		return (copyout(&pfsyncr, ifr_data_get_ptr(ifr),
1592 		    sizeof(pfsyncr)));
1593 
1594 	case SIOCGETPFSYNCNV:
1595 	    {
1596 		nvlist_t *nvl_syncpeer;
1597 		nvlist_t *nvl = nvlist_create(0);
1598 
1599 		if (nvl == NULL)
1600 			return (ENOMEM);
1601 
1602 		if (sc->sc_sync_if)
1603 			nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname);
1604 		nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates);
1605 		nvlist_add_number(nvl, "flags", sc->sc_flags);
1606 		nvlist_add_number(nvl, "version", sc->sc_version);
1607 		if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL)
1608 			nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer);
1609 
1610 		void *packed = NULL;
1611 		packed = nvlist_pack(nvl, &nvbuflen);
1612 		if (packed == NULL) {
1613 			free(packed, M_NVLIST);
1614 			nvlist_destroy(nvl);
1615 			return (ENOMEM);
1616 		}
1617 
1618 		if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
1619 			ifr->ifr_cap_nv.length = nvbuflen;
1620 			ifr->ifr_cap_nv.buffer = NULL;
1621 			free(packed, M_NVLIST);
1622 			nvlist_destroy(nvl);
1623 			return (EFBIG);
1624 		}
1625 
1626 		ifr->ifr_cap_nv.length = nvbuflen;
1627 		error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen);
1628 
1629 		nvlist_destroy(nvl);
1630 		nvlist_destroy(nvl_syncpeer);
1631 		free(packed, M_NVLIST);
1632 		break;
1633 	    }
1634 
1635 	case SIOCSETPFSYNC:
1636 	    {
1637 		struct pfsync_kstatus status;
1638 
1639 		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1640 			return (error);
1641 		if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr,
1642 		    sizeof(pfsyncr))))
1643 			return (error);
1644 
1645 		memset((char *)&status, 0, sizeof(struct pfsync_kstatus));
1646 		pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status);
1647 
1648 		error = pfsync_kstatus_to_softc(&status, sc);
1649 		return (error);
1650 	    }
1651 	case SIOCSETPFSYNCNV:
1652 	    {
1653 		struct pfsync_kstatus status;
1654 		void *data;
1655 		nvlist_t *nvl;
1656 
1657 		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1658 			return (error);
1659 		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
1660 			return (EINVAL);
1661 
1662 		data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
1663 
1664 		if ((error = copyin(ifr->ifr_cap_nv.buffer, data,
1665 		    ifr->ifr_cap_nv.length)) != 0) {
1666 			free(data, M_TEMP);
1667 			return (error);
1668 		}
1669 
1670 		if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) {
1671 			free(data, M_TEMP);
1672 			return (EINVAL);
1673 		}
1674 
1675 		memset((char *)&status, 0, sizeof(struct pfsync_kstatus));
1676 		pfsync_nvstatus_to_kstatus(nvl, &status);
1677 
1678 		nvlist_destroy(nvl);
1679 		free(data, M_TEMP);
1680 
1681 		error = pfsync_kstatus_to_softc(&status, sc);
1682 		return (error);
1683 	    }
1684 	default:
1685 		return (ENOTTY);
1686 	}
1687 
1688 	return (0);
1689 }
1690 
/*
 * Queue writer: export a state record in the 13.01 wire format.
 */
static void
pfsync_out_state_1301(struct pf_kstate *st, void *buf)
{
	union pfsync_state_union *sp = buf;

	pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301);
}
1698 
/*
 * Queue writer: export a state record in the 14.00 wire format.
 */
static void
pfsync_out_state_1400(struct pf_kstate *st, void *buf)
{
	union pfsync_state_union *sp = buf;

	pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400);
}
1706 
1707 static void
1708 pfsync_out_iack(struct pf_kstate *st, void *buf)
1709 {
1710 	struct pfsync_ins_ack *iack = buf;
1711 
1712 	iack->id = st->id;
1713 	iack->creatorid = st->creatorid;
1714 }
1715 
1716 static void
1717 pfsync_out_upd_c(struct pf_kstate *st, void *buf)
1718 {
1719 	struct pfsync_upd_c *up = buf;
1720 
1721 	bzero(up, sizeof(*up));
1722 	up->id = st->id;
1723 	pf_state_peer_hton(&st->src, &up->src);
1724 	pf_state_peer_hton(&st->dst, &up->dst);
1725 	up->creatorid = st->creatorid;
1726 	up->timeout = st->timeout;
1727 }
1728 
1729 static void
1730 pfsync_out_del_c(struct pf_kstate *st, void *buf)
1731 {
1732 	struct pfsync_del_c *dp = buf;
1733 
1734 	dp->id = st->id;
1735 	dp->creatorid = st->creatorid;
1736 	st->state_flags |= PFSTATE_NOSYNC;
1737 }
1738 
1739 static void
1740 pfsync_drop_all(struct pfsync_softc *sc)
1741 {
1742 	struct pfsync_bucket *b;
1743 	int c;
1744 
1745 	for (c = 0; c < pfsync_buckets; c++) {
1746 		b = &sc->sc_buckets[c];
1747 
1748 		PFSYNC_BUCKET_LOCK(b);
1749 		pfsync_drop(sc, c);
1750 		PFSYNC_BUCKET_UNLOCK(b);
1751 	}
1752 }
1753 
/*
 * Discard everything queued in bucket 'c': queued states (releasing
 * their references), pending update requests and any custom "plus"
 * region.  The bucket lock must be held.
 */
static void
pfsync_drop(struct pfsync_softc *sc, int c)
{
	struct pf_kstate *st, *next;
	struct pfsync_upd_req_item *ur;
	struct pfsync_bucket *b;
	enum pfsync_q_id q;

	b = &sc->sc_buckets[c];
	PFSYNC_BUCKET_LOCK_ASSERT(b);

	for (q = 0; q < PFSYNC_Q_COUNT; q++) {
		if (TAILQ_EMPTY(&b->b_qs[q]))
			continue;

		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
			KASSERT(st->sync_state == pfsync_qid_sstate[q],
				("%s: st->sync_state %d == q %d",
					__func__, st->sync_state, q));
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
		}
		TAILQ_INIT(&b->b_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
		free(ur, M_PFSYNC);
	}

	/* Reset accounting back to an empty packet. */
	b->b_len = PFSYNC_MINPKT;
	free(b->b_plus, M_PFSYNC);
	b->b_plus = NULL;
	b->b_pluslen = 0;
}
1789 
/*
 * Build one pfsync packet from everything queued in bucket 'c' and
 * hand it to BPF listeners and the bucket's send queue.  If
 * 'schedswi' is set, schedule the software interrupt that drains the
 * send queue.  The bucket lock must be held.
 */
static void
pfsync_sendout(int schedswi, int c)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct ifnet *ifp = sc->sc_ifp;
	struct mbuf *m;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_kstate *st, *st_next;
	struct pfsync_upd_req_item *ur;
	struct pfsync_bucket *b = &sc->sc_buckets[c];
	size_t len;
	int aflen, offset, count = 0;
	enum pfsync_q_id q;

	KASSERT(sc != NULL, ("%s: null sc", __func__));
	KASSERT(b->b_len > PFSYNC_MINPKT,
	    ("%s: sc_len %zu", __func__, b->b_len));
	PFSYNC_BUCKET_LOCK_ASSERT(b);

	/* Nobody to deliver to: throw the queued work away. */
	if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) {
		pfsync_drop(sc, c);
		return;
	}

	m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
		V_pfsyncstats.pfsyncs_onomem++;
		return;
	}
	m->m_data += max_linkhdr;
	bzero(m->m_data, b->b_len);

	len = b->b_len;

	/* build the ip header */
	switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
	case AF_INET:
	    {
		struct ip *ip;

		ip = mtod(m, struct ip *);
		bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip));
		aflen = offset = sizeof(*ip);

		/* The template slot is sized for the larger v6 header. */
		len -= sizeof(union inet_template) - sizeof(struct ip);
		ip->ip_len = htons(len);
		ip_fillid(ip);
		break;
	    }
#endif
#ifdef INET6
	case AF_INET6:
		{
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6));
		aflen = offset = sizeof(*ip6);

		len -= sizeof(union inet_template) - sizeof(struct ip6_hdr);
		ip6->ip6_plen = htons(len);
		break;
		}
#endif
	default:
		m_freem(m);
		pfsync_drop(sc, c);
		return;
	}
	m->m_len = m->m_pkthdr.len = len;

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(len - aflen);
	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	/* walk the queues */
	for (q = 0; q < PFSYNC_Q_COUNT; q++) {
		if (TAILQ_EMPTY(&b->b_qs[q]))
			continue;

		/* One subheader per non-empty queue, counting records. */
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
			KASSERT(st->sync_state == pfsync_qid_sstate[q],
				("%s: st->sync_state == q",
					__func__));
			/*
			 * XXXGL: some of write methods do unlocked reads
			 * of state data :(
			 */
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
			count++;
		}
		TAILQ_INIT(&b->b_qs[q]);

		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
	}

	/* Append any pending update requests. */
	if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);
			free(ur, M_PFSYNC);
			count++;
		}

		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
	}

	/* has someone built a custom region for us to add? */
	if (b->b_plus != NULL) {
		bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
		offset += b->b_pluslen;

		free(b->b_plus, M_PFSYNC);
		b->b_plus = NULL;
		b->b_pluslen = 0;
	}

	/* Terminate the packet with an EOF subheader. */
	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;

	/* we're done, let's put it on the wire */
	if (bpf_peers_present(ifp->if_bpf)) {
		/* Tap the pfsync payload without the IP header. */
		m->m_data += aflen;
		m->m_len = m->m_pkthdr.len = len - aflen;
		bpf_mtap(ifp->if_bpf, m);
		m->m_data -= aflen;
		m->m_len = m->m_pkthdr.len = len;
	}

	if (sc->sc_sync_if == NULL) {
		/* BPF-only delivery; nothing to transmit. */
		b->b_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
	b->b_len = PFSYNC_MINPKT;

	if (!_IF_QFULL(&b->b_snd))
		_IF_ENQUEUE(&b->b_snd, m);
	else {
		m_freem(m);
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
	}
	if (schedswi)
		swi_sched(V_pfsync_swi_cookie, 0);
}
1967 
/*
 * pf hook: a new state was created locally.  Queue it for insertion
 * on the peer unless the state (or its rule) opts out of syncing.
 */
static void
pfsync_insert_state(struct pf_kstate *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	if (st->state_flags & PFSTATE_NOSYNC)
		return;

	/* Never sync pfsync's own traffic or states from no-sync rules. */
	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		st->state_flags |= PFSTATE_NOSYNC;
		return;
	}

	KASSERT(st->sync_state == PFSYNC_S_NONE,
		("%s: st->sync_state %u", __func__, st->sync_state));

	PFSYNC_BUCKET_LOCK(b);
	/* An empty bucket needs its flush timeout armed. */
	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	pfsync_q_ins(st, PFSYNC_S_INS, true);
	PFSYNC_BUCKET_UNLOCK(b);

	st->sync_updates = 0;
}
1995 
/*
 * pf hook: optionally hold back the packet that created state 'st'
 * until the peer acknowledges the state insert (or a timeout fires).
 * Returns 1 if the mbuf was taken over for deferral, 0 if the caller
 * should transmit it normally.
 */
static int
pfsync_defer(struct pf_kstate *st, struct mbuf *m)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_deferral *pd;
	struct pfsync_bucket *b;

	if (m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc == NULL)
		return (0);

	b = pfsync_get_bucket(sc, st);

	PFSYNC_LOCK(sc);

	/* Deferral only applies when running and explicitly enabled. */
	if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    !(sc->sc_flags & PFSYNCF_DEFER)) {
		PFSYNC_UNLOCK(sc);
		return (0);
	}

	PFSYNC_BUCKET_LOCK(b);
	PFSYNC_UNLOCK(sc);

	/* Cap the backlog by releasing the oldest deferral. */
	if (b->b_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);

	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
	if (pd == NULL) {
		PFSYNC_BUCKET_UNLOCK(b);
		return (0);
	}
	b->b_deferred++;

	/* Skip pf when the held packet is eventually re-sent. */
	m->m_flags |= M_SKIP_FIREWALL;
	st->state_flags |= PFSTATE_ACK;

	pd->pd_sc = sc;
	pd->pd_st = st;
	pf_ref_state(st);
	pd->pd_m = m;

	TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
	callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
	callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000,
	    pfsync_defer_tmo, pd);

	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);

	return (1);
}
2050 
/*
 * Release a deferral: either drop the held packet or queue it for
 * transmission.  The bucket lock must be held.
 */
static void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_kstate *st = pd->pd_st;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
	b->b_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	free(pd, M_PFSYNC);
	pf_release_state(st);

	if (drop)
		m_freem(m);
	else {
		_IF_ENQUEUE(&b->b_snd, m);
		pfsync_push(b);
	}
}
2074 
/*
 * Deferral timeout: no ack arrived in time, so transmit the held
 * packet anyway.  Runs as a callout with the bucket mutex held; the
 * callout was initialized CALLOUT_RETURNUNLOCKED, so the lock is
 * dropped here rather than by the callout framework.
 */
static void
pfsync_defer_tmo(void *arg)
{
	struct epoch_tracker et;
	struct pfsync_deferral *pd = arg;
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_kstate *st = pd->pd_st;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
	b->b_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	PFSYNC_BUCKET_UNLOCK(b);
	free(pd, M_PFSYNC);

	if (sc->sc_sync_if == NULL) {
		/* No syncdev any more; just drop the packet. */
		pf_release_state(st);
		m_freem(m);
		return;
	}

	NET_EPOCH_ENTER(et);
	CURVNET_SET(sc->sc_sync_if->if_vnet);

	pfsync_tx(sc, m);

	pf_release_state(st);

	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}
2109 
2110 static void
2111 pfsync_undefer_state_locked(struct pf_kstate *st, int drop)
2112 {
2113 	struct pfsync_softc *sc = V_pfsyncif;
2114 	struct pfsync_deferral *pd;
2115 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2116 
2117 	PFSYNC_BUCKET_LOCK_ASSERT(b);
2118 
2119 	TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
2120 		 if (pd->pd_st == st) {
2121 			if (callout_stop(&pd->pd_tmo) > 0)
2122 				pfsync_undefer(pd, drop);
2123 
2124 			return;
2125 		}
2126 	}
2127 
2128 	panic("%s: unable to find deferred state", __func__);
2129 }
2130 
2131 static void
2132 pfsync_undefer_state(struct pf_kstate *st, int drop)
2133 {
2134 	struct pfsync_softc *sc = V_pfsyncif;
2135 	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
2136 
2137 	PFSYNC_BUCKET_LOCK(b);
2138 	pfsync_undefer_state_locked(st, drop);
2139 	PFSYNC_BUCKET_UNLOCK(b);
2140 }
2141 
2142 static struct pfsync_bucket*
2143 pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st)
2144 {
2145 	int c = PF_IDHASH(st) % pfsync_buckets;
2146 	return &sc->sc_buckets[c];
2147 }
2148 
static void
pfsync_update_state(struct pf_kstate *st)
{
	/*
	 * Schedule a (compressed) update message for a state that changed.
	 * Depending on the state's current sync queue this either counts
	 * another update toward sc_maxupdates, or moves the state onto the
	 * UPD_C queue.  Called with the state lock held.
	 */
	struct pfsync_softc *sc = V_pfsyncif;
	bool sync = false, ref = true;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_BUCKET_LOCK(b);

	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state_locked(st, 0);
	if (st->state_flags & PFSTATE_NOSYNC) {
		/* State opted out of syncing; drop any queued message. */
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return;
	}

	/* First payload in this bucket: arm the 1-second flush timer. */
	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = true;
		}
		break;

	case PFSYNC_S_IACK:
		/* Replace the queued ack with an update, reusing its ref. */
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C, ref);
		st->sync_updates = 0;
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	/* Push promptly for busy states or when the update cap is hit. */
	if (sync || (time_uptime - st->pfsync_time) < 2)
		pfsync_push(b);

	PFSYNC_BUCKET_UNLOCK(b);
}
2203 
static void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	/*
	 * Queue an update request for the given state (id/creatorid both
	 * in network byte order; 0/0 requests a full bulk update).  Update
	 * requests always go through bucket 0.
	 */
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	/*
	 * This code does a bit to prevent multiple update requests for the
	 * same state being generated. It searches current subheader queue,
	 * but it doesn't lookup into queue of already packed datagrams.
	 */
	TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
		if (item->ur_msg.id == id &&
		    item->ur_msg.creatorid == creatorid)
			return;

	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
	if (item == NULL)
		return; /* XXX stats */

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	/* First request in the queue also needs a subheader. */
	if (TAILQ_EMPTY(&b->b_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
		/* Packet full: flush, then account for a fresh subheader. */
		pfsync_sendout(0, 0);

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
	b->b_len += nlen;

	pfsync_push(b);
}
2246 
static bool
pfsync_update_state_req(struct pf_kstate *st)
{
	/*
	 * Queue a full (uncompressed) state update, as sent in response to
	 * a peer's update request or during a bulk update.  Returns true
	 * when the bucket's packet is (nearly) full and should be flushed
	 * before more states are added.  Called with the state lock held.
	 */
	struct pfsync_softc *sc = V_pfsyncif;
	bool ref = true, full = false;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_BUCKET_LOCK(b);

	if (st->state_flags & PFSTATE_NOSYNC) {
		/* State opted out of syncing; drop any queued message. */
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return (full);
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		/* Upgrade to a full update, reusing the queue's reference. */
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD, ref);
		pfsync_push(b);
		break;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL_C:
		/* we're already handling it */
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	/* Report "full" when another state record would not fit. */
	if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union))
		full = true;

	PFSYNC_BUCKET_UNLOCK(b);

	return (full);
}
2293 
static void
pfsync_delete_state(struct pf_kstate *st)
{
	/*
	 * Schedule a compressed delete message for a state that is going
	 * away, cancelling (and dropping) any pending deferral and any
	 * queued insert that was never sent.
	 */
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
	bool ref = true;

	PFSYNC_BUCKET_LOCK(b);
	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state_locked(st, 1);
	if (st->state_flags & PFSTATE_NOSYNC) {
		/* State opted out of syncing; drop any queued message. */
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return;
	}

	/* First payload in this bucket: arm the 1-second flush timer. */
	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* We never got to tell the world so just forget about it. */
		pfsync_q_del(st, true, b);
		break;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		/* Replace the queued message, reusing its reference. */
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL_C, ref);
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	PFSYNC_BUCKET_UNLOCK(b);
}
2337 
2338 static void
2339 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2340 {
2341 	struct {
2342 		struct pfsync_subheader subh;
2343 		struct pfsync_clr clr;
2344 	} __packed r;
2345 
2346 	bzero(&r, sizeof(r));
2347 
2348 	r.subh.action = PFSYNC_ACT_CLR;
2349 	r.subh.count = htons(1);
2350 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
2351 
2352 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2353 	r.clr.creatorid = creatorid;
2354 
2355 	pfsync_send_plus(&r, sizeof(r));
2356 }
2357 
2358 static enum pfsync_q_id
2359 pfsync_sstate_to_qid(u_int8_t sync_state)
2360 {
2361 	struct pfsync_softc *sc = V_pfsyncif;
2362 
2363 	switch (sync_state) {
2364 		case PFSYNC_S_INS:
2365 			switch (sc->sc_version) {
2366 				case PFSYNC_MSG_VERSION_1301:
2367 					return PFSYNC_Q_INS_1301;
2368 				case PFSYNC_MSG_VERSION_1400:
2369 					return PFSYNC_Q_INS_1400;
2370 			}
2371 			break;
2372 		case PFSYNC_S_IACK:
2373 			return PFSYNC_Q_IACK;
2374 		case PFSYNC_S_UPD:
2375 			switch (sc->sc_version) {
2376 				case PFSYNC_MSG_VERSION_1301:
2377 					return PFSYNC_Q_UPD_1301;
2378 				case PFSYNC_MSG_VERSION_1400:
2379 					return PFSYNC_Q_UPD_1400;
2380 			}
2381 			break;
2382 		case PFSYNC_S_UPD_C:
2383 			return PFSYNC_Q_UPD_C;
2384 		case PFSYNC_S_DEL_C:
2385 			return PFSYNC_Q_DEL_C;
2386 		default:
2387 			panic("%s: Unsupported st->sync_state 0x%02x",
2388 			__func__, sync_state);
2389 	}
2390 
2391 	panic("%s: Unsupported pfsync_msg_version %d",
2392 	    __func__, sc->sc_version);
2393 }
2394 
static void
pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref)
{
	/*
	 * Append a state to the output queue matching 'sync_state',
	 * accounting for its wire size in b_len and flushing the bucket
	 * first if the record would not fit in one packet.  When 'ref' is
	 * true a new state reference is taken for the queue entry.
	 */
	enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state);
	struct pfsync_softc *sc = V_pfsyncif;
	size_t nlen = pfsync_qs[q].len;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	KASSERT(st->sync_state == PFSYNC_S_NONE,
		("%s: st->sync_state %u", __func__, st->sync_state));
	KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
	    b->b_len));

	/* First record in this queue also needs a subheader. */
	if (TAILQ_EMPTY(&b->b_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
		/* Packet full: flush, then account for a fresh subheader. */
		pfsync_sendout(1, b->b_id);

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	b->b_len += nlen;
	st->sync_state = pfsync_qid_sstate[q];
	TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
	if (ref)
		pf_ref_state(st);
}
2425 
static void
pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b)
{
	/*
	 * Remove a state from its current output queue, undoing the b_len
	 * accounting done by pfsync_q_ins().  When 'unref' is true the
	 * queue's state reference is released as well.
	 */
	enum pfsync_q_id q;

	PFSYNC_BUCKET_LOCK_ASSERT(b);
	KASSERT(st->sync_state != PFSYNC_S_NONE,
		("%s: st->sync_state != PFSYNC_S_NONE", __func__));

	q =  pfsync_sstate_to_qid(st->sync_state);
	b->b_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	if (unref)
		pf_release_state(st);

	/* Queue drained: its subheader no longer counts either. */
	if (TAILQ_EMPTY(&b->b_qs[q]))
		b->b_len -= sizeof(struct pfsync_subheader);
}
2445 
static void
pfsync_bulk_start(void)
{
	/*
	 * Begin servicing a peer's bulk update request: reset the bulk
	 * scan cursor, announce the start on the wire, and kick off the
	 * incremental pfsync_bulk_update() callout.
	 */
	struct pfsync_softc *sc = V_pfsyncif;

	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	PFSYNC_BLOCK(sc);

	sc->sc_ureq_received = time_uptime;
	sc->sc_bulk_hashid = 0;
	sc->sc_bulk_stateid = 0;
	pfsync_bulk_status(PFSYNC_BUS_START);
	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
	PFSYNC_BUNLOCK(sc);
}
2463 
static void
pfsync_bulk_update(void *arg)
{
	/*
	 * One step of a bulk update: walk the state id-hash from the saved
	 * cursor, queueing a full update for every syncable state, until a
	 * packet fills up (then reschedule ourselves) or the table is
	 * exhausted (then announce the end of the bulk update).
	 */
	struct pfsync_softc *sc = arg;
	struct pf_kstate *s;
	int i;

	PFSYNC_BLOCK_ASSERT(sc);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	/*
	 * Start with last state from previous invocation.
	 * It may had gone, in this case start from the
	 * hash slot.
	 */
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		/*
		 * On the first iteration pf_find_state_byid() returned
		 * with the row already locked; otherwise lock it here.
		 */
		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		for (; s; s = LIST_NEXT(s, entry)) {
			/* Skip states synced since the request arrived. */
			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				if (pfsync_update_state_req(s)) {
					/* We've filled a packet. */
					sc->sc_bulk_hashid = i;
					sc->sc_bulk_stateid = s->id;
					sc->sc_bulk_creatorid = s->creatorid;
					PF_HASHROW_UNLOCK(ih);
					callout_reset(&sc->sc_bulk_tmo, 1,
					    pfsync_bulk_update, sc);
					goto full;
				}
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);
full:
	CURVNET_RESTORE();
}
2520 
2521 static void
2522 pfsync_bulk_status(u_int8_t status)
2523 {
2524 	struct {
2525 		struct pfsync_subheader subh;
2526 		struct pfsync_bus bus;
2527 	} __packed r;
2528 
2529 	struct pfsync_softc *sc = V_pfsyncif;
2530 
2531 	bzero(&r, sizeof(r));
2532 
2533 	r.subh.action = PFSYNC_ACT_BUS;
2534 	r.subh.count = htons(1);
2535 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;
2536 
2537 	r.bus.creatorid = V_pf_status.hostid;
2538 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2539 	r.bus.status = status;
2540 
2541 	pfsync_send_plus(&r, sizeof(r));
2542 }
2543 
static void
pfsync_bulk_fail(void *arg)
{
	/*
	 * Bulk request timeout: the peer never finished (or never started)
	 * a bulk update.  Retry up to PFSYNC_MAX_BULKTRIES times, then give
	 * up and pretend the transfer succeeded so carp demotion is lifted.
	 */
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		/* NOTE(review): passes V_pfsyncif where 'sc' (== arg) is
		 * used elsewhere; presumably identical — confirm. */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_BUCKET_LOCK(b);
		pfsync_request_update(0, 0);
		PFSYNC_BUCKET_UNLOCK(b);
	} else {
		/* Pretend like the transfer was ok. */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}
2577 
static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	/*
	 * Append 'pluslen' bytes of pre-built message data to bucket 0's
	 * "plus" buffer (grown by copy-and-replace since there is no
	 * krealloc here) and flush the bucket immediately.  Used for CLR
	 * and BUS messages that bypass the per-state queues.
	 */
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];
	uint8_t *newplus;

	PFSYNC_BUCKET_LOCK(b);

	/* Flush first if the extra data would overflow the packet. */
	if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1, b->b_id);

	newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT);
	if (newplus == NULL)
		goto out;

	if (b->b_plus != NULL) {
		memcpy(newplus, b->b_plus, b->b_pluslen);
		free(b->b_plus, M_PFSYNC);
	} else {
		MPASS(b->b_pluslen == 0);
	}
	memcpy(newplus + b->b_pluslen, plus, pluslen);

	b->b_plus = newplus;
	b->b_pluslen += pluslen;
	b->b_len += pluslen;

	pfsync_sendout(1, b->b_id);

out:
	PFSYNC_BUCKET_UNLOCK(b);
}
2611 
static void
pfsync_timeout(void *arg)
{
	/* Bucket flush timer: schedule a transmit for this bucket. */
	struct pfsync_bucket *b = arg;

	CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
	PFSYNC_BUCKET_LOCK(b);
	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);
	CURVNET_RESTORE();
}
2623 
static void
pfsync_push(struct pfsync_bucket *b)
{
	/*
	 * Mark this bucket as wanting a transmit and schedule the pfsync
	 * software interrupt (pfsyncintr) to do the actual sendout.
	 */

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	b->b_flags |= PFSYNCF_BUCKET_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}
2633 
2634 static void
2635 pfsync_push_all(struct pfsync_softc *sc)
2636 {
2637 	int c;
2638 	struct pfsync_bucket *b;
2639 
2640 	for (c = 0; c < pfsync_buckets; c++) {
2641 		b = &sc->sc_buckets[c];
2642 
2643 		PFSYNC_BUCKET_LOCK(b);
2644 		pfsync_push(b);
2645 		PFSYNC_BUCKET_UNLOCK(b);
2646 	}
2647 }
2648 
static void
pfsync_tx(struct pfsync_softc *sc, struct mbuf *m)
{
	/*
	 * Transmit a fully-built pfsync packet (or a deferred data packet)
	 * via the appropriate IP output routine, keyed off the IP version
	 * in the packet header.
	 */
	struct ip *ip;
	int af, error = 0;

	ip = mtod(m, struct ip *);
	MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4));

	af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6;

	/*
	 * We distinguish between a deferral packet and our
	 * own pfsync packet based on M_SKIP_FIREWALL
	 * flag. This is XXX.
	 */
	switch (af) {
#ifdef INET
	case AF_INET:
		if (m->m_flags & M_SKIP_FIREWALL) {
			/* Deferred packet: normal output, no mcast opts. */
			error = ip_output(m, NULL, NULL, 0,
			    NULL, NULL);
		} else {
			/* Our own packet: raw output with mcast options. */
			error = ip_output(m, NULL, NULL,
			    IP_RAWOUTPUT, &sc->sc_imo, NULL);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		if (m->m_flags & M_SKIP_FIREWALL) {
			error = ip6_output(m, NULL, NULL, 0,
			    NULL, NULL, NULL);
		} else {
			error = ip6_output(m, NULL, NULL, 0,
				&sc->sc_im6o, NULL, NULL);
		}
		break;
#endif
	}

	if (error == 0)
		V_pfsyncstats.pfsyncs_opackets++;
	else
		V_pfsyncstats.pfsyncs_oerrors++;

}
2696 
static void
pfsyncintr(void *arg)
{
	/*
	 * Software interrupt handler: for every bucket flagged for push,
	 * build and enqueue its packet, then drain and transmit the
	 * bucket's send queue outside the bucket lock.
	 */
	struct epoch_tracker et;
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b;
	struct mbuf *m, *n;
	int c;

	NET_EPOCH_ENTER(et);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
			pfsync_sendout(0, b->b_id);
			b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
		}
		/* Grab the whole send queue, then transmit unlocked. */
		_IF_DEQUEUE_ALL(&b->b_snd, m);
		PFSYNC_BUCKET_UNLOCK(b);

		for (; m != NULL; m = n) {
			n = m->m_nextpkt;
			m->m_nextpkt = NULL;

			pfsync_tx(sc, m);
		}
	}
	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}
2730 
/*
 * Join the pfsync multicast group for the configured sync peer on 'ifp'
 * and set up the corresponding IPv4/IPv6 multicast output options.
 * The caller supplies pre-allocated multicast filters (imf/im6f); only
 * the one matching the peer's address family is consumed.
 * Returns 0 on success or an errno value.
 */
static int
pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp,
    struct in_mfilter* imf, struct in6_mfilter* im6f)
{
#ifdef  INET
	struct ip_moptions *imo = &sc->sc_imo;
#endif
#ifdef INET6
	struct ip6_moptions *im6o = &sc->sc_im6o;
	struct sockaddr_in6 *syncpeer_sa6 = NULL;
#endif

	if (!(ifp->if_flags & IFF_MULTICAST))
		return (EADDRNOTAVAIL);

	switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
	case AF_INET:
	{
		int error;

		ip_mfilter_init(&imo->imo_head);
		imo->imo_multicast_vif = -1;
		if ((error = in_joingroup(ifp,
		    &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL,
		    &imf->imf_inm)) != 0)
			return (error);

		ip_mfilter_insert(&imo->imo_head, imf);
		imo->imo_multicast_ifp = ifp;
		imo->imo_multicast_ttl = PFSYNC_DFLTTL;
		imo->imo_multicast_loop = 0;
		break;
	}
#endif
#ifdef INET6
	case AF_INET6:
	{
		int error;

		/* Embed the interface scope into the link-local group. */
		syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer;
		if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL)))
			return (error);

		ip6_mfilter_init(&im6o->im6o_head);
		if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL,
		    &(im6f->im6f_in6m), 0)) != 0)
			return (error);

		ip6_mfilter_insert(&im6o->im6o_head, im6f);
		im6o->im6o_multicast_ifp = ifp;
		im6o->im6o_multicast_hlim = PFSYNC_DFLTTL;
		im6o->im6o_multicast_loop = 0;
		break;
	}
#endif
	}

	return (0);
}
2791 
static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
	/*
	 * Leave any joined pfsync multicast groups (IPv4 and IPv6) and
	 * release their filters, clearing the multicast output interfaces.
	 */
#ifdef INET
	struct ip_moptions *imo = &sc->sc_imo;
	struct in_mfilter *imf;

	while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
		ip_mfilter_remove(&imo->imo_head, imf);
		in_leavegroup(imf->imf_inm, NULL);
		ip_mfilter_free(imf);
	}
	imo->imo_multicast_ifp = NULL;
#endif

#ifdef INET6
	struct ip6_moptions *im6o = &sc->sc_im6o;
	struct in6_mfilter *im6f;

	while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
		ip6_mfilter_remove(&im6o->im6o_head, im6f);
		in6_leavegroup(im6f->im6f_in6m, NULL);
		ip6_mfilter_free(im6f);
	}
	im6o->im6o_multicast_ifp = NULL;
#endif
}
2819 
/*
 * Called when an ifnet is departing; if it was our sync interface,
 * detach from it without touching its multicast state.
 */
void
pfsync_detach_ifnet(struct ifnet *ifp)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (sc == NULL)
		return;

	PFSYNC_LOCK(sc);

	if (sc->sc_sync_if == ifp) {
		/* We don't need multicast cleanup here, because the interface
		 * is going away. We do need to ensure we don't try to do
		 * cleanup later.
		 */
		ip_mfilter_init(&sc->sc_imo.imo_head);
		sc->sc_imo.imo_multicast_ifp = NULL;
		sc->sc_im6o.im6o_multicast_ifp = NULL;
		sc->sc_sync_if = NULL;
	}

	PFSYNC_UNLOCK(sc);
}
2843 
2844 static int
2845 pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status)
2846 {
2847 	struct sockaddr_storage sa;
2848 	status->maxupdates = pfsyncr->pfsyncr_maxupdates;
2849 	status->flags = pfsyncr->pfsyncr_defer;
2850 
2851 	strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ);
2852 
2853 	memset(&sa, 0, sizeof(sa));
2854 	if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) {
2855 		struct sockaddr_in *in = (struct sockaddr_in *)&sa;
2856 		in->sin_family = AF_INET;
2857 		in->sin_len = sizeof(*in);
2858 		in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr;
2859 	}
2860 	status->syncpeer = sa;
2861 
2862 	return 0;
2863 }
2864 
/*
 * Apply a new pfsync configuration (from ioctl/netlink) to the softc:
 * validate it, resolve the sync interface and peer address (defaulting
 * to the pfsync multicast group), rebuild multicast memberships and the
 * IP header template, and kick off a bulk update request.
 * Returns 0 on success or an errno value.
 */
static int
pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc)
{
	struct ifnet *sifp;
	struct in_mfilter *imf = NULL;
	struct in6_mfilter *im6f = NULL;
	int error;
	int c;

	if ((status->maxupdates < 0) || (status->maxupdates > 255))
		return (EINVAL);

	if (status->syncdev[0] == '\0')
		sifp = NULL;
	else if ((sifp = ifunit_ref(status->syncdev)) == NULL)
		return (EINVAL);

	/*
	 * Pre-allocate multicast filters (M_WAITOK) before taking the
	 * softc lock, and normalize an unset peer to the default group.
	 */
	switch (status->syncpeer.ss_family) {
#ifdef INET
	case AF_UNSPEC:
	case AF_INET: {
		struct sockaddr_in *status_sin;
		status_sin = (struct sockaddr_in *)&(status->syncpeer);
		if (sifp != NULL) {
			if (status_sin->sin_addr.s_addr == 0 ||
			    status_sin->sin_addr.s_addr ==
			    htonl(INADDR_PFSYNC_GROUP)) {
				status_sin->sin_family = AF_INET;
				status_sin->sin_len = sizeof(*status_sin);
				status_sin->sin_addr.s_addr =
				    htonl(INADDR_PFSYNC_GROUP);
			}

			if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) {
				imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
			}
		}
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *status_sin6;
		status_sin6 = (struct sockaddr_in6*)&(status->syncpeer);
		if (sifp != NULL) {
			if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) ||
			    IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr,
				&in6addr_linklocal_pfsync_group)) {
				status_sin6->sin6_family = AF_INET6;
				status_sin6->sin6_len = sizeof(*status_sin6);
				status_sin6->sin6_addr =
				    in6addr_linklocal_pfsync_group;
			}

			if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) {
				im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0);
			}
		}
		break;
	}
#endif
	}

	PFSYNC_LOCK(sc);

	/* Select the wire message format version. */
	switch (status->version) {
		case PFSYNC_MSG_VERSION_UNSPECIFIED:
			sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT;
			break;
		case PFSYNC_MSG_VERSION_1301:
		case PFSYNC_MSG_VERSION_1400:
			sc->sc_version = status->version;
			break;
		default:
			PFSYNC_UNLOCK(sc);
			return (EINVAL);
	}

	/* Record the sync peer address in the softc. */
	switch (status->syncpeer.ss_family) {
	case AF_INET: {
		struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer);
		struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer;
		sc_sin->sin_family = AF_INET;
		sc_sin->sin_len = sizeof(*sc_sin);
		if (status_sin->sin_addr.s_addr == 0) {
			sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
		} else {
			sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr;
		}
		break;
	}
	case AF_INET6: {
		struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer);
		struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer;
		sc_sin->sin6_family = AF_INET6;
		sc_sin->sin6_len = sizeof(*sc_sin);
		if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) {
			sc_sin->sin6_addr = in6addr_linklocal_pfsync_group;
		} else {
			sc_sin->sin6_addr = status_sin->sin6_addr;
		}
		break;
	}
	}

	sc->sc_maxupdates = status->maxupdates;
	if (status->flags & PFSYNCF_DEFER) {
		sc->sc_flags |= PFSYNCF_DEFER;
		V_pfsync_defer_ptr = pfsync_defer;
	} else {
		sc->sc_flags &= ~PFSYNCF_DEFER;
		V_pfsync_defer_ptr = NULL;
	}

	/* No sync interface: tear down and we're done. */
	if (sifp == NULL) {
		if (sc->sc_sync_if)
			if_rele(sc->sc_sync_if);
		sc->sc_sync_if = NULL;
		pfsync_multicast_cleanup(sc);
		PFSYNC_UNLOCK(sc);
		return (0);
	}

	/*
	 * Flush any queued data that might not fit within the new
	 * interface's MTU before switching over.
	 */
	for (c = 0; c < pfsync_buckets; c++) {
		PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
		if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
			(sc->sc_sync_if != NULL &&
			    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
			sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
			pfsync_sendout(1, c);
		PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
	}

	pfsync_multicast_cleanup(sc);

	/* Join the multicast group when the peer address calls for it. */
	if (((sc->sc_sync_peer.ss_family == AF_INET) &&
	    IN_MULTICAST(ntohl(((struct sockaddr_in *)
	        &sc->sc_sync_peer)->sin_addr.s_addr))) ||
	    ((sc->sc_sync_peer.ss_family == AF_INET6) &&
	    IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*)
	        &sc->sc_sync_peer)->sin6_addr))) {
		error = pfsync_multicast_setup(sc, sifp, imf, im6f);
		if (error) {
			if_rele(sifp);
			PFSYNC_UNLOCK(sc);
#ifdef INET
			if (imf != NULL)
				ip_mfilter_free(imf);
#endif
#ifdef INET6
			if (im6f != NULL)
				ip6_mfilter_free(im6f);
#endif
			return (error);
		}
	}
	if (sc->sc_sync_if)
		if_rele(sc->sc_sync_if);
	sc->sc_sync_if = sifp;

	/* Rebuild the IP header template used for outgoing packets. */
	switch (sc->sc_sync_peer.ss_family) {
#ifdef INET
	case AF_INET: {
		struct ip *ip;
		ip = &sc->sc_template.ipv4;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later. */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr;
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		ip6 = &sc->sc_template.ipv6;
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_hlim = PFSYNC_DFLTTL;
		ip6->ip6_nxt = IPPROTO_PFSYNC;
		ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr;

		struct epoch_tracker et;
		NET_EPOCH_ENTER(et);
		in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0,
		    sc->sc_sync_if, &ip6->ip6_src, NULL);
		NET_EPOCH_EXIT(et);
		break;
	}
#endif
	}

	/* Request a full state table update. */
	if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(V_pfsync_carp_adj,
		    "pfsync bulk start");
	sc->sc_flags &= ~PFSYNCF_OK;
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: requesting bulk update\n");
	PFSYNC_UNLOCK(sc);
	PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
	pfsync_request_update(0, 0);
	PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
	PFSYNC_BLOCK(sc);
	sc->sc_ureq_sent = time_uptime;
	callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc);
	PFSYNC_BUNLOCK(sc);
	return (0);
}
3081 
static void
pfsync_pointers_init(void)
{
	/* Hook pfsync's callbacks into pf under the rules write lock. */

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = pfsync_state_import;
	V_pfsync_insert_state_ptr = pfsync_insert_state;
	V_pfsync_update_state_ptr = pfsync_update_state;
	V_pfsync_delete_state_ptr = pfsync_delete_state;
	V_pfsync_clear_states_ptr = pfsync_clear_states;
	V_pfsync_defer_ptr = pfsync_defer;
	PF_RULES_WUNLOCK();
}
3095 
static void
pfsync_pointers_uninit(void)
{
	/* Unhook pfsync's callbacks from pf under the rules write lock. */

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = NULL;
	V_pfsync_insert_state_ptr = NULL;
	V_pfsync_update_state_ptr = NULL;
	V_pfsync_delete_state_ptr = NULL;
	V_pfsync_clear_states_ptr = NULL;
	V_pfsync_defer_ptr = NULL;
	PF_RULES_WUNLOCK();
}
3109 
static void
vnet_pfsync_init(const void *unused __unused)
{
	/*
	 * Per-vnet initialization: register the if_pfsync cloner, attach
	 * the software interrupt handler, and hook into pf.
	 */
	int error;

	V_pfsync_cloner = if_clone_simple(pfsyncname,
	    pfsync_clone_create, pfsync_clone_destroy, 1);
	error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif,
	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
	if (error) {
		if_clone_detach(V_pfsync_cloner);
		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
	}

	/* NOTE(review): callbacks are installed even when swi_add()
	 * failed above — confirm this is intentional. */
	pfsync_pointers_init();
}
3126 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
3127     vnet_pfsync_init, NULL);
3128 
static void
vnet_pfsync_uninit(const void *unused __unused)
{
	/*
	 * Per-vnet teardown: unhook from pf, destroy the cloner, and
	 * remove the software interrupt handler.
	 */
	int ret __diagused;

	pfsync_pointers_uninit();

	if_clone_detach(V_pfsync_cloner);
	ret = swi_remove(V_pfsync_swi_cookie);
	MPASS(ret == 0);
	ret = intr_event_destroy(V_pfsync_swi_ie);
	MPASS(ret == 0);
}
3142 
3143 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
3144     vnet_pfsync_uninit, NULL);
3145 
static int
pfsync_init(void)
{
	/*
	 * Module load: publish the detach hook and register the PFSYNC
	 * protocol input handlers.  Returns 0 or an errno value.
	 */
	int error;

	pfsync_detach_ifnet_ptr = pfsync_detach_ifnet;

#ifdef INET
	error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL);
	if (error)
		return (error);
#endif
#ifdef INET6
	error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL);
	if (error) {
		/* NOTE(review): unconditional — in an INET6-only build this
		 * unregisters a protocol that was never registered; confirm. */
		ipproto_unregister(IPPROTO_PFSYNC);
		return (error);
	}
#endif

	return (0);
}
3168 
static void
pfsync_uninit(void)
{
	/* Module unload: clear the detach hook, unregister input handlers. */
	pfsync_detach_ifnet_ptr = NULL;

#ifdef INET
	ipproto_unregister(IPPROTO_PFSYNC);
#endif
#ifdef INET6
	ip6proto_unregister(IPPROTO_PFSYNC);
#endif
}
3181 
3182 static int
3183 pfsync_modevent(module_t mod, int type, void *data)
3184 {
3185 	int error = 0;
3186 
3187 	switch (type) {
3188 	case MOD_LOAD:
3189 		error = pfsync_init();
3190 		break;
3191 	case MOD_UNLOAD:
3192 		pfsync_uninit();
3193 		break;
3194 	default:
3195 		error = EINVAL;
3196 		break;
3197 	}
3198 
3199 	return (error);
3200 }
3201 
/* Module registration data for pfsync(4). */
static moduledata_t pfsync_mod = {
	pfsyncname,		/* module name */
	pfsync_modevent,	/* event handler */
	0			/* extra data (unused) */
};
3207 
3208 #define PFSYNC_MODVER 1
3209 
3210 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
3211 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
3212 MODULE_VERSION(pfsync, PFSYNC_MODVER);
3213 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
3214