xref: /freebsd/sys/netpfil/pf/pflow.c (revision 7fdf597e96a02165cfe22ff357b857d5fa15ed8a)
1 /*	$OpenBSD: if_pflow.c,v 1.100 2023/11/09 08:53:20 mvs Exp $	*/
2 
3 /*
4  * Copyright (c) 2023 Rubicon Communications, LLC (Netgate)
5  * Copyright (c) 2011 Florian Obser <florian@narrans.de>
6  * Copyright (c) 2011 Sebastian Benoit <benoit-lists@fb12.de>
7  * Copyright (c) 2008 Henning Brauer <henning@openbsd.org>
8  * Copyright (c) 2008 Joerg Goltermann <jg@osn.de>
9  *
10  * Permission to use, copy, modify, and distribute this software for any
11  * purpose with or without fee is hereby granted, provided that the above
12  * copyright notice and this permission notice appear in all copies.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
16  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
17  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
20  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21  */
22 
23 #include <sys/cdefs.h>
24 #include <sys/param.h>
25 #include <sys/bus.h>
26 #include <sys/callout.h>
27 #include <sys/endian.h>
28 #include <sys/interrupt.h>
29 #include <sys/jail.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <sys/module.h>
33 #include <sys/mbuf.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <sys/sockio.h>
37 #include <sys/sysctl.h>
38 #include <sys/systm.h>
39 #include <sys/priv.h>
40 
41 #include <net/if.h>
42 #include <net/if_types.h>
43 #include <net/bpf.h>
44 #include <net/route.h>
45 #include <netinet/in.h>
46 #include <netinet/if_ether.h>
47 #include <netinet/tcp.h>
48 
49 #include <netinet/ip.h>
50 #include <netinet/ip_icmp.h>
51 #include <netinet/ip_var.h>
52 #include <netinet/udp.h>
53 #include <netinet/udp_var.h>
54 #include <netinet/in_pcb.h>
55 
56 #include <netlink/netlink.h>
57 #include <netlink/netlink_ctl.h>
58 #include <netlink/netlink_generic.h>
59 #include <netlink/netlink_message_writer.h>
60 
61 #include <net/pfvar.h>
62 #include <net/pflow.h>
63 #include "net/if_var.h"
64 
/*
 * Combined size of one struct pflow_header and one struct pflow_flow.
 * NOTE(review): presumably the smallest MTU that can carry a v5 export
 * with a single record; no user is visible in this part of the file.
 */
#define PFLOW_MINMTU	\
    (sizeof(struct pflow_header) + sizeof(struct pflow_flow))

/*
 * Debug output.  With PFLOWDEBUG defined, DPRINTF(("fmt", args)) forwards its
 * (parenthesised) argument list to printf(); otherwise it expands to nothing.
 */
#ifdef PFLOWDEBUG
#define DPRINTF(x)	do { printf x ; } while (0)
#else
#define DPRINTF(x)
#endif
73 
/*
 * Record family selector passed to pflow_sendout_ipfix(): the copy_*_to_m()
 * helpers pass PFLOW_INET for IPv4 flow records, PFLOW_INET6 for IPv6 flow
 * records and PFLOW_NAT4 for NAT44 event records.
 */
enum pflow_family_t {
	PFLOW_INET,
	PFLOW_INET6,
	PFLOW_NAT4,
};
79 
/*
 * Forward declarations of the file-local functions.  Everything declared
 * here has internal linkage, including the functions whose definitions
 * further down omit the "static" keyword.
 */
static void	pflow_output_process(void *);
static int	pflow_create(int);
static int	pflow_destroy(int, bool);
static int	pflow_calc_mtu(struct pflow_softc *, int, int);
static void	pflow_setmtu(struct pflow_softc *, int);
static int	pflowvalidsockaddr(const struct sockaddr *, int);

static struct mbuf	*pflow_get_mbuf(struct pflow_softc *, u_int16_t);
static void	pflow_flush(struct pflow_softc *);
static int	pflow_sendout_v5(struct pflow_softc *);
static int	pflow_sendout_ipfix(struct pflow_softc *, enum pflow_family_t);
static int	pflow_sendout_ipfix_tmpl(struct pflow_softc *);
static int	pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *);
static int	sysctl_pflowstats(SYSCTL_HANDLER_ARGS);
static void	pflow_timeout(void *);
static void	pflow_timeout6(void *);
static void	pflow_timeout_tmpl(void *);
static void	pflow_timeout_nat4(void *);
static void	copy_flow_data(struct pflow_flow *, struct pflow_flow *,
	const struct pf_kstate *, struct pf_state_key *, int, int);
static void	copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *,
	struct pflow_ipfix_flow4 *, const struct pf_kstate *, struct pf_state_key *,
	struct pflow_softc *, int, int);
static void	copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *,
	struct pflow_ipfix_flow6 *, const struct pf_kstate *, struct pf_state_key *,
	struct pflow_softc *, int, int);
static int	pflow_pack_flow(const struct pf_kstate *, struct pf_state_key *,
	struct pflow_softc *);
static int	pflow_pack_flow_ipfix(const struct pf_kstate *, struct pf_state_key *,
	struct pflow_softc *);
static void	export_pflow(const struct pf_kstate *);
static int	export_pflow_if(const struct pf_kstate*, struct pf_state_key *,
	struct pflow_softc *);
static int	copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc);
static int	copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow,
	struct pflow_softc *sc);
static int	copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow,
	struct pflow_softc *sc);
static int	copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *,
	const struct pf_kstate *, struct pflow_softc *,
	uint8_t, uint64_t);
121 
/* Name handed to swi_add() when a pflow interface is created. */
static const char pflowname[] = "pflow";

/*
 * Indices into the per-vnet statistics counter array.
 */
enum pflowstat_counters {
	pflow_flows,	/* bumped for every record copied into an export mbuf */
	pflow_packets,	/* NOTE(review): presumably datagrams sent; not updated in this part of the file */
	pflow_onomem,	/* bumped when pflow_get_mbuf() cannot allocate */
	pflow_oerrors,	/* NOTE(review): presumably send errors; not updated in this part of the file */
	pflow_ncounters,	/* number of counters; must stay last */
};
/* One counter(9) handle per pflowstat_counters entry. */
struct pflowstats_ctr {
	counter_u64_t	c[pflow_ncounters];
};
134 
135 /**
136  * Locking concept
137  *
138  * The list of pflow devices (V_pflowif_list) is managed through epoch.
139  * It is safe to read the list without locking (while in NET_EPOCH).
 * There may only be one simultaneous modifier, hence we need
 * V_pflowif_list_mtx on every add/delete.
142  *
143  * Each pflow interface protects its own data with the sc_lock mutex.
144  *
145  * We do not require any pf locks, and in fact expect to be called without
146  * hashrow locks held.
147  **/
148 
/* Unit number allocator; created in vnet_pflowattach(), used for sc_id values. */
VNET_DEFINE(struct unrhdr *,	pflow_unr);
#define	V_pflow_unr	VNET(pflow_unr)
/* List of all pflow interfaces of this vnet. */
VNET_DEFINE(CK_LIST_HEAD(, pflow_softc), pflowif_list);
#define	V_pflowif_list	VNET(pflowif_list)
/* Serialises insertions into and removals from V_pflowif_list. */
VNET_DEFINE(struct mtx, pflowif_list_mtx);
#define	V_pflowif_list_mtx	VNET(pflowif_list_mtx)
/* Statistics counters, indexed by enum pflowstat_counters. */
VNET_DEFINE(struct pflowstats_ctr,	 pflowstat);
#define	V_pflowstats	VNET(pflowstat)

/* Per-interface lock helpers operating on the softc's sc_lock mutex. */
#define	PFLOW_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock)
#define	PFLOW_UNLOCK(_sc)	mtx_unlock(&(_sc)->sc_lock)
#define	PFLOW_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED)
161 
162 SYSCTL_NODE(_net, OID_AUTO, pflow, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
163     "PFLOW");
164 SYSCTL_PROC(_net_pflow, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
165     0, 0, sysctl_pflowstats, "S,pflowstats",
166     "PFLOW statistics (struct pflowstats, net/if_pflow.h)");
167 
168 static inline void
169 pflowstat_inc(enum pflowstat_counters c)
170 {
171 	counter_u64_add(V_pflowstats.c[c], 1);
172 }
173 
174 static void
175 vnet_pflowattach(void)
176 {
177 	CK_LIST_INIT(&V_pflowif_list);
178 	mtx_init(&V_pflowif_list_mtx, "pflow interface list mtx", NULL, MTX_DEF);
179 
180 	V_pflow_unr = new_unrhdr(0, PFLOW_MAX_ENTRIES - 1, &V_pflowif_list_mtx);
181 
182 	for (int i = 0; i < pflow_ncounters; i++)
183 		V_pflowstats.c[i] = counter_u64_alloc(M_WAITOK);
184 }
185 VNET_SYSINIT(vnet_pflowattach, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
186     vnet_pflowattach, NULL);
187 
188 static int
189 pflow_jail_remove(void *obj, void *data __unused)
190 {
191 #ifdef VIMAGE
192 	const struct prison *pr = obj;
193 #endif
194 	struct pflow_softc	*sc;
195 
196 	CURVNET_SET(pr->pr_vnet);
197 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
198 		pflow_destroy(sc->sc_id, false);
199 	}
200 	CURVNET_RESTORE();
201 
202 	return (0);
203 }
204 
205 static void
206 vnet_pflowdetach(void)
207 {
208 
209 	/* Should have been done by pflow_jail_remove() */
210 	MPASS(CK_LIST_EMPTY(&V_pflowif_list));
211 	delete_unrhdr(V_pflow_unr);
212 	mtx_destroy(&V_pflowif_list_mtx);
213 
214 	for (int i = 0; i < pflow_ncounters; i++)
215 		counter_u64_free(V_pflowstats.c[i]);
216 }
217 VNET_SYSUNINIT(vnet_pflowdetach, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
218     vnet_pflowdetach, NULL);
219 
/*
 * Last per-vnet teardown step: wait for outstanding epoch callbacks (such as
 * the deferred softc frees queued by pflow_destroy()).
 * NOTE(review): SYSUNINITs presumably run in reverse order, which would put
 * this (SI_ORDER_THIRD) after vnet_pflowdetach() (SI_ORDER_FOURTH) - confirm.
 */
static void
vnet_pflow_finalise(void)
{
	/*
	 * Ensure we've freed all interfaces, and do not have pending
	 * epoch cleanup calls.
	 */
	NET_EPOCH_DRAIN_CALLBACKS();
}
VNET_SYSUNINIT(vnet_pflow_finalise, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
    vnet_pflow_finalise, NULL);
231 
232 static void
233 pflow_output_process(void *arg)
234 {
235 	struct mbufq ml;
236 	struct pflow_softc *sc = arg;
237 	struct mbuf *m;
238 
239 	mbufq_init(&ml, 0);
240 
241 	PFLOW_LOCK(sc);
242 	mbufq_concat(&ml, &sc->sc_outputqueue);
243 	PFLOW_UNLOCK(sc);
244 
245 	CURVNET_SET(sc->sc_vnet);
246 	while ((m = mbufq_dequeue(&ml)) != NULL) {
247 		pflow_sendout_mbuf(sc, m);
248 	}
249 	CURVNET_RESTORE();
250 }
251 
252 static int
253 pflow_create(int unit)
254 {
255 	struct pflow_softc	*pflowif;
256 	int			 error;
257 
258 	pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
259 	mtx_init(&pflowif->sc_lock, "pflowlk", NULL, MTX_DEF);
260 	pflowif->sc_version = PFLOW_PROTO_DEFAULT;
261 	pflowif->sc_observation_dom = PFLOW_ENGINE_TYPE;
262 
263 	/* ipfix template init */
264 	bzero(&pflowif->sc_tmpl_ipfix,sizeof(pflowif->sc_tmpl_ipfix));
265 	pflowif->sc_tmpl_ipfix.set_header.set_id =
266 	    htons(PFLOW_IPFIX_TMPL_SET_ID);
267 	pflowif->sc_tmpl_ipfix.set_header.set_length =
268 	    htons(sizeof(struct pflow_ipfix_tmpl));
269 
270 	/* ipfix IPv4 template */
271 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.tmpl_id =
272 	    htons(PFLOW_IPFIX_TMPL_IPV4_ID);
273 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.field_count
274 	    = htons(PFLOW_IPFIX_TMPL_IPV4_FIELD_COUNT);
275 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.field_id =
276 	    htons(PFIX_IE_sourceIPv4Address);
277 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.len = htons(4);
278 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.field_id =
279 	    htons(PFIX_IE_destinationIPv4Address);
280 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.len = htons(4);
281 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.field_id =
282 	    htons(PFIX_IE_ingressInterface);
283 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.len = htons(4);
284 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.field_id =
285 	    htons(PFIX_IE_egressInterface);
286 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.len = htons(4);
287 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.field_id =
288 	    htons(PFIX_IE_packetDeltaCount);
289 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.len = htons(8);
290 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.field_id =
291 	    htons(PFIX_IE_octetDeltaCount);
292 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.len = htons(8);
293 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.field_id =
294 	    htons(PFIX_IE_flowStartMilliseconds);
295 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.len = htons(8);
296 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.field_id =
297 	    htons(PFIX_IE_flowEndMilliseconds);
298 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.len = htons(8);
299 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.field_id =
300 	    htons(PFIX_IE_sourceTransportPort);
301 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.len = htons(2);
302 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.field_id =
303 	    htons(PFIX_IE_destinationTransportPort);
304 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.len = htons(2);
305 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.field_id =
306 	    htons(PFIX_IE_ipClassOfService);
307 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.len = htons(1);
308 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.field_id =
309 	    htons(PFIX_IE_protocolIdentifier);
310 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.len = htons(1);
311 
312 	/* ipfix IPv6 template */
313 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.tmpl_id =
314 	    htons(PFLOW_IPFIX_TMPL_IPV6_ID);
315 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.field_count =
316 	    htons(PFLOW_IPFIX_TMPL_IPV6_FIELD_COUNT);
317 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.field_id =
318 	    htons(PFIX_IE_sourceIPv6Address);
319 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.len = htons(16);
320 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.field_id =
321 	    htons(PFIX_IE_destinationIPv6Address);
322 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.len = htons(16);
323 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.field_id =
324 	    htons(PFIX_IE_ingressInterface);
325 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.len = htons(4);
326 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.field_id =
327 	    htons(PFIX_IE_egressInterface);
328 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.len = htons(4);
329 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.field_id =
330 	    htons(PFIX_IE_packetDeltaCount);
331 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.len = htons(8);
332 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.field_id =
333 	    htons(PFIX_IE_octetDeltaCount);
334 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.len = htons(8);
335 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.field_id =
336 	    htons(PFIX_IE_flowStartMilliseconds);
337 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.len = htons(8);
338 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.field_id =
339 	    htons(PFIX_IE_flowEndMilliseconds);
340 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.len = htons(8);
341 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.field_id =
342 	    htons(PFIX_IE_sourceTransportPort);
343 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.len = htons(2);
344 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.field_id =
345 	    htons(PFIX_IE_destinationTransportPort);
346 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.len = htons(2);
347 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.field_id =
348 	    htons(PFIX_IE_ipClassOfService);
349 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.len = htons(1);
350 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.field_id =
351 	    htons(PFIX_IE_protocolIdentifier);
352 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.len = htons(1);
353 
354 	/* NAT44 create template */
355 	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.tmpl_id =
356 	    htons(PFLOW_IPFIX_TMPL_NAT44_ID);
357 	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.field_count =
358 	    htons(PFLOW_IPFIX_TMPL_NAT44_FIELD_COUNT);
359 	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.field_id =
360 	    htons(PFIX_IE_timeStamp);
361 	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.len =
362 	    htons(8);
363 	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.field_id =
364 	    htons(PFIX_IE_natEvent);
365 	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.len =
366 	    htons(1);
367 	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.field_id =
368 	    htons(PFIX_IE_protocolIdentifier);
369 	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.len = htons(1);
370 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.field_id =
371 	    htons(PFIX_IE_sourceIPv4Address);
372 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.len =
373 	    htons(4);
374 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.field_id =
375 	    htons(PFIX_IE_sourceTransportPort);
376 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.len = htons(2);
377 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.field_id =
378 	    htons(PFIX_IE_postNATSourceIPv4Address);
379 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.len =
380 	    htons(4);
381 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.field_id =
382 	    htons(PFIX_IE_postNAPTSourceTransportPort);
383 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.len =
384 	    htons(2);
385 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.field_id =
386 	    htons(PFIX_IE_destinationIPv4Address);
387 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.len =
388 	    htons(4);
389 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.field_id =
390 	    htons(PFIX_IE_destinationTransportPort);
391 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.len = htons(2);
392 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.field_id =
393 	    htons(PFIX_IE_postNATDestinationIPv4Address);
394 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.len =
395 	    htons(4);
396 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.field_id =
397 	    htons(PFIX_IE_postNAPTDestinationTransportPort);
398 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.len =
399 	    htons(2);
400 
401 	pflowif->sc_id = unit;
402 	pflowif->sc_vnet = curvnet;
403 
404 	mbufq_init(&pflowif->sc_outputqueue, 8192);
405 	pflow_setmtu(pflowif, ETHERMTU);
406 
407 	callout_init_mtx(&pflowif->sc_tmo, &pflowif->sc_lock, 0);
408 	callout_init_mtx(&pflowif->sc_tmo6, &pflowif->sc_lock, 0);
409 	callout_init_mtx(&pflowif->sc_tmo_nat4, &pflowif->sc_lock, 0);
410 	callout_init_mtx(&pflowif->sc_tmo_tmpl, &pflowif->sc_lock, 0);
411 
412 	error = swi_add(&pflowif->sc_swi_ie, pflowname, pflow_output_process,
413 	    pflowif, SWI_NET, INTR_MPSAFE, &pflowif->sc_swi_cookie);
414 	if (error) {
415 		free(pflowif, M_DEVBUF);
416 		return (error);
417 	}
418 
419 	/* Insert into list of pflows */
420 	mtx_lock(&V_pflowif_list_mtx);
421 	CK_LIST_INSERT_HEAD(&V_pflowif_list, pflowif, sc_next);
422 	mtx_unlock(&V_pflowif_list_mtx);
423 
424 	V_pflow_export_state_ptr = export_pflow;
425 
426 	return (0);
427 }
428 
429 static void
430 pflow_free_cb(struct epoch_context *ctx)
431 {
432 	struct pflow_softc *sc;
433 
434 	sc = __containerof(ctx, struct pflow_softc, sc_epoch_ctx);
435 
436 	free(sc, M_DEVBUF);
437 }
438 
439 static int
440 pflow_destroy(int unit, bool drain)
441 {
442 	struct pflow_softc	*sc;
443 	int			 error __diagused;
444 
445 	mtx_lock(&V_pflowif_list_mtx);
446 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
447 		if (sc->sc_id == unit)
448 			break;
449 	}
450 	if (sc == NULL) {
451 		mtx_unlock(&V_pflowif_list_mtx);
452 		return (ENOENT);
453 	}
454 	CK_LIST_REMOVE(sc, sc_next);
455 	if (CK_LIST_EMPTY(&V_pflowif_list))
456 		V_pflow_export_state_ptr = NULL;
457 	mtx_unlock(&V_pflowif_list_mtx);
458 
459 	sc->sc_dying = 1;
460 
461 	if (drain) {
462 		/* Let's be sure no one is using this interface any more. */
463 		NET_EPOCH_DRAIN_CALLBACKS();
464 	}
465 
466 	error = swi_remove(sc->sc_swi_cookie);
467 	MPASS(error == 0);
468 	error = intr_event_destroy(sc->sc_swi_ie);
469 	MPASS(error == 0);
470 
471 	callout_drain(&sc->sc_tmo);
472 	callout_drain(&sc->sc_tmo6);
473 	callout_drain(&sc->sc_tmo_nat4);
474 	callout_drain(&sc->sc_tmo_tmpl);
475 
476 	m_freem(sc->sc_mbuf);
477 	m_freem(sc->sc_mbuf6);
478 	m_freem(sc->sc_mbuf_nat4);
479 
480 	PFLOW_LOCK(sc);
481 	mbufq_drain(&sc->sc_outputqueue);
482 	if (sc->so != NULL) {
483 		soclose(sc->so);
484 		sc->so = NULL;
485 	}
486 	if (sc->sc_flowdst != NULL)
487 		free(sc->sc_flowdst, M_DEVBUF);
488 	if (sc->sc_flowsrc != NULL)
489 		free(sc->sc_flowsrc, M_DEVBUF);
490 	PFLOW_UNLOCK(sc);
491 
492 	mtx_destroy(&sc->sc_lock);
493 
494 	free_unr(V_pflow_unr, unit);
495 
496 	NET_EPOCH_CALL(pflow_free_cb, &sc->sc_epoch_ctx);
497 
498 	return (0);
499 }
500 
/*
 * Check whether "sa" is usable as a flow source or destination address.
 *
 * An address is valid when it is an AF_INET or AF_INET6 socket address whose
 * IP address is not the unspecified address and, unless ignore_port is
 * non-zero, whose port is not 0.
 *
 * Returns 1 if valid, 0 otherwise (including for a NULL pointer or any other
 * address family).
 */
static int
pflowvalidsockaddr(const struct sockaddr *sa, int ignore_port)
{
	if (sa == NULL)
		return (0);

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *in4 = (const struct sockaddr_in *)sa;

		if (in4->sin_addr.s_addr == INADDR_ANY)
			return (0);
		return (ignore_port || in4->sin_port != 0);
	}

	if (sa->sa_family == AF_INET6) {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		if (IN6_IS_ADDR_UNSPECIFIED(&in6->sin6_addr))
			return (0);
		return (ignore_port || in6->sin6_port != 0);
	}

	return (0);
}
522 
523 int
524 pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
525 {
526 	size_t min;
527 
528 	sc->sc_maxcount4 = (mtu - hdrsz -
529 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
530 	sc->sc_maxcount6 = (mtu - hdrsz -
531 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow6);
532 	sc->sc_maxcount_nat4 = (mtu - hdrsz -
533 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_nat4);
534 	if (sc->sc_maxcount4 > PFLOW_MAXFLOWS)
535 		sc->sc_maxcount4 = PFLOW_MAXFLOWS;
536 	if (sc->sc_maxcount6 > PFLOW_MAXFLOWS)
537 		sc->sc_maxcount6 = PFLOW_MAXFLOWS;
538 	if (sc->sc_maxcount_nat4 > PFLOW_MAXFLOWS)
539 		sc->sc_maxcount_nat4 = PFLOW_MAXFLOWS;
540 
541 	min = MIN(sc->sc_maxcount4 * sizeof(struct pflow_ipfix_flow4),
542 	    sc->sc_maxcount6 * sizeof(struct pflow_ipfix_flow6));
543 	min = MIN(min, sc->sc_maxcount_nat4 * sizeof(struct pflow_ipfix_nat4));
544 
545 	return (hdrsz + sizeof(struct udpiphdr) + min);
546 }
547 
548 static void
549 pflow_setmtu(struct pflow_softc *sc, int mtu_req)
550 {
551 	int	mtu;
552 
553 	mtu = mtu_req;
554 
555 	switch (sc->sc_version) {
556 	case PFLOW_PROTO_5:
557 		sc->sc_maxcount = (mtu - sizeof(struct pflow_header) -
558 		    sizeof(struct udpiphdr)) / sizeof(struct pflow_flow);
559 		if (sc->sc_maxcount > PFLOW_MAXFLOWS)
560 		    sc->sc_maxcount = PFLOW_MAXFLOWS;
561 		break;
562 	case PFLOW_PROTO_10:
563 		pflow_calc_mtu(sc, mtu, sizeof(struct pflow_v10_header));
564 		break;
565 	default: /* NOTREACHED */
566 		break;
567 	}
568 }
569 
570 static struct mbuf *
571 pflow_get_mbuf(struct pflow_softc *sc, u_int16_t set_id)
572 {
573 	struct pflow_set_header	 set_hdr;
574 	struct pflow_header	 h;
575 	struct mbuf		*m;
576 
577 	MGETHDR(m, M_NOWAIT, MT_DATA);
578 	if (m == NULL) {
579 		pflowstat_inc(pflow_onomem);
580 		return (NULL);
581 	}
582 
583 	MCLGET(m, M_NOWAIT);
584 	if ((m->m_flags & M_EXT) == 0) {
585 		m_free(m);
586 		pflowstat_inc(pflow_onomem);
587 		return (NULL);
588 	}
589 
590 	m->m_len = m->m_pkthdr.len = 0;
591 
592 	if (sc == NULL)		/* get only a new empty mbuf */
593 		return (m);
594 
595 	switch (sc->sc_version) {
596 	case PFLOW_PROTO_5:
597 		/* populate pflow_header */
598 		h.reserved1 = 0;
599 		h.reserved2 = 0;
600 		h.count = 0;
601 		h.version = htons(PFLOW_PROTO_5);
602 		h.flow_sequence = htonl(sc->sc_gcounter);
603 		h.engine_type = PFLOW_ENGINE_TYPE;
604 		h.engine_id = PFLOW_ENGINE_ID;
605 		m_copyback(m, 0, PFLOW_HDRLEN, (caddr_t)&h);
606 
607 		sc->sc_count = 0;
608 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
609 		    pflow_timeout, sc);
610 		break;
611 	case PFLOW_PROTO_10:
612 		/* populate pflow_set_header */
613 		set_hdr.set_length = 0;
614 		set_hdr.set_id = htons(set_id);
615 		m_copyback(m, 0, PFLOW_SET_HDRLEN, (caddr_t)&set_hdr);
616 		break;
617 	default: /* NOTREACHED */
618 		break;
619 	}
620 
621 	return (m);
622 }
623 
624 static void
625 copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2,
626     const struct pf_kstate *st, struct pf_state_key *sk, int src, int dst)
627 {
628 	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
629 	flow1->src_port = flow2->dest_port = sk->port[src];
630 	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
631 	flow1->dest_port = flow2->src_port = sk->port[dst];
632 
633 	flow1->dest_as = flow2->src_as =
634 	    flow1->src_as = flow2->dest_as = 0;
635 	flow1->if_index_in = htons(st->if_index_in);
636 	flow1->if_index_out = htons(st->if_index_out);
637 	flow2->if_index_in = htons(st->if_index_out);
638 	flow2->if_index_out = htons(st->if_index_in);
639 	flow1->dest_mask = flow2->src_mask =
640 	    flow1->src_mask = flow2->dest_mask = 0;
641 
642 	flow1->flow_packets = htonl(st->packets[0]);
643 	flow2->flow_packets = htonl(st->packets[1]);
644 	flow1->flow_octets = htonl(st->bytes[0]);
645 	flow2->flow_octets = htonl(st->bytes[1]);
646 
647 	/*
648 	 * Pretend the flow was created or expired when the machine came up
649 	 * when creation is in the future of the last time a package was seen
650 	 * or was created / expired before this machine came up due to pfsync.
651 	 */
652 	flow1->flow_start = flow2->flow_start = st->creation < 0 ||
653 	    st->creation > st->expire ? htonl(0) : htonl(st->creation);
654 	flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? htonl(0) :
655 	    htonl(st->expire);
656 	flow1->tcp_flags = flow2->tcp_flags = 0;
657 	flow1->protocol = flow2->protocol = sk->proto;
658 	flow1->tos = flow2->tos = st->rule->tos;
659 }
660 
661 static void
662 copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *flow1,
663     struct pflow_ipfix_flow4 *flow2, const struct pf_kstate *st,
664     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
665 {
666 	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
667 	flow1->src_port = flow2->dest_port = sk->port[src];
668 	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
669 	flow1->dest_port = flow2->src_port = sk->port[dst];
670 
671 	flow1->if_index_in = htonl(st->if_index_in);
672 	flow1->if_index_out = htonl(st->if_index_out);
673 	flow2->if_index_in = htonl(st->if_index_out);
674 	flow2->if_index_out = htonl(st->if_index_in);
675 
676 	flow1->flow_packets = htobe64(st->packets[0]);
677 	flow2->flow_packets = htobe64(st->packets[1]);
678 	flow1->flow_octets = htobe64(st->bytes[0]);
679 	flow2->flow_octets = htobe64(st->bytes[1]);
680 
681 	/*
682 	 * Pretend the flow was created when the machine came up when creation
683 	 * is in the future of the last time a package was seen due to pfsync.
684 	 */
685 	if (st->creation > st->expire)
686 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
687 		    time_uptime)*1000);
688 	else
689 		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
690 		    (pf_get_uptime() - st->creation)));
691 	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
692 	    (pf_get_uptime() - st->expire)));
693 
694 	flow1->protocol = flow2->protocol = sk->proto;
695 	flow1->tos = flow2->tos = st->rule->tos;
696 }
697 
698 static void
699 copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *flow1,
700     struct pflow_ipfix_flow6 *flow2, const struct pf_kstate *st,
701     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
702 {
703 	bcopy(&sk->addr[src].v6, &flow1->src_ip, sizeof(flow1->src_ip));
704 	bcopy(&sk->addr[src].v6, &flow2->dest_ip, sizeof(flow2->dest_ip));
705 	flow1->src_port = flow2->dest_port = sk->port[src];
706 	bcopy(&sk->addr[dst].v6, &flow1->dest_ip, sizeof(flow1->dest_ip));
707 	bcopy(&sk->addr[dst].v6, &flow2->src_ip, sizeof(flow2->src_ip));
708 	flow1->dest_port = flow2->src_port = sk->port[dst];
709 
710 	flow1->if_index_in = htonl(st->if_index_in);
711 	flow1->if_index_out = htonl(st->if_index_out);
712 	flow2->if_index_in = htonl(st->if_index_out);
713 	flow2->if_index_out = htonl(st->if_index_in);
714 
715 	flow1->flow_packets = htobe64(st->packets[0]);
716 	flow2->flow_packets = htobe64(st->packets[1]);
717 	flow1->flow_octets = htobe64(st->bytes[0]);
718 	flow2->flow_octets = htobe64(st->bytes[1]);
719 
720 	/*
721 	 * Pretend the flow was created when the machine came up when creation
722 	 * is in the future of the last time a package was seen due to pfsync.
723 	 */
724 	if (st->creation > st->expire)
725 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
726 		    time_uptime)*1000);
727 	else
728 		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
729 		    (pf_get_uptime() - st->creation)));
730 	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
731 	    (pf_get_uptime() - st->expire)));
732 
733 	flow1->protocol = flow2->protocol = sk->proto;
734 	flow1->tos = flow2->tos = st->rule->tos;
735 }
736 
737 static void
738 copy_nat_ipfix_4_data(struct pflow_ipfix_nat4 *nat1,
739     struct pflow_ipfix_nat4 *nat2, const struct pf_kstate *st,
740     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
741 {
742 	nat1->src_ip = nat2->dest_ip = st->key[PF_SK_STACK]->addr[src].v4.s_addr;
743 	nat1->src_port = nat2->dest_port = st->key[PF_SK_STACK]->port[src];
744 	nat1->dest_ip = nat2->src_ip = st->key[PF_SK_STACK]->addr[dst].v4.s_addr;
745 	nat1->dest_port = nat2->src_port = st->key[PF_SK_STACK]->port[dst];
746 	nat1->postnat_src_ip = nat2->postnat_dest_ip = st->key[PF_SK_WIRE]->addr[src].v4.s_addr;
747 	nat1->postnat_src_port = nat2->postnat_dest_port = st->key[PF_SK_WIRE]->port[src];
748 	nat1->postnat_dest_ip = nat2->postnat_src_ip = st->key[PF_SK_WIRE]->addr[dst].v4.s_addr;
749 	nat1->postnat_dest_port = nat2->postnat_src_port = st->key[PF_SK_WIRE]->port[dst];
750 	nat1->protocol = nat2->protocol = sk->proto;
751 
752 	/*
753 	 * Because we have to generate a create and delete event we'll fill out the
754 	 * timestamp and nat_event fields when we transmit. As opposed to doing this
755 	 * work a second time.
756 	*/
757 }
758 
759 static void
760 export_pflow(const struct pf_kstate *st)
761 {
762 	struct pflow_softc	*sc = NULL;
763 	struct pf_state_key	*sk;
764 
765 	NET_EPOCH_ASSERT();
766 
767 	/* e.g. if pf_state_key_attach() fails. */
768 	if (st->key[PF_SK_STACK] == NULL || st->key[PF_SK_WIRE] == NULL)
769 		return;
770 
771 	sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
772 
773 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
774 		PFLOW_LOCK(sc);
775 		switch (sc->sc_version) {
776 		case PFLOW_PROTO_5:
777 			if (sk->af == AF_INET)
778 				export_pflow_if(st, sk, sc);
779 			break;
780 		case PFLOW_PROTO_10:
781 			if (sk->af == AF_INET || sk->af == AF_INET6)
782 				export_pflow_if(st, sk, sc);
783 			break;
784 		default: /* NOTREACHED */
785 			break;
786 		}
787 		PFLOW_UNLOCK(sc);
788 	}
789 }
790 
791 static int
792 export_pflow_if(const struct pf_kstate *st, struct pf_state_key *sk,
793     struct pflow_softc *sc)
794 {
795 	struct pf_kstate	 pfs_copy;
796 	u_int64_t		 bytes[2];
797 	int			 ret = 0;
798 
799 	if (sc->sc_version == PFLOW_PROTO_10)
800 		return (pflow_pack_flow_ipfix(st, sk, sc));
801 
802 	/* PFLOW_PROTO_5 */
803 	if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES)
804 	    && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES))
805 		return (pflow_pack_flow(st, sk, sc));
806 
807 	/* flow > PFLOW_MAXBYTES need special handling */
808 	bcopy(st, &pfs_copy, sizeof(pfs_copy));
809 	bytes[0] = pfs_copy.bytes[0];
810 	bytes[1] = pfs_copy.bytes[1];
811 
812 	while (bytes[0] > PFLOW_MAXBYTES) {
813 		pfs_copy.bytes[0] = PFLOW_MAXBYTES;
814 		pfs_copy.bytes[1] = 0;
815 
816 		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
817 			return (ret);
818 		if ((bytes[0] - PFLOW_MAXBYTES) > 0)
819 			bytes[0] -= PFLOW_MAXBYTES;
820 	}
821 
822 	while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) {
823 		pfs_copy.bytes[1] = PFLOW_MAXBYTES;
824 		pfs_copy.bytes[0] = 0;
825 
826 		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
827 			return (ret);
828 		if ((bytes[1] - PFLOW_MAXBYTES) > 0)
829 			bytes[1] -= PFLOW_MAXBYTES;
830 	}
831 
832 	pfs_copy.bytes[0] = bytes[0];
833 	pfs_copy.bytes[1] = bytes[1];
834 
835 	return (pflow_pack_flow(&pfs_copy, sk, sc));
836 }
837 
838 static int
839 copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc)
840 {
841 	int		ret = 0;
842 
843 	PFLOW_ASSERT(sc);
844 
845 	if (sc->sc_mbuf == NULL) {
846 		if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL)
847 			return (ENOBUFS);
848 	}
849 	m_copyback(sc->sc_mbuf, PFLOW_HDRLEN +
850 	    (sc->sc_count * sizeof(struct pflow_flow)),
851 	    sizeof(struct pflow_flow), (caddr_t)flow);
852 
853 	pflowstat_inc(pflow_flows);
854 	sc->sc_gcounter++;
855 	sc->sc_count++;
856 
857 	if (sc->sc_count >= sc->sc_maxcount)
858 		ret = pflow_sendout_v5(sc);
859 
860 	return(ret);
861 }
862 
863 static int
864 copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc)
865 {
866 	int		ret = 0;
867 
868 	PFLOW_ASSERT(sc);
869 
870 	if (sc->sc_mbuf == NULL) {
871 		if ((sc->sc_mbuf =
872 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) {
873 			return (ENOBUFS);
874 		}
875 		sc->sc_count4 = 0;
876 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
877 		    pflow_timeout, sc);
878 	}
879 	m_copyback(sc->sc_mbuf, PFLOW_SET_HDRLEN +
880 	    (sc->sc_count4 * sizeof(struct pflow_ipfix_flow4)),
881 	    sizeof(struct pflow_ipfix_flow4), (caddr_t)flow);
882 
883 	pflowstat_inc(pflow_flows);
884 	sc->sc_gcounter++;
885 	sc->sc_count4++;
886 
887 	if (sc->sc_count4 >= sc->sc_maxcount4)
888 		ret = pflow_sendout_ipfix(sc, PFLOW_INET);
889 	return(ret);
890 }
891 
892 static int
893 copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc)
894 {
895 	int		ret = 0;
896 
897 	PFLOW_ASSERT(sc);
898 
899 	if (sc->sc_mbuf6 == NULL) {
900 		if ((sc->sc_mbuf6 =
901 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) {
902 			return (ENOBUFS);
903 		}
904 		sc->sc_count6 = 0;
905 		callout_reset(&sc->sc_tmo6, PFLOW_TIMEOUT * hz,
906 		    pflow_timeout6, sc);
907 	}
908 	m_copyback(sc->sc_mbuf6, PFLOW_SET_HDRLEN +
909 	    (sc->sc_count6 * sizeof(struct pflow_ipfix_flow6)),
910 	    sizeof(struct pflow_ipfix_flow6), (caddr_t)flow);
911 
912 	pflowstat_inc(pflow_flows);
913 	sc->sc_gcounter++;
914 	sc->sc_count6++;
915 
916 	if (sc->sc_count6 >= sc->sc_maxcount6)
917 		ret = pflow_sendout_ipfix(sc, PFLOW_INET6);
918 
919 	return(ret);
920 }
921 
922 int
923 copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *nat, const struct pf_kstate *st,
924     struct pflow_softc *sc, uint8_t event, uint64_t timestamp)
925 {
926 	int		ret = 0;
927 
928 	PFLOW_ASSERT(sc);
929 
930 	if (sc->sc_mbuf_nat4 == NULL) {
931 		if ((sc->sc_mbuf_nat4 =
932 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_NAT44_ID)) == NULL) {
933 			return (ENOBUFS);
934 		}
935 		sc->sc_count_nat4 = 0;
936 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
937 		    pflow_timeout_nat4, sc);
938 	}
939 
940 	nat->nat_event = event;
941 	nat->timestamp = htobe64(pf_get_time() - (pf_get_uptime() - timestamp));
942 	m_copyback(sc->sc_mbuf_nat4, PFLOW_SET_HDRLEN +
943 	    (sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4)),
944 	    sizeof(struct pflow_ipfix_nat4), (caddr_t)nat);
945 	sc->sc_count_nat4++;
946 
947 	pflowstat_inc(pflow_flows);
948 	sc->sc_gcounter++;
949 
950 	if (sc->sc_count_nat4 >= sc->sc_maxcount_nat4)
951 		ret = pflow_sendout_ipfix(sc, PFLOW_NAT4);
952 
953 	return (ret);
954 }
955 
956 static int
957 pflow_pack_flow(const struct pf_kstate *st, struct pf_state_key *sk,
958     struct pflow_softc *sc)
959 {
960 	struct pflow_flow	 flow1;
961 	struct pflow_flow	 flow2;
962 	int			 ret = 0;
963 
964 	bzero(&flow1, sizeof(flow1));
965 	bzero(&flow2, sizeof(flow2));
966 
967 	if (st->direction == PF_OUT)
968 		copy_flow_data(&flow1, &flow2, st, sk, 1, 0);
969 	else
970 		copy_flow_data(&flow1, &flow2, st, sk, 0, 1);
971 
972 	if (st->bytes[0] != 0) /* first flow from state */
973 		ret = copy_flow_to_m(&flow1, sc);
974 
975 	if (st->bytes[1] != 0) /* second flow from state */
976 		ret = copy_flow_to_m(&flow2, sc);
977 
978 	return (ret);
979 }
980 
981 static bool
982 pflow_is_natd(const struct pf_kstate *st)
983 {
984 	/* If ports or addresses are different we've been NAT-ed. */
985 	return (memcmp(st->key[PF_SK_WIRE], st->key[PF_SK_STACK],
986 	    sizeof(struct pf_addr) * 2 + sizeof(uint16_t) * 2) != 0);
987 }
988 
989 static int
990 pflow_pack_flow_ipfix(const struct pf_kstate *st, struct pf_state_key *sk,
991     struct pflow_softc *sc)
992 {
993 	struct pflow_ipfix_flow4	 flow4_1, flow4_2;
994 	struct pflow_ipfix_nat4		 nat4_1, nat4_2;
995 	struct pflow_ipfix_flow6	 flow6_1, flow6_2;
996 	int				 ret = 0;
997 	bool				 nat = false;
998 
999 	switch (sk->af) {
1000 	case AF_INET:
1001 		bzero(&flow4_1, sizeof(flow4_1));
1002 		bzero(&flow4_2, sizeof(flow4_2));
1003 
1004 		nat = pflow_is_natd(st);
1005 
1006 		if (st->direction == PF_OUT)
1007 			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
1008 			    1, 0);
1009 		else
1010 			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
1011 			    0, 1);
1012 
1013 		if (nat)
1014 			copy_nat_ipfix_4_data(&nat4_1, &nat4_2, st, sk, sc, 1, 0);
1015 
1016 		if (st->bytes[0] != 0) /* first flow from state */ {
1017 			ret = copy_flow_ipfix_4_to_m(&flow4_1, sc);
1018 
1019 			if (ret == 0 && nat) {
1020 				ret = copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
1021 				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
1022 				ret |= copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
1023 				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
1024 			}
1025 		}
1026 
1027 		if (st->bytes[1] != 0) /* second flow from state */ {
1028 			ret = copy_flow_ipfix_4_to_m(&flow4_2, sc);
1029 
1030 			if (ret == 0 && nat) {
1031 				ret = copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
1032 				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
1033 				ret |= copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
1034 				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
1035 			}
1036 		}
1037 		break;
1038 	case AF_INET6:
1039 		bzero(&flow6_1, sizeof(flow6_1));
1040 		bzero(&flow6_2, sizeof(flow6_2));
1041 
1042 		if (st->direction == PF_OUT)
1043 			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
1044 			    1, 0);
1045 		else
1046 			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
1047 			    0, 1);
1048 
1049 		if (st->bytes[0] != 0) /* first flow from state */
1050 			ret = copy_flow_ipfix_6_to_m(&flow6_1, sc);
1051 
1052 		if (st->bytes[1] != 0) /* second flow from state */
1053 			ret = copy_flow_ipfix_6_to_m(&flow6_2, sc);
1054 		break;
1055 	}
1056 	return (ret);
1057 }
1058 
1059 static void
1060 pflow_timeout(void *v)
1061 {
1062 	struct pflow_softc	*sc = v;
1063 
1064 	PFLOW_ASSERT(sc);
1065 	CURVNET_SET(sc->sc_vnet);
1066 
1067 	switch (sc->sc_version) {
1068 	case PFLOW_PROTO_5:
1069 		pflow_sendout_v5(sc);
1070 		break;
1071 	case PFLOW_PROTO_10:
1072 		pflow_sendout_ipfix(sc, PFLOW_INET);
1073 		break;
1074 	default: /* NOTREACHED */
1075 		panic("Unsupported version %d", sc->sc_version);
1076 		break;
1077 	}
1078 
1079 	CURVNET_RESTORE();
1080 }
1081 
1082 static void
1083 pflow_timeout6(void *v)
1084 {
1085 	struct pflow_softc	*sc = v;
1086 
1087 	PFLOW_ASSERT(sc);
1088 
1089 	if (sc->sc_version != PFLOW_PROTO_10)
1090 		return;
1091 
1092 	CURVNET_SET(sc->sc_vnet);
1093 	pflow_sendout_ipfix(sc, PFLOW_INET6);
1094 	CURVNET_RESTORE();
1095 }
1096 
1097 static void
1098 pflow_timeout_tmpl(void *v)
1099 {
1100 	struct pflow_softc	*sc = v;
1101 
1102 	PFLOW_ASSERT(sc);
1103 
1104 	if (sc->sc_version != PFLOW_PROTO_10)
1105 		return;
1106 
1107 	CURVNET_SET(sc->sc_vnet);
1108 	pflow_sendout_ipfix_tmpl(sc);
1109 	CURVNET_RESTORE();
1110 }
1111 
1112 static void
1113 pflow_timeout_nat4(void *v)
1114 {
1115 	struct pflow_softc	*sc = v;
1116 
1117 	PFLOW_ASSERT(sc);
1118 
1119 	if (sc->sc_version != PFLOW_PROTO_10)
1120 		return;
1121 
1122 	CURVNET_SET(sc->sc_vnet);
1123 	pflow_sendout_ipfix(sc, PFLOW_NAT4);
1124 	CURVNET_RESTORE();
1125 }
1126 
1127 static void
1128 pflow_flush(struct pflow_softc *sc)
1129 {
1130 	PFLOW_ASSERT(sc);
1131 
1132 	switch (sc->sc_version) {
1133 	case PFLOW_PROTO_5:
1134 		pflow_sendout_v5(sc);
1135 		break;
1136 	case PFLOW_PROTO_10:
1137 		pflow_sendout_ipfix(sc, PFLOW_INET);
1138 		pflow_sendout_ipfix(sc, PFLOW_INET6);
1139 		pflow_sendout_ipfix(sc, PFLOW_NAT4);
1140 		break;
1141 	default: /* NOTREACHED */
1142 		break;
1143 	}
1144 }
1145 
1146 static int
1147 pflow_sendout_v5(struct pflow_softc *sc)
1148 {
1149 	struct mbuf		*m = sc->sc_mbuf;
1150 	struct pflow_header	*h;
1151 	struct timespec		tv;
1152 
1153 	PFLOW_ASSERT(sc);
1154 
1155 	if (m == NULL)
1156 		return (0);
1157 
1158 	sc->sc_mbuf = NULL;
1159 
1160 	pflowstat_inc(pflow_packets);
1161 	h = mtod(m, struct pflow_header *);
1162 	h->count = htons(sc->sc_count);
1163 
1164 	/* populate pflow_header */
1165 	h->uptime_ms = htonl(time_uptime * 1000);
1166 
1167 	getnanotime(&tv);
1168 	h->time_sec = htonl(tv.tv_sec);			/* XXX 2038 */
1169 	h->time_nanosec = htonl(tv.tv_nsec);
1170 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1171 		swi_sched(sc->sc_swi_cookie, 0);
1172 
1173 	return (0);
1174 }
1175 
1176 static int
1177 pflow_sendout_ipfix(struct pflow_softc *sc, enum pflow_family_t af)
1178 {
1179 	struct mbuf			*m;
1180 	struct pflow_v10_header		*h10;
1181 	struct pflow_set_header		*set_hdr;
1182 	u_int32_t			 count;
1183 	int				 set_length;
1184 
1185 	PFLOW_ASSERT(sc);
1186 
1187 	switch (af) {
1188 	case PFLOW_INET:
1189 		m = sc->sc_mbuf;
1190 		callout_stop(&sc->sc_tmo);
1191 		if (m == NULL)
1192 			return (0);
1193 		sc->sc_mbuf = NULL;
1194 		count = sc->sc_count4;
1195 		set_length = sizeof(struct pflow_set_header)
1196 		    + sc->sc_count4 * sizeof(struct pflow_ipfix_flow4);
1197 		break;
1198 	case PFLOW_INET6:
1199 		m = sc->sc_mbuf6;
1200 		callout_stop(&sc->sc_tmo6);
1201 		if (m == NULL)
1202 			return (0);
1203 		sc->sc_mbuf6 = NULL;
1204 		count = sc->sc_count6;
1205 		set_length = sizeof(struct pflow_set_header)
1206 		    + sc->sc_count6 * sizeof(struct pflow_ipfix_flow6);
1207 		break;
1208 	case PFLOW_NAT4:
1209 		m = sc->sc_mbuf_nat4;
1210 		callout_stop(&sc->sc_tmo_nat4);
1211 		if (m == NULL)
1212 			return (0);
1213 		sc->sc_mbuf_nat4 = NULL;
1214 		count = sc->sc_count_nat4;
1215 		set_length = sizeof(struct pflow_set_header)
1216 		    + sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4);
1217 		break;
1218 	default:
1219 		panic("Unsupported AF %d", af);
1220 	}
1221 
1222 	pflowstat_inc(pflow_packets);
1223 
1224 	set_hdr = mtod(m, struct pflow_set_header *);
1225 	set_hdr->set_length = htons(set_length);
1226 
1227 	/* populate pflow_header */
1228 	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
1229 	if (m == NULL) {
1230 		pflowstat_inc(pflow_onomem);
1231 		return (ENOBUFS);
1232 	}
1233 	h10 = mtod(m, struct pflow_v10_header *);
1234 	h10->version = htons(PFLOW_PROTO_10);
1235 	h10->length = htons(PFLOW_IPFIX_HDRLEN + set_length);
1236 	h10->time_sec = htonl(time_second);		/* XXX 2038 */
1237 	h10->flow_sequence = htonl(sc->sc_sequence);
1238 	sc->sc_sequence += count;
1239 	h10->observation_dom = htonl(sc->sc_observation_dom);
1240 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1241 		swi_sched(sc->sc_swi_cookie, 0);
1242 
1243 	return (0);
1244 }
1245 
1246 static int
1247 pflow_sendout_ipfix_tmpl(struct pflow_softc *sc)
1248 {
1249 	struct mbuf			*m;
1250 	struct pflow_v10_header		*h10;
1251 
1252 	PFLOW_ASSERT(sc);
1253 
1254 	m = pflow_get_mbuf(sc, 0);
1255 	if (m == NULL)
1256 		return (0);
1257 	m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl),
1258 	    (caddr_t)&sc->sc_tmpl_ipfix);
1259 
1260 	pflowstat_inc(pflow_packets);
1261 
1262 	/* populate pflow_header */
1263 	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
1264 	if (m == NULL) {
1265 		pflowstat_inc(pflow_onomem);
1266 		return (ENOBUFS);
1267 	}
1268 	h10 = mtod(m, struct pflow_v10_header *);
1269 	h10->version = htons(PFLOW_PROTO_10);
1270 	h10->length = htons(PFLOW_IPFIX_HDRLEN + sizeof(struct
1271 	    pflow_ipfix_tmpl));
1272 	h10->time_sec = htonl(time_second);		/* XXX 2038 */
1273 	h10->flow_sequence = htonl(sc->sc_sequence);
1274 	h10->observation_dom = htonl(sc->sc_observation_dom);
1275 
1276 	callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
1277 	    pflow_timeout_tmpl, sc);
1278 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1279 		swi_sched(sc->sc_swi_cookie, 0);
1280 
1281 	return (0);
1282 }
1283 
1284 static int
1285 pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
1286 {
1287 	if (sc->so == NULL) {
1288 		m_freem(m);
1289 		return (EINVAL);
1290 	}
1291 	return (sosend(sc->so, sc->sc_flowdst, NULL, m, NULL, 0, curthread));
1292 }
1293 
1294 static int
1295 sysctl_pflowstats(SYSCTL_HANDLER_ARGS)
1296 {
1297 	struct pflowstats pflowstats;
1298 
1299 	pflowstats.pflow_flows =
1300 	    counter_u64_fetch(V_pflowstats.c[pflow_flows]);
1301 	pflowstats.pflow_packets =
1302 	    counter_u64_fetch(V_pflowstats.c[pflow_packets]);
1303 	pflowstats.pflow_onomem =
1304 	    counter_u64_fetch(V_pflowstats.c[pflow_onomem]);
1305 	pflowstats.pflow_oerrors =
1306 	    counter_u64_fetch(V_pflowstats.c[pflow_oerrors]);
1307 
1308 	return (sysctl_handle_opaque(oidp, &pflowstats, sizeof(pflowstats), req));
1309 }
1310 
1311 static int
1312 pflow_nl_list(struct nlmsghdr *hdr, struct nl_pstate *npt)
1313 {
1314 	struct epoch_tracker	 et;
1315 	struct pflow_softc	*sc = NULL;
1316 	struct nl_writer	 *nw = npt->nw;
1317 	int			 error = 0;
1318 
1319 	hdr->nlmsg_flags |= NLM_F_MULTI;
1320 
1321 	NET_EPOCH_ENTER(et);
1322 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1323 		if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1324 			error = ENOMEM;
1325 			goto out;
1326 		}
1327 
1328 		struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1329 		ghdr_new->cmd = PFLOWNL_CMD_LIST;
1330 		ghdr_new->version = 0;
1331 		ghdr_new->reserved = 0;
1332 
1333 		nlattr_add_u32(nw, PFLOWNL_L_ID, sc->sc_id);
1334 
1335 		if (! nlmsg_end(nw)) {
1336 			error = ENOMEM;
1337 			goto out;
1338 		}
1339 	}
1340 
1341 out:
1342 	NET_EPOCH_EXIT(et);
1343 
1344 	if (error != 0)
1345 		nlmsg_abort(nw);
1346 
1347 	return (error);
1348 }
1349 
1350 static int
1351 pflow_nl_create(struct nlmsghdr *hdr, struct nl_pstate *npt)
1352 {
1353 	struct nl_writer	 *nw = npt->nw;
1354 	int			 error = 0;
1355 	int			 unit;
1356 
1357 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1358 		return (ENOMEM);
1359 	}
1360 
1361 	struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1362 	ghdr_new->cmd = PFLOWNL_CMD_CREATE;
1363 	ghdr_new->version = 0;
1364 	ghdr_new->reserved = 0;
1365 
1366 	unit = alloc_unr(V_pflow_unr);
1367 	if (unit == -1) {
1368 		nlmsg_abort(nw);
1369 		return (ENOMEM);
1370 	}
1371 
1372 	error = pflow_create(unit);
1373 	if (error != 0) {
1374 		free_unr(V_pflow_unr, unit);
1375 		nlmsg_abort(nw);
1376 		return (error);
1377 	}
1378 
1379 	nlattr_add_s32(nw, PFLOWNL_CREATE_ID, unit);
1380 
1381 	if (! nlmsg_end(nw)) {
1382 		pflow_destroy(unit, true);
1383 		return (ENOMEM);
1384 	}
1385 
1386 	return (0);
1387 }
1388 
1389 struct pflow_parsed_del {
1390 	int id;
1391 };
1392 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1393 #define	_OUT(_field)	offsetof(struct pflow_parsed_del, _field)
1394 static const struct nlattr_parser nla_p_del[] = {
1395 	{ .type = PFLOWNL_DEL_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1396 };
1397 static const struct nlfield_parser nlf_p_del[] = {};
1398 #undef _IN
1399 #undef _OUT
1400 NL_DECLARE_PARSER(del_parser, struct genlmsghdr, nlf_p_del, nla_p_del);
1401 
1402 static int
1403 pflow_nl_del(struct nlmsghdr *hdr, struct nl_pstate *npt)
1404 {
1405 	struct pflow_parsed_del d = {};
1406 	int error;
1407 
1408 	error = nl_parse_nlmsg(hdr, &del_parser, npt, &d);
1409 	if (error != 0)
1410 		return (error);
1411 
1412 	error = pflow_destroy(d.id, true);
1413 
1414 	return (error);
1415 }
1416 
1417 struct pflow_parsed_get {
1418 	int id;
1419 };
1420 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1421 #define	_OUT(_field)	offsetof(struct pflow_parsed_get, _field)
1422 static const struct nlattr_parser nla_p_get[] = {
1423 	{ .type = PFLOWNL_GET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1424 };
1425 static const struct nlfield_parser nlf_p_get[] = {};
1426 #undef _IN
1427 #undef _OUT
1428 NL_DECLARE_PARSER(get_parser, struct genlmsghdr, nlf_p_get, nla_p_get);
1429 
1430 static bool
1431 nlattr_add_sockaddr(struct nl_writer *nw, int attr, const struct sockaddr *s)
1432 {
1433 	int off = nlattr_add_nested(nw, attr);
1434 	if (off == 0)
1435 		return (false);
1436 
1437 	nlattr_add_u8(nw, PFLOWNL_ADDR_FAMILY, s->sa_family);
1438 
1439 	switch (s->sa_family) {
1440 	case AF_INET: {
1441 		const struct sockaddr_in *in = (const struct sockaddr_in *)s;
1442 		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in->sin_port);
1443 		nlattr_add_in_addr(nw, PFLOWNL_ADDR_IP, &in->sin_addr);
1444 		break;
1445 	}
1446 	case AF_INET6: {
1447 		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)s;
1448 		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in6->sin6_port);
1449 		nlattr_add_in6_addr(nw, PFLOWNL_ADDR_IP6, &in6->sin6_addr);
1450 		break;
1451 	}
1452 	default:
1453 		panic("Unknown address family %d", s->sa_family);
1454 	}
1455 
1456 	nlattr_set_len(nw, off);
1457 	return (true);
1458 }
1459 
1460 static int
1461 pflow_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
1462 {
1463 	struct epoch_tracker et;
1464 	struct pflow_parsed_get g = {};
1465 	struct pflow_softc *sc = NULL;
1466 	struct nl_writer *nw = npt->nw;
1467 	struct genlmsghdr *ghdr_new;
1468 	int error;
1469 
1470 	error = nl_parse_nlmsg(hdr, &get_parser, npt, &g);
1471 	if (error != 0)
1472 		return (error);
1473 
1474 	NET_EPOCH_ENTER(et);
1475 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1476 		if (sc->sc_id == g.id)
1477 			break;
1478 	}
1479 	if (sc == NULL) {
1480 		error = ENOENT;
1481 		goto out;
1482 	}
1483 
1484 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1485 		nlmsg_abort(nw);
1486 		error = ENOMEM;
1487 		goto out;
1488 	}
1489 
1490 	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1491 	if (ghdr_new == NULL) {
1492 		nlmsg_abort(nw);
1493 		error = ENOMEM;
1494 		goto out;
1495 	}
1496 
1497 	ghdr_new->cmd = PFLOWNL_CMD_GET;
1498 	ghdr_new->version = 0;
1499 	ghdr_new->reserved = 0;
1500 
1501 	nlattr_add_u32(nw, PFLOWNL_GET_ID, sc->sc_id);
1502 	nlattr_add_u16(nw, PFLOWNL_GET_VERSION, sc->sc_version);
1503 	if (sc->sc_flowsrc)
1504 		nlattr_add_sockaddr(nw, PFLOWNL_GET_SRC, sc->sc_flowsrc);
1505 	if (sc->sc_flowdst)
1506 		nlattr_add_sockaddr(nw, PFLOWNL_GET_DST, sc->sc_flowdst);
1507 	nlattr_add_u32(nw, PFLOWNL_GET_OBSERVATION_DOMAIN,
1508 	    sc->sc_observation_dom);
1509 	nlattr_add_u8(nw, PFLOWNL_GET_SOCKET_STATUS, sc->so != NULL);
1510 
1511 	if (! nlmsg_end(nw)) {
1512 		nlmsg_abort(nw);
1513 		error = ENOMEM;
1514 	}
1515 
1516 out:
1517 	NET_EPOCH_EXIT(et);
1518 
1519 	return (error);
1520 }
1521 
1522 struct pflow_sockaddr {
1523 	union {
1524 		struct sockaddr_in in;
1525 		struct sockaddr_in6 in6;
1526 		struct sockaddr_storage storage;
1527 	};
1528 };
1529 static bool
1530 pflow_postparse_sockaddr(void *parsed_args, struct nl_pstate *npt __unused)
1531 {
1532 	struct pflow_sockaddr *s = (struct pflow_sockaddr *)parsed_args;
1533 
1534 	if (s->storage.ss_family == AF_INET)
1535 		s->storage.ss_len = sizeof(struct sockaddr_in);
1536 	else if (s->storage.ss_family == AF_INET6)
1537 		s->storage.ss_len = sizeof(struct sockaddr_in6);
1538 	else
1539 		return (false);
1540 
1541 	return (true);
1542 }
1543 
1544 #define	_OUT(_field)	offsetof(struct pflow_sockaddr, _field)
1545 static struct nlattr_parser nla_p_sockaddr[] = {
1546 	{ .type = PFLOWNL_ADDR_FAMILY, .off = _OUT(in.sin_family), .cb = nlattr_get_uint8 },
1547 	{ .type = PFLOWNL_ADDR_PORT, .off = _OUT(in.sin_port), .cb = nlattr_get_uint16 },
1548 	{ .type = PFLOWNL_ADDR_IP, .off = _OUT(in.sin_addr), .cb = nlattr_get_in_addr },
1549 	{ .type = PFLOWNL_ADDR_IP6, .off = _OUT(in6.sin6_addr), .cb = nlattr_get_in6_addr },
1550 };
1551 NL_DECLARE_ATTR_PARSER_EXT(addr_parser, nla_p_sockaddr, pflow_postparse_sockaddr);
1552 #undef _OUT
1553 
1554 struct pflow_parsed_set {
1555 	int id;
1556 	uint16_t version;
1557 	struct sockaddr_storage src;
1558 	struct sockaddr_storage dst;
1559 	uint32_t observation_dom;
1560 };
1561 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1562 #define	_OUT(_field)	offsetof(struct pflow_parsed_set, _field)
1563 static const struct nlattr_parser nla_p_set[] = {
1564 	{ .type = PFLOWNL_SET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1565 	{ .type = PFLOWNL_SET_VERSION, .off = _OUT(version), .cb = nlattr_get_uint16 },
1566 	{ .type = PFLOWNL_SET_SRC, .off = _OUT(src), .arg = &addr_parser, .cb = nlattr_get_nested },
1567 	{ .type = PFLOWNL_SET_DST, .off = _OUT(dst), .arg = &addr_parser, .cb = nlattr_get_nested },
1568 	{ .type = PFLOWNL_SET_OBSERVATION_DOMAIN, .off = _OUT(observation_dom), .cb = nlattr_get_uint32 },
1569 };
1570 static const struct nlfield_parser nlf_p_set[] = {};
1571 #undef _IN
1572 #undef _OUT
1573 NL_DECLARE_PARSER(set_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
1574 
1575 static int
1576 pflow_set(struct pflow_softc *sc, const struct pflow_parsed_set *pflowr, struct ucred *cred)
1577 {
1578 	struct thread		*td;
1579 	struct socket		*so;
1580 	int			 error = 0;
1581 
1582 	td = curthread;
1583 
1584 	PFLOW_ASSERT(sc);
1585 
1586 	if (pflowr->version != 0) {
1587 		switch(pflowr->version) {
1588 		case PFLOW_PROTO_5:
1589 		case PFLOW_PROTO_10:
1590 			break;
1591 		default:
1592 			return(EINVAL);
1593 		}
1594 	}
1595 
1596 	pflow_flush(sc);
1597 
1598 	if (pflowr->dst.ss_len != 0) {
1599 		if (sc->sc_flowdst != NULL &&
1600 		    sc->sc_flowdst->sa_family != pflowr->dst.ss_family) {
1601 			free(sc->sc_flowdst, M_DEVBUF);
1602 			sc->sc_flowdst = NULL;
1603 			if (sc->so != NULL) {
1604 				soclose(sc->so);
1605 				sc->so = NULL;
1606 			}
1607 		}
1608 
1609 		switch (pflowr->dst.ss_family) {
1610 		case AF_INET:
1611 			if (sc->sc_flowdst == NULL) {
1612 				if ((sc->sc_flowdst = malloc(
1613 				    sizeof(struct sockaddr_in),
1614 				    M_DEVBUF,  M_NOWAIT)) == NULL)
1615 					return (ENOMEM);
1616 			}
1617 			memcpy(sc->sc_flowdst, &pflowr->dst,
1618 			    sizeof(struct sockaddr_in));
1619 			sc->sc_flowdst->sa_len = sizeof(struct
1620 			    sockaddr_in);
1621 			break;
1622 		case AF_INET6:
1623 			if (sc->sc_flowdst == NULL) {
1624 				if ((sc->sc_flowdst = malloc(
1625 				    sizeof(struct sockaddr_in6),
1626 				    M_DEVBUF, M_NOWAIT)) == NULL)
1627 					return (ENOMEM);
1628 			}
1629 			memcpy(sc->sc_flowdst, &pflowr->dst,
1630 			    sizeof(struct sockaddr_in6));
1631 			sc->sc_flowdst->sa_len = sizeof(struct
1632 			    sockaddr_in6);
1633 			break;
1634 		default:
1635 			break;
1636 		}
1637 	}
1638 
1639 	if (pflowr->src.ss_len != 0) {
1640 		if (sc->sc_flowsrc != NULL)
1641 			free(sc->sc_flowsrc, M_DEVBUF);
1642 		sc->sc_flowsrc = NULL;
1643 		if (sc->so != NULL) {
1644 			soclose(sc->so);
1645 			sc->so = NULL;
1646 		}
1647 		switch(pflowr->src.ss_family) {
1648 		case AF_INET:
1649 			if ((sc->sc_flowsrc = malloc(
1650 			    sizeof(struct sockaddr_in),
1651 			    M_DEVBUF, M_NOWAIT)) == NULL)
1652 				return (ENOMEM);
1653 			memcpy(sc->sc_flowsrc, &pflowr->src,
1654 			    sizeof(struct sockaddr_in));
1655 			sc->sc_flowsrc->sa_len = sizeof(struct
1656 			    sockaddr_in);
1657 			break;
1658 		case AF_INET6:
1659 			if ((sc->sc_flowsrc = malloc(
1660 			    sizeof(struct sockaddr_in6),
1661 			    M_DEVBUF, M_NOWAIT)) == NULL)
1662 				return (ENOMEM);
1663 			memcpy(sc->sc_flowsrc, &pflowr->src,
1664 			    sizeof(struct sockaddr_in6));
1665 			sc->sc_flowsrc->sa_len = sizeof(struct
1666 			    sockaddr_in6);
1667 			break;
1668 		default:
1669 			break;
1670 		}
1671 	}
1672 
1673 	if (sc->so == NULL) {
1674 		if (pflowvalidsockaddr(sc->sc_flowdst, 0)) {
1675 			error = socreate(sc->sc_flowdst->sa_family,
1676 			    &so, SOCK_DGRAM, IPPROTO_UDP, cred, td);
1677 			if (error)
1678 				return (error);
1679 			if (pflowvalidsockaddr(sc->sc_flowsrc, 1)) {
1680 				error = sobind(so, sc->sc_flowsrc, td);
1681 				if (error) {
1682 					soclose(so);
1683 					return (error);
1684 				}
1685 			}
1686 			sc->so = so;
1687 		}
1688 	} else if (!pflowvalidsockaddr(sc->sc_flowdst, 0)) {
1689 		soclose(sc->so);
1690 		sc->so = NULL;
1691 	}
1692 
1693 	if (pflowr->observation_dom != 0)
1694 		sc->sc_observation_dom = pflowr->observation_dom;
1695 
1696 	/* error check is above */
1697 	if (pflowr->version != 0)
1698 		sc->sc_version = pflowr->version;
1699 
1700 	pflow_setmtu(sc, ETHERMTU);
1701 
1702 	switch (sc->sc_version) {
1703 	case PFLOW_PROTO_5:
1704 		callout_stop(&sc->sc_tmo6);
1705 		callout_stop(&sc->sc_tmo_tmpl);
1706 		break;
1707 	case PFLOW_PROTO_10:
1708 		callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
1709 		    pflow_timeout_tmpl, sc);
1710 		break;
1711 	default: /* NOTREACHED */
1712 		break;
1713 	}
1714 
1715 	return (0);
1716 }
1717 
1718 static int
1719 pflow_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
1720 {
1721 	struct epoch_tracker et;
1722 	struct pflow_parsed_set s = {};
1723 	struct pflow_softc *sc = NULL;
1724 	int error;
1725 
1726 	error = nl_parse_nlmsg(hdr, &set_parser, npt, &s);
1727 	if (error != 0)
1728 		return (error);
1729 
1730 	NET_EPOCH_ENTER(et);
1731 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1732 		if (sc->sc_id == s.id)
1733 			break;
1734 	}
1735 	if (sc == NULL) {
1736 		error = ENOENT;
1737 		goto out;
1738 	}
1739 
1740 	PFLOW_LOCK(sc);
1741 	error = pflow_set(sc, &s, nlp_get_cred(npt->nlp));
1742 	PFLOW_UNLOCK(sc);
1743 
1744 out:
1745 	NET_EPOCH_EXIT(et);
1746 	return (error);
1747 }
1748 
1749 static const struct genl_cmd pflow_cmds[] = {
1750 	{
1751 		.cmd_num = PFLOWNL_CMD_LIST,
1752 		.cmd_name = "LIST",
1753 		.cmd_cb = pflow_nl_list,
1754 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1755 		.cmd_priv = PRIV_NETINET_PF,
1756 	},
1757 	{
1758 		.cmd_num = PFLOWNL_CMD_CREATE,
1759 		.cmd_name = "CREATE",
1760 		.cmd_cb = pflow_nl_create,
1761 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1762 		.cmd_priv = PRIV_NETINET_PF,
1763 	},
1764 	{
1765 		.cmd_num = PFLOWNL_CMD_DEL,
1766 		.cmd_name = "DEL",
1767 		.cmd_cb = pflow_nl_del,
1768 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1769 		.cmd_priv = PRIV_NETINET_PF,
1770 	},
1771 	{
1772 		.cmd_num = PFLOWNL_CMD_GET,
1773 		.cmd_name = "GET",
1774 		.cmd_cb = pflow_nl_get,
1775 		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1776 		.cmd_priv = PRIV_NETINET_PF,
1777 	},
1778 	{
1779 		.cmd_num = PFLOWNL_CMD_SET,
1780 		.cmd_name = "SET",
1781 		.cmd_cb = pflow_nl_set,
1782 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1783 		.cmd_priv = PRIV_NETINET_PF,
1784 	},
1785 };
1786 
1787 static const struct nlhdr_parser *all_parsers[] = {
1788 	&del_parser,
1789 	&get_parser,
1790 	&set_parser,
1791 };
1792 
1793 static unsigned		pflow_do_osd_jail_slot;
1794 
1795 static int
1796 pflow_init(void)
1797 {
1798 	bool ret;
1799 	int family_id __diagused;
1800 
1801 	NL_VERIFY_PARSERS(all_parsers);
1802 
1803 	static osd_method_t methods[PR_MAXMETHOD] = {
1804 		[PR_METHOD_REMOVE] = pflow_jail_remove,
1805 	};
1806 	pflow_do_osd_jail_slot = osd_jail_register(NULL, methods);
1807 
1808 	family_id = genl_register_family(PFLOWNL_FAMILY_NAME, 0, 2, PFLOWNL_CMD_MAX);
1809 	MPASS(family_id != 0);
1810 	ret = genl_register_cmds(PFLOWNL_FAMILY_NAME, pflow_cmds,
1811 	    nitems(pflow_cmds));
1812 
1813 	return (ret ? 0 : ENODEV);
1814 }
1815 
/*
 * Module unload: undo the registrations made by pflow_init() by releasing
 * the jail OSD slot and unregistering the generic netlink family.
 */
static void
pflow_uninit(void)
{
	osd_jail_deregister(pflow_do_osd_jail_slot);
	genl_unregister_family(PFLOWNL_FAMILY_NAME);
}
1822 
1823 static int
1824 pflow_modevent(module_t mod, int type, void *data)
1825 {
1826 	int error = 0;
1827 
1828 	switch (type) {
1829 	case MOD_LOAD:
1830 		error = pflow_init();
1831 		break;
1832 	case MOD_UNLOAD:
1833 		pflow_uninit();
1834 		break;
1835 	default:
1836 		error = EINVAL;
1837 		break;
1838 	}
1839 
1840 	return (error);
1841 }
1842 
1843 static moduledata_t pflow_mod = {
1844 	pflowname,
1845 	pflow_modevent,
1846 	0
1847 };
1848 
1849 DECLARE_MODULE(pflow, pflow_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
1850 MODULE_VERSION(pflow, 1);
1851 MODULE_DEPEND(pflow, pf, PF_MODVER, PF_MODVER, PF_MODVER);
1852