xref: /freebsd/sys/netpfil/pf/pflow.c (revision 7a7741af18d6c8a804cc643cb7ecda9d730c6aa6)
1 /*	$OpenBSD: if_pflow.c,v 1.100 2023/11/09 08:53:20 mvs Exp $	*/
2 
3 /*
4  * Copyright (c) 2023 Rubicon Communications, LLC (Netgate)
5  * Copyright (c) 2011 Florian Obser <florian@narrans.de>
6  * Copyright (c) 2011 Sebastian Benoit <benoit-lists@fb12.de>
7  * Copyright (c) 2008 Henning Brauer <henning@openbsd.org>
8  * Copyright (c) 2008 Joerg Goltermann <jg@osn.de>
9  *
10  * Permission to use, copy, modify, and distribute this software for any
11  * purpose with or without fee is hereby granted, provided that the above
12  * copyright notice and this permission notice appear in all copies.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
16  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
17  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
20  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21  */
22 
23 #include <sys/cdefs.h>
24 #include <sys/param.h>
25 #include <sys/bus.h>
26 #include <sys/callout.h>
27 #include <sys/endian.h>
28 #include <sys/interrupt.h>
29 #include <sys/kernel.h>
30 #include <sys/malloc.h>
31 #include <sys/module.h>
32 #include <sys/mbuf.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <sys/sockio.h>
36 #include <sys/sysctl.h>
37 #include <sys/systm.h>
38 #include <sys/priv.h>
39 
40 #include <net/if.h>
41 #include <net/if_types.h>
42 #include <net/bpf.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <netinet/if_ether.h>
46 #include <netinet/tcp.h>
47 
48 #include <netinet/ip.h>
49 #include <netinet/ip_icmp.h>
50 #include <netinet/ip_var.h>
51 #include <netinet/udp.h>
52 #include <netinet/udp_var.h>
53 #include <netinet/in_pcb.h>
54 
55 #include <netlink/netlink.h>
56 #include <netlink/netlink_ctl.h>
57 #include <netlink/netlink_generic.h>
58 #include <netlink/netlink_message_writer.h>
59 
60 #include <net/pfvar.h>
61 #include <net/pflow.h>
62 #include "net/if_var.h"
63 
64 #define PFLOW_MINMTU	\
65     (sizeof(struct pflow_header) + sizeof(struct pflow_flow))
66 
67 #ifdef PFLOWDEBUG
68 #define DPRINTF(x)	do { printf x ; } while (0)
69 #else
70 #define DPRINTF(x)
71 #endif
72 
/*
 * Selects which IPFIX record set an operation applies to; passed to
 * pflow_sendout_ipfix() to pick the matching mbuf/counter/timeout.
 */
enum pflow_family_t {
	PFLOW_INET,	/* IPv4 flow records */
	PFLOW_INET6,	/* IPv6 flow records */
	PFLOW_NAT4,	/* NAT44 session (natEvent) records */
};
78 
79 static void	pflow_output_process(void *);
80 static int	pflow_create(int);
81 static int	pflow_destroy(int, bool);
82 static int	pflow_calc_mtu(struct pflow_softc *, int, int);
83 static void	pflow_setmtu(struct pflow_softc *, int);
84 static int	pflowvalidsockaddr(const struct sockaddr *, int);
85 
86 static struct mbuf	*pflow_get_mbuf(struct pflow_softc *, u_int16_t);
87 static void	pflow_flush(struct pflow_softc *);
88 static int	pflow_sendout_v5(struct pflow_softc *);
89 static int	pflow_sendout_ipfix(struct pflow_softc *, enum pflow_family_t);
90 static int	pflow_sendout_ipfix_tmpl(struct pflow_softc *);
91 static int	pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *);
92 static int	sysctl_pflowstats(SYSCTL_HANDLER_ARGS);
93 static void	pflow_timeout(void *);
94 static void	pflow_timeout6(void *);
95 static void	pflow_timeout_tmpl(void *);
96 static void	pflow_timeout_nat4(void *);
97 static void	copy_flow_data(struct pflow_flow *, struct pflow_flow *,
98 	const struct pf_kstate *, struct pf_state_key *, int, int);
99 static void	copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *,
100 	struct pflow_ipfix_flow4 *, const struct pf_kstate *, struct pf_state_key *,
101 	struct pflow_softc *, int, int);
102 static void	copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *,
103 	struct pflow_ipfix_flow6 *, const struct pf_kstate *, struct pf_state_key *,
104 	struct pflow_softc *, int, int);
105 static int	pflow_pack_flow(const struct pf_kstate *, struct pf_state_key *,
106 	struct pflow_softc *);
107 static int	pflow_pack_flow_ipfix(const struct pf_kstate *, struct pf_state_key *,
108 	struct pflow_softc *);
109 static void	export_pflow(const struct pf_kstate *);
110 static int	export_pflow_if(const struct pf_kstate*, struct pf_state_key *,
111 	struct pflow_softc *);
112 static int	copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc);
113 static int	copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow,
114 	struct pflow_softc *sc);
115 static int	copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow,
116 	struct pflow_softc *sc);
117 static int	copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *,
118 	const struct pf_kstate *, struct pflow_softc *,
119 	uint8_t, uint64_t);
120 
/* Device/SWI name used for the unit and the software interrupt. */
static const char pflowname[] = "pflow";

/* Indices into the per-VNET statistics counter array below. */
enum pflowstat_counters {
	pflow_flows,	/* flow records queued for export (see copy_*_to_m) */
	pflow_packets,	/* incremented in the send path (not in this chunk) */
	pflow_onomem,	/* mbuf/cluster allocation failures */
	pflow_oerrors,	/* output errors (incremented in the send path) */
	pflow_ncounters,	/* count of counters; must stay last */
};
/* Per-VNET statistics: one counter(9) per pflowstat_counters entry. */
struct pflowstats_ctr {
	counter_u64_t	c[pflow_ncounters];
};
133 
134 /**
135  * Locking concept
136  *
137  * The list of pflow devices (V_pflowif_list) is managed through epoch.
138  * It is safe to read the list without locking (while in NET_EPOCH).
139  * There may only be one simultaneous modifier, hence we need V_pflow_list_mtx
140  * on every add/delete.
141  *
142  * Each pflow interface protects its own data with the sc_lock mutex.
143  *
144  * We do not require any pf locks, and in fact expect to be called without
145  * hashrow locks held.
146  **/
147 
/* Unit-number allocator for pflow device ids. */
VNET_DEFINE(struct unrhdr *,	pflow_unr);
#define	V_pflow_unr	VNET(pflow_unr)
/* Epoch-protected list of all pflow softcs in this VNET. */
VNET_DEFINE(CK_LIST_HEAD(, pflow_softc), pflowif_list);
#define	V_pflowif_list	VNET(pflowif_list)
/* Serializes list add/remove; also backs the unr(9) allocator above. */
VNET_DEFINE(struct mtx, pflowif_list_mtx);
#define	V_pflowif_list_mtx	VNET(pflowif_list_mtx)
/* Per-VNET statistics counters (see struct pflowstats_ctr). */
VNET_DEFINE(struct pflowstats_ctr,	 pflowstat);
#define	V_pflowstats	VNET(pflowstat)

/* Helpers for the per-softc mutex protecting all softc state. */
#define	PFLOW_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock)
#define	PFLOW_UNLOCK(_sc)	mtx_unlock(&(_sc)->sc_lock)
#define	PFLOW_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED)

SYSCTL_NODE(_net, OID_AUTO, pflow, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "PFLOW");
SYSCTL_PROC(_net_pflow, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, 0, sysctl_pflowstats, "S,pflowstats",
    "PFLOW statistics (struct pflowstats, net/if_pflow.h)");
166 
/* Bump one of this VNET's pflow statistics counters by one. */
static inline void
pflowstat_inc(enum pflowstat_counters c)
{
	counter_u64_add(V_pflowstats.c[c], 1);
}
172 
/*
 * Per-VNET initialization: empty interface list, its mutex, the
 * unit-number allocator and the statistics counters.
 */
static void
vnet_pflowattach(void)
{
	CK_LIST_INIT(&V_pflowif_list);
	mtx_init(&V_pflowif_list_mtx, "pflow interface list mtx", NULL, MTX_DEF);

	/* Unit ids are recycled through a unr(9) allocator backed by the list mutex. */
	V_pflow_unr = new_unrhdr(0, PFLOW_MAX_ENTRIES - 1, &V_pflowif_list_mtx);

	for (int i = 0; i < pflow_ncounters; i++)
		V_pflowstats.c[i] = counter_u64_alloc(M_WAITOK);
}
VNET_SYSINIT(vnet_pflowattach, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pflowattach, NULL);
186 
/*
 * Per-VNET teardown: destroy every remaining pflow interface, then the
 * allocator, the list mutex and the counters.  Iterating while
 * destroying appears safe here because pflow_destroy() only unlinks the
 * softc and defers the actual free to an epoch callback, so sc_next
 * stays readable — NOTE(review): confirm against CK_LIST semantics.
 */
static void
vnet_pflowdetach(void)
{
	struct pflow_softc	*sc;

	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
		pflow_destroy(sc->sc_id, false);
	}

	MPASS(CK_LIST_EMPTY(&V_pflowif_list));
	delete_unrhdr(V_pflow_unr);
	mtx_destroy(&V_pflowif_list_mtx);

	for (int i = 0; i < pflow_ncounters; i++)
		counter_u64_free(V_pflowstats.c[i]);
}
VNET_SYSUNINIT(vnet_pflowdetach, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
    vnet_pflowdetach, NULL);
205 
/*
 * Final per-VNET cleanup step: wait for any outstanding epoch callbacks
 * (pflow_free_cb) scheduled by pflow_destroy() to complete.
 */
static void
vnet_pflow_finalise(void)
{
	/*
	 * Ensure we've freed all interfaces, and do not have pending
	 * epoch cleanup calls.
	 */
	NET_EPOCH_DRAIN_CALLBACKS();
}
VNET_SYSUNINIT(vnet_pflow_finalise, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
    vnet_pflow_finalise, NULL);
217 
/*
 * Software-interrupt handler (registered via swi_add() in
 * pflow_create()): detach all queued export packets under the softc
 * lock, then transmit them with the lock dropped so producers can keep
 * queueing while we send.
 */
static void
pflow_output_process(void *arg)
{
	struct mbufq ml;
	struct pflow_softc *sc = arg;
	struct mbuf *m;

	mbufq_init(&ml, 0);

	/* Steal the whole queue in one short critical section. */
	PFLOW_LOCK(sc);
	mbufq_concat(&ml, &sc->sc_outputqueue);
	PFLOW_UNLOCK(sc);

	/* Transmit in the softc's home VNET. */
	CURVNET_SET(sc->sc_vnet);
	while ((m = mbufq_dequeue(&ml)) != NULL) {
		pflow_sendout_mbuf(sc, m);
	}
	CURVNET_RESTORE();
}
237 
238 static int
239 pflow_create(int unit)
240 {
241 	struct pflow_softc	*pflowif;
242 	int			 error;
243 
244 	pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
245 	mtx_init(&pflowif->sc_lock, "pflowlk", NULL, MTX_DEF);
246 	pflowif->sc_version = PFLOW_PROTO_DEFAULT;
247 	pflowif->sc_observation_dom = PFLOW_ENGINE_TYPE;
248 
249 	/* ipfix template init */
250 	bzero(&pflowif->sc_tmpl_ipfix,sizeof(pflowif->sc_tmpl_ipfix));
251 	pflowif->sc_tmpl_ipfix.set_header.set_id =
252 	    htons(PFLOW_IPFIX_TMPL_SET_ID);
253 	pflowif->sc_tmpl_ipfix.set_header.set_length =
254 	    htons(sizeof(struct pflow_ipfix_tmpl));
255 
256 	/* ipfix IPv4 template */
257 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.tmpl_id =
258 	    htons(PFLOW_IPFIX_TMPL_IPV4_ID);
259 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.field_count
260 	    = htons(PFLOW_IPFIX_TMPL_IPV4_FIELD_COUNT);
261 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.field_id =
262 	    htons(PFIX_IE_sourceIPv4Address);
263 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.len = htons(4);
264 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.field_id =
265 	    htons(PFIX_IE_destinationIPv4Address);
266 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.len = htons(4);
267 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.field_id =
268 	    htons(PFIX_IE_ingressInterface);
269 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.len = htons(4);
270 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.field_id =
271 	    htons(PFIX_IE_egressInterface);
272 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.len = htons(4);
273 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.field_id =
274 	    htons(PFIX_IE_packetDeltaCount);
275 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.len = htons(8);
276 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.field_id =
277 	    htons(PFIX_IE_octetDeltaCount);
278 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.len = htons(8);
279 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.field_id =
280 	    htons(PFIX_IE_flowStartMilliseconds);
281 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.len = htons(8);
282 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.field_id =
283 	    htons(PFIX_IE_flowEndMilliseconds);
284 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.len = htons(8);
285 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.field_id =
286 	    htons(PFIX_IE_sourceTransportPort);
287 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.len = htons(2);
288 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.field_id =
289 	    htons(PFIX_IE_destinationTransportPort);
290 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.len = htons(2);
291 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.field_id =
292 	    htons(PFIX_IE_ipClassOfService);
293 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.len = htons(1);
294 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.field_id =
295 	    htons(PFIX_IE_protocolIdentifier);
296 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.len = htons(1);
297 
298 	/* ipfix IPv6 template */
299 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.tmpl_id =
300 	    htons(PFLOW_IPFIX_TMPL_IPV6_ID);
301 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.field_count =
302 	    htons(PFLOW_IPFIX_TMPL_IPV6_FIELD_COUNT);
303 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.field_id =
304 	    htons(PFIX_IE_sourceIPv6Address);
305 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.len = htons(16);
306 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.field_id =
307 	    htons(PFIX_IE_destinationIPv6Address);
308 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.len = htons(16);
309 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.field_id =
310 	    htons(PFIX_IE_ingressInterface);
311 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.len = htons(4);
312 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.field_id =
313 	    htons(PFIX_IE_egressInterface);
314 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.len = htons(4);
315 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.field_id =
316 	    htons(PFIX_IE_packetDeltaCount);
317 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.len = htons(8);
318 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.field_id =
319 	    htons(PFIX_IE_octetDeltaCount);
320 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.len = htons(8);
321 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.field_id =
322 	    htons(PFIX_IE_flowStartMilliseconds);
323 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.len = htons(8);
324 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.field_id =
325 	    htons(PFIX_IE_flowEndMilliseconds);
326 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.len = htons(8);
327 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.field_id =
328 	    htons(PFIX_IE_sourceTransportPort);
329 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.len = htons(2);
330 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.field_id =
331 	    htons(PFIX_IE_destinationTransportPort);
332 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.len = htons(2);
333 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.field_id =
334 	    htons(PFIX_IE_ipClassOfService);
335 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.len = htons(1);
336 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.field_id =
337 	    htons(PFIX_IE_protocolIdentifier);
338 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.len = htons(1);
339 
340 	/* NAT44 create template */
341 	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.tmpl_id =
342 	    htons(PFLOW_IPFIX_TMPL_NAT44_ID);
343 	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.field_count =
344 	    htons(PFLOW_IPFIX_TMPL_NAT44_FIELD_COUNT);
345 	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.field_id =
346 	    htons(PFIX_IE_timeStamp);
347 	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.len =
348 	    htons(8);
349 	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.field_id =
350 	    htons(PFIX_IE_natEvent);
351 	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.len =
352 	    htons(1);
353 	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.field_id =
354 	    htons(PFIX_IE_protocolIdentifier);
355 	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.len = htons(1);
356 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.field_id =
357 	    htons(PFIX_IE_sourceIPv4Address);
358 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.len =
359 	    htons(4);
360 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.field_id =
361 	    htons(PFIX_IE_sourceTransportPort);
362 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.len = htons(2);
363 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.field_id =
364 	    htons(PFIX_IE_postNATSourceIPv4Address);
365 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.len =
366 	    htons(4);
367 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.field_id =
368 	    htons(PFIX_IE_postNAPTSourceTransportPort);
369 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.len =
370 	    htons(2);
371 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.field_id =
372 	    htons(PFIX_IE_destinationIPv4Address);
373 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.len =
374 	    htons(4);
375 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.field_id =
376 	    htons(PFIX_IE_destinationTransportPort);
377 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.len = htons(2);
378 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.field_id =
379 	    htons(PFIX_IE_postNATDestinationIPv4Address);
380 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.len =
381 	    htons(4);
382 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.field_id =
383 	    htons(PFIX_IE_postNAPTDestinationTransportPort);
384 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.len =
385 	    htons(2);
386 
387 	pflowif->sc_id = unit;
388 	pflowif->sc_vnet = curvnet;
389 
390 	mbufq_init(&pflowif->sc_outputqueue, 8192);
391 	pflow_setmtu(pflowif, ETHERMTU);
392 
393 	callout_init_mtx(&pflowif->sc_tmo, &pflowif->sc_lock, 0);
394 	callout_init_mtx(&pflowif->sc_tmo6, &pflowif->sc_lock, 0);
395 	callout_init_mtx(&pflowif->sc_tmo_nat4, &pflowif->sc_lock, 0);
396 	callout_init_mtx(&pflowif->sc_tmo_tmpl, &pflowif->sc_lock, 0);
397 
398 	error = swi_add(&pflowif->sc_swi_ie, pflowname, pflow_output_process,
399 	    pflowif, SWI_NET, INTR_MPSAFE, &pflowif->sc_swi_cookie);
400 	if (error) {
401 		free(pflowif, M_DEVBUF);
402 		return (error);
403 	}
404 
405 	/* Insert into list of pflows */
406 	mtx_lock(&V_pflowif_list_mtx);
407 	CK_LIST_INSERT_HEAD(&V_pflowif_list, pflowif, sc_next);
408 	mtx_unlock(&V_pflowif_list_mtx);
409 
410 	V_pflow_export_state_ptr = export_pflow;
411 
412 	return (0);
413 }
414 
415 static void
416 pflow_free_cb(struct epoch_context *ctx)
417 {
418 	struct pflow_softc *sc;
419 
420 	sc = __containerof(ctx, struct pflow_softc, sc_epoch_ctx);
421 
422 	free(sc, M_DEVBUF);
423 }
424 
/*
 * Tear down the pflow interface with the given unit number.  With
 * 'drain' set, wait for the net epoch to drain so no exporter can still
 * be inside the softc (skipped during whole-VNET teardown, which drains
 * separately in vnet_pflow_finalise()).  Returns ENOENT if no such
 * unit exists, otherwise 0.
 */
static int
pflow_destroy(int unit, bool drain)
{
	struct pflow_softc	*sc;
	int			 error __diagused;

	mtx_lock(&V_pflowif_list_mtx);
	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
		if (sc->sc_id == unit)
			break;
	}
	if (sc == NULL) {
		mtx_unlock(&V_pflowif_list_mtx);
		return (ENOENT);
	}
	CK_LIST_REMOVE(sc, sc_next);
	/* Last interface gone: stop pf from calling into us. */
	if (CK_LIST_EMPTY(&V_pflowif_list))
		V_pflow_export_state_ptr = NULL;
	mtx_unlock(&V_pflowif_list_mtx);

	sc->sc_dying = 1;

	if (drain) {
		/* Let's be sure no one is using this interface any more. */
		NET_EPOCH_DRAIN_CALLBACKS();
	}

	/* No further output processing beyond this point. */
	error = swi_remove(sc->sc_swi_cookie);
	MPASS(error == 0);
	error = intr_event_destroy(sc->sc_swi_ie);
	MPASS(error == 0);

	/* Wait out any running flush timeouts before freeing their state. */
	callout_drain(&sc->sc_tmo);
	callout_drain(&sc->sc_tmo6);
	callout_drain(&sc->sc_tmo_nat4);
	callout_drain(&sc->sc_tmo_tmpl);

	/* m_freem() tolerates NULL for never-used record mbufs. */
	m_freem(sc->sc_mbuf);
	m_freem(sc->sc_mbuf6);
	m_freem(sc->sc_mbuf_nat4);

	PFLOW_LOCK(sc);
	mbufq_drain(&sc->sc_outputqueue);
	if (sc->so != NULL) {
		soclose(sc->so);
		sc->so = NULL;
	}
	if (sc->sc_flowdst != NULL)
		free(sc->sc_flowdst, M_DEVBUF);
	if (sc->sc_flowsrc != NULL)
		free(sc->sc_flowsrc, M_DEVBUF);
	PFLOW_UNLOCK(sc);

	mtx_destroy(&sc->sc_lock);

	/* Return the unit id to the allocator for reuse. */
	free_unr(V_pflow_unr, unit);

	/* The softc itself is freed only after all epoch readers are done. */
	NET_EPOCH_CALL(pflow_free_cb, &sc->sc_epoch_ctx);

	return (0);
}
486 
/*
 * Check that a sockaddr names a usable flow endpoint: a non-wildcard
 * IPv4/IPv6 address and, unless ignore_port is set, a non-zero port.
 * Returns nonzero if usable, 0 otherwise (including NULL or an
 * unsupported address family).
 */
static int
pflowvalidsockaddr(const struct sockaddr *sa, int ignore_port)
{
	if (sa == NULL)
		return (0);

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *sin4 =
		    (const struct sockaddr_in *)sa;

		if (sin4->sin_addr.s_addr == INADDR_ANY)
			return (0);
		return (ignore_port || sin4->sin_port != 0);
	}

	if (sa->sa_family == AF_INET6) {
		const struct sockaddr_in6 *sin6 =
		    (const struct sockaddr_in6 *)sa;

		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
			return (0);
		return (ignore_port || sin6->sin6_port != 0);
	}

	return (0);
}
508 
/*
 * For the given MTU and version-header size, compute how many IPv4,
 * IPv6 and NAT44 IPFIX records fit in one datagram (each capped at
 * PFLOW_MAXFLOWS) and store the counts in the softc.  Returns
 * hdrsz + sizeof(struct udpiphdr) + the smallest of the three full-set
 * payload sizes, i.e. the minimum workable packet size.
 *
 * NOTE: declared static in the prototype above; the plain definition
 * here still keeps internal linkage (C11 6.2.2p5).
 */
int
pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
{
	size_t min;

	sc->sc_maxcount4 = (mtu - hdrsz -
	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
	sc->sc_maxcount6 = (mtu - hdrsz -
	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow6);
	sc->sc_maxcount_nat4 = (mtu - hdrsz -
	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_nat4);
	if (sc->sc_maxcount4 > PFLOW_MAXFLOWS)
		sc->sc_maxcount4 = PFLOW_MAXFLOWS;
	if (sc->sc_maxcount6 > PFLOW_MAXFLOWS)
		sc->sc_maxcount6 = PFLOW_MAXFLOWS;
	if (sc->sc_maxcount_nat4 > PFLOW_MAXFLOWS)
		sc->sc_maxcount_nat4 = PFLOW_MAXFLOWS;

	min = MIN(sc->sc_maxcount4 * sizeof(struct pflow_ipfix_flow4),
	    sc->sc_maxcount6 * sizeof(struct pflow_ipfix_flow6));
	min = MIN(min, sc->sc_maxcount_nat4 * sizeof(struct pflow_ipfix_nat4));

	return (hdrsz + sizeof(struct udpiphdr) + min);
}
533 
534 static void
535 pflow_setmtu(struct pflow_softc *sc, int mtu_req)
536 {
537 	int	mtu;
538 
539 	mtu = mtu_req;
540 
541 	switch (sc->sc_version) {
542 	case PFLOW_PROTO_5:
543 		sc->sc_maxcount = (mtu - sizeof(struct pflow_header) -
544 		    sizeof(struct udpiphdr)) / sizeof(struct pflow_flow);
545 		if (sc->sc_maxcount > PFLOW_MAXFLOWS)
546 		    sc->sc_maxcount = PFLOW_MAXFLOWS;
547 		break;
548 	case PFLOW_PROTO_10:
549 		pflow_calc_mtu(sc, mtu, sizeof(struct pflow_v10_header));
550 		break;
551 	default: /* NOTREACHED */
552 		break;
553 	}
554 }
555 
/*
 * Allocate a cluster mbuf for a new export packet.  With sc == NULL an
 * empty mbuf is returned.  Otherwise the version-specific header is
 * prepended: for NetFlow v5 the full header (sequence filled in,
 * per-packet count reset, flush timeout restarted); for IPFIX a set
 * header for 'set_id' whose set_length is left 0 here and must be fixed
 * up before transmission.  Returns NULL on allocation failure (counted
 * in pflow_onomem).
 */
static struct mbuf *
pflow_get_mbuf(struct pflow_softc *sc, u_int16_t set_id)
{
	struct pflow_set_header	 set_hdr;
	struct pflow_header	 h;
	struct mbuf		*m;

	MGETHDR(m, M_NOWAIT, MT_DATA);
	if (m == NULL) {
		pflowstat_inc(pflow_onomem);
		return (NULL);
	}

	MCLGET(m, M_NOWAIT);
	if ((m->m_flags & M_EXT) == 0) {
		m_free(m);
		pflowstat_inc(pflow_onomem);
		return (NULL);
	}

	m->m_len = m->m_pkthdr.len = 0;

	if (sc == NULL)		/* get only a new empty mbuf */
		return (m);

	switch (sc->sc_version) {
	case PFLOW_PROTO_5:
		/* populate pflow_header */
		h.reserved1 = 0;
		h.reserved2 = 0;
		h.count = 0;
		h.version = htons(PFLOW_PROTO_5);
		h.flow_sequence = htonl(sc->sc_gcounter);
		h.engine_type = PFLOW_ENGINE_TYPE;
		h.engine_id = PFLOW_ENGINE_ID;
		m_copyback(m, 0, PFLOW_HDRLEN, (caddr_t)&h);

		sc->sc_count = 0;
		/* Restart the flush timeout for this fresh packet. */
		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
		    pflow_timeout, sc);
		break;
	case PFLOW_PROTO_10:
		/* populate pflow_set_header */
		set_hdr.set_length = 0;
		set_hdr.set_id = htons(set_id);
		m_copyback(m, 0, PFLOW_SET_HDRLEN, (caddr_t)&set_hdr);
		break;
	default: /* NOTREACHED */
		break;
	}

	return (m);
}
609 
/*
 * Fill a pair of NetFlow v5 records from one pf state: flow1 describes
 * the src->dst direction, flow2 the reverse.  'src'/'dst' index the
 * state key's addr/port arrays according to the state's direction
 * (chosen by pflow_pack_flow()).
 */
static void
copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2,
    const struct pf_kstate *st, struct pf_state_key *sk, int src, int dst)
{
	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
	flow1->src_port = flow2->dest_port = sk->port[src];
	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
	flow1->dest_port = flow2->src_port = sk->port[dst];

	/* AS numbers and prefix masks are not available here; export 0. */
	flow1->dest_as = flow2->src_as =
	    flow1->src_as = flow2->dest_as = 0;
	flow1->if_index_in = htons(st->if_index_in);
	flow1->if_index_out = htons(st->if_index_out);
	/* The reverse record swaps ingress and egress interfaces. */
	flow2->if_index_in = htons(st->if_index_out);
	flow2->if_index_out = htons(st->if_index_in);
	flow1->dest_mask = flow2->src_mask =
	    flow1->src_mask = flow2->dest_mask = 0;

	/* v5 counters are 32-bit; export_pflow_if() chunks larger flows. */
	flow1->flow_packets = htonl(st->packets[0]);
	flow2->flow_packets = htonl(st->packets[1]);
	flow1->flow_octets = htonl(st->bytes[0]);
	flow2->flow_octets = htonl(st->bytes[1]);

	/*
	 * Pretend the flow was created or expired when the machine came up
	 * when creation is in the future of the last time a package was seen
	 * or was created / expired before this machine came up due to pfsync.
	 */
	flow1->flow_start = flow2->flow_start = st->creation < 0 ||
	    st->creation > st->expire ? htonl(0) : htonl(st->creation);
	flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? htonl(0) :
	    htonl(st->expire);
	flow1->tcp_flags = flow2->tcp_flags = 0;
	flow1->protocol = flow2->protocol = sk->proto;
	flow1->tos = flow2->tos = st->rule->tos;
}
646 
/*
 * Fill a pair of IPFIX IPv4 flow records from one pf state: flow1 is
 * the src->dst direction, flow2 the reverse.  'sc' is currently unused;
 * it keeps the signature parallel with the IPv6 variant.
 */
static void
copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *flow1,
    struct pflow_ipfix_flow4 *flow2, const struct pf_kstate *st,
    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
{
	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
	flow1->src_port = flow2->dest_port = sk->port[src];
	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
	flow1->dest_port = flow2->src_port = sk->port[dst];

	flow1->if_index_in = htonl(st->if_index_in);
	flow1->if_index_out = htonl(st->if_index_out);
	/* The reverse record swaps ingress and egress interfaces. */
	flow2->if_index_in = htonl(st->if_index_out);
	flow2->if_index_out = htonl(st->if_index_in);

	/* IPFIX carries full 64-bit counters; no chunking needed. */
	flow1->flow_packets = htobe64(st->packets[0]);
	flow2->flow_packets = htobe64(st->packets[1]);
	flow1->flow_octets = htobe64(st->bytes[0]);
	flow2->flow_octets = htobe64(st->bytes[1]);

	/*
	 * Pretend the flow was created when the machine came up when creation
	 * is in the future of the last time a package was seen due to pfsync.
	 */
	if (st->creation > st->expire)
		flow1->flow_start = flow2->flow_start = htobe64((time_second -
		    time_uptime)*1000);
	else
		/* Convert pf's uptime-based stamps to wall-clock ms. */
		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
		    (pf_get_uptime() - st->creation)));
	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
	    (pf_get_uptime() - st->expire)));

	flow1->protocol = flow2->protocol = sk->proto;
	flow1->tos = flow2->tos = st->rule->tos;
}
683 
/*
 * Fill a pair of IPFIX IPv6 flow records from one pf state: flow1 is
 * the src->dst direction, flow2 the reverse.  Mirrors
 * copy_flow_ipfix_4_data() but copies 128-bit addresses.  'sc' is
 * currently unused.
 */
static void
copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *flow1,
    struct pflow_ipfix_flow6 *flow2, const struct pf_kstate *st,
    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
{
	bcopy(&sk->addr[src].v6, &flow1->src_ip, sizeof(flow1->src_ip));
	bcopy(&sk->addr[src].v6, &flow2->dest_ip, sizeof(flow2->dest_ip));
	flow1->src_port = flow2->dest_port = sk->port[src];
	bcopy(&sk->addr[dst].v6, &flow1->dest_ip, sizeof(flow1->dest_ip));
	bcopy(&sk->addr[dst].v6, &flow2->src_ip, sizeof(flow2->src_ip));
	flow1->dest_port = flow2->src_port = sk->port[dst];

	flow1->if_index_in = htonl(st->if_index_in);
	flow1->if_index_out = htonl(st->if_index_out);
	/* The reverse record swaps ingress and egress interfaces. */
	flow2->if_index_in = htonl(st->if_index_out);
	flow2->if_index_out = htonl(st->if_index_in);

	flow1->flow_packets = htobe64(st->packets[0]);
	flow2->flow_packets = htobe64(st->packets[1]);
	flow1->flow_octets = htobe64(st->bytes[0]);
	flow2->flow_octets = htobe64(st->bytes[1]);

	/*
	 * Pretend the flow was created when the machine came up when creation
	 * is in the future of the last time a package was seen due to pfsync.
	 */
	if (st->creation > st->expire)
		flow1->flow_start = flow2->flow_start = htobe64((time_second -
		    time_uptime)*1000);
	else
		/* Convert pf's uptime-based stamps to wall-clock ms. */
		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
		    (pf_get_uptime() - st->creation)));
	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
	    (pf_get_uptime() - st->expire)));

	flow1->protocol = flow2->protocol = sk->proto;
	flow1->tos = flow2->tos = st->rule->tos;
}
722 
/*
 * Fill a pair of IPFIX NAT44 records from one pf state.  The pre-NAT
 * tuple comes from the stack-side key (PF_SK_STACK) and the post-NAT
 * tuple from the wire-side key (PF_SK_WIRE); nat1 is the src->dst
 * direction, nat2 the reverse.  'sk' supplies only the protocol.
 */
static void
copy_nat_ipfix_4_data(struct pflow_ipfix_nat4 *nat1,
    struct pflow_ipfix_nat4 *nat2, const struct pf_kstate *st,
    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
{
	nat1->src_ip = nat2->dest_ip = st->key[PF_SK_STACK]->addr[src].v4.s_addr;
	nat1->src_port = nat2->dest_port = st->key[PF_SK_STACK]->port[src];
	nat1->dest_ip = nat2->src_ip = st->key[PF_SK_STACK]->addr[dst].v4.s_addr;
	nat1->dest_port = nat2->src_port = st->key[PF_SK_STACK]->port[dst];
	nat1->postnat_src_ip = nat2->postnat_dest_ip = st->key[PF_SK_WIRE]->addr[src].v4.s_addr;
	nat1->postnat_src_port = nat2->postnat_dest_port = st->key[PF_SK_WIRE]->port[src];
	nat1->postnat_dest_ip = nat2->postnat_src_ip = st->key[PF_SK_WIRE]->addr[dst].v4.s_addr;
	nat1->postnat_dest_port = nat2->postnat_src_port = st->key[PF_SK_WIRE]->port[dst];
	nat1->protocol = nat2->protocol = sk->proto;

	/*
	 * Because we have to generate a create and delete event we'll fill out the
	 * timestamp and nat_event fields when we transmit. As opposed to doing this
	 * work a second time.
	*/
}
744 
/*
 * Export one pf state to every pflow interface in this VNET whose
 * protocol version can represent it (v5: IPv4 only; IPFIX: IPv4 and
 * IPv6).  Installed as V_pflow_export_state_ptr by pflow_create().
 * Runs inside the net epoch; each softc is locked around its export
 * (see the locking notes near the top of the file).
 */
static void
export_pflow(const struct pf_kstate *st)
{
	struct pflow_softc	*sc = NULL;
	struct pf_state_key	*sk;

	NET_EPOCH_ASSERT();

	/* e.g. if pf_state_key_attach() fails. */
	if (st->key[PF_SK_STACK] == NULL || st->key[PF_SK_WIRE] == NULL)
		return;

	/* Pick the key that matches the state's direction. */
	sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK];

	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
		PFLOW_LOCK(sc);
		switch (sc->sc_version) {
		case PFLOW_PROTO_5:
			if (sk->af == AF_INET)
				export_pflow_if(st, sk, sc);
			break;
		case PFLOW_PROTO_10:
			if (sk->af == AF_INET || sk->af == AF_INET6)
				export_pflow_if(st, sk, sc);
			break;
		default: /* NOTREACHED */
			break;
		}
		PFLOW_UNLOCK(sc);
	}
}
776 
/*
 * Hand one state to a single pflow interface.  IPFIX handles 64-bit
 * counters natively; for NetFlow v5 the 32-bit octet fields force
 * flows larger than PFLOW_MAXBYTES to be split into multiple records,
 * built from a local copy of the state so the original is untouched.
 * Returns 0 or the first packing error.
 */
static int
export_pflow_if(const struct pf_kstate *st, struct pf_state_key *sk,
    struct pflow_softc *sc)
{
	struct pf_kstate	 pfs_copy;
	u_int64_t		 bytes[2];
	int			 ret = 0;

	if (sc->sc_version == PFLOW_PROTO_10)
		return (pflow_pack_flow_ipfix(st, sk, sc));

	/* PFLOW_PROTO_5 */
	if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES)
	    && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES))
		return (pflow_pack_flow(st, sk, sc));

	/* flow > PFLOW_MAXBYTES need special handling */
	bcopy(st, &pfs_copy, sizeof(pfs_copy));
	bytes[0] = pfs_copy.bytes[0];
	bytes[1] = pfs_copy.bytes[1];

	/* Emit full-sized chunks for direction 0, zeroing direction 1. */
	while (bytes[0] > PFLOW_MAXBYTES) {
		pfs_copy.bytes[0] = PFLOW_MAXBYTES;
		pfs_copy.bytes[1] = 0;

		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
			return (ret);
		/* Always true here given the loop condition. */
		if ((bytes[0] - PFLOW_MAXBYTES) > 0)
			bytes[0] -= PFLOW_MAXBYTES;
	}

	/* Same for direction 1. */
	while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) {
		pfs_copy.bytes[1] = PFLOW_MAXBYTES;
		pfs_copy.bytes[0] = 0;

		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
			return (ret);
		/* Always true here given the loop condition. */
		if ((bytes[1] - PFLOW_MAXBYTES) > 0)
			bytes[1] -= PFLOW_MAXBYTES;
	}

	/* Final record carries the remainders of both directions. */
	pfs_copy.bytes[0] = bytes[0];
	pfs_copy.bytes[1] = bytes[1];

	return (pflow_pack_flow(&pfs_copy, sk, sc));
}
823 
/*
 * Append one NetFlow v5 record to the pending export mbuf, allocating
 * it (which also seeds the v5 header and restarts the flush timeout)
 * on first use.  Sends the packet once sc_maxcount records are queued.
 * Returns 0, ENOBUFS, or an error from pflow_sendout_v5().  Called
 * with the softc lock held.
 */
static int
copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc)
{
	int		ret = 0;

	PFLOW_ASSERT(sc);

	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL)
			return (ENOBUFS);
	}
	m_copyback(sc->sc_mbuf, PFLOW_HDRLEN +
	    (sc->sc_count * sizeof(struct pflow_flow)),
	    sizeof(struct pflow_flow), (caddr_t)flow);

	/* sc_gcounter feeds the v5 flow_sequence of the next packet. */
	pflowstat_inc(pflow_flows);
	sc->sc_gcounter++;
	sc->sc_count++;

	if (sc->sc_count >= sc->sc_maxcount)
		ret = pflow_sendout_v5(sc);

	return(ret);
}
848 
/*
 * Append one IPFIX IPv4 record to the pending IPv4 set mbuf, allocating
 * it and arming the IPv4 flush timeout on first use (sc_mbuf/sc_tmo are
 * shared with the v5 path; a softc only ever runs one version).  Sends
 * the set once sc_maxcount4 records are queued.  Returns 0, ENOBUFS, or
 * an error from pflow_sendout_ipfix().  Called with the softc lock held.
 */
static int
copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc)
{
	int		ret = 0;

	PFLOW_ASSERT(sc);

	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf =
		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) {
			return (ENOBUFS);
		}
		sc->sc_count4 = 0;
		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
		    pflow_timeout, sc);
	}
	m_copyback(sc->sc_mbuf, PFLOW_SET_HDRLEN +
	    (sc->sc_count4 * sizeof(struct pflow_ipfix_flow4)),
	    sizeof(struct pflow_ipfix_flow4), (caddr_t)flow);

	pflowstat_inc(pflow_flows);
	sc->sc_gcounter++;
	sc->sc_count4++;

	if (sc->sc_count4 >= sc->sc_maxcount4)
		ret = pflow_sendout_ipfix(sc, PFLOW_INET);
	return(ret);
}
877 
/*
 * Append one IPFIX IPv6 record to the pending IPv6 set mbuf, allocating
 * it and arming the IPv6 flush timeout (sc_tmo6) on first use.  Sends
 * the set once sc_maxcount6 records are queued.  Returns 0, ENOBUFS, or
 * an error from pflow_sendout_ipfix().  Called with the softc lock held.
 */
static int
copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc)
{
	int		ret = 0;

	PFLOW_ASSERT(sc);

	if (sc->sc_mbuf6 == NULL) {
		if ((sc->sc_mbuf6 =
		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) {
			return (ENOBUFS);
		}
		sc->sc_count6 = 0;
		callout_reset(&sc->sc_tmo6, PFLOW_TIMEOUT * hz,
		    pflow_timeout6, sc);
	}
	m_copyback(sc->sc_mbuf6, PFLOW_SET_HDRLEN +
	    (sc->sc_count6 * sizeof(struct pflow_ipfix_flow6)),
	    sizeof(struct pflow_ipfix_flow6), (caddr_t)flow);

	pflowstat_inc(pflow_flows);
	sc->sc_gcounter++;
	sc->sc_count6++;

	if (sc->sc_count6 >= sc->sc_maxcount6)
		ret = pflow_sendout_ipfix(sc, PFLOW_INET6);

	return(ret);
}
907 
908 int
909 copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *nat, const struct pf_kstate *st,
910     struct pflow_softc *sc, uint8_t event, uint64_t timestamp)
911 {
912 	int		ret = 0;
913 
914 	PFLOW_ASSERT(sc);
915 
916 	if (sc->sc_mbuf_nat4 == NULL) {
917 		if ((sc->sc_mbuf_nat4 =
918 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_NAT44_ID)) == NULL) {
919 			return (ENOBUFS);
920 		}
921 		sc->sc_count_nat4 = 0;
922 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
923 		    pflow_timeout_nat4, sc);
924 	}
925 
926 	nat->nat_event = event;
927 	nat->timestamp = htobe64(pf_get_time() - (pf_get_uptime() - timestamp));
928 	m_copyback(sc->sc_mbuf_nat4, PFLOW_SET_HDRLEN +
929 	    (sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4)),
930 	    sizeof(struct pflow_ipfix_nat4), (caddr_t)nat);
931 	sc->sc_count_nat4++;
932 
933 	pflowstat_inc(pflow_flows);
934 	sc->sc_gcounter++;
935 
936 	if (sc->sc_count_nat4 >= sc->sc_maxcount_nat4)
937 		ret = pflow_sendout_ipfix(sc, PFLOW_NAT4);
938 
939 	return (ret);
940 }
941 
942 static int
943 pflow_pack_flow(const struct pf_kstate *st, struct pf_state_key *sk,
944     struct pflow_softc *sc)
945 {
946 	struct pflow_flow	 flow1;
947 	struct pflow_flow	 flow2;
948 	int			 ret = 0;
949 
950 	bzero(&flow1, sizeof(flow1));
951 	bzero(&flow2, sizeof(flow2));
952 
953 	if (st->direction == PF_OUT)
954 		copy_flow_data(&flow1, &flow2, st, sk, 1, 0);
955 	else
956 		copy_flow_data(&flow1, &flow2, st, sk, 0, 1);
957 
958 	if (st->bytes[0] != 0) /* first flow from state */
959 		ret = copy_flow_to_m(&flow1, sc);
960 
961 	if (st->bytes[1] != 0) /* second flow from state */
962 		ret = copy_flow_to_m(&flow2, sc);
963 
964 	return (ret);
965 }
966 
/*
 * Decide whether this state was translated (NAT) by comparing the
 * wire-side and stack-side state keys.
 *
 * NOTE(review): the memcmp assumes the compared region of struct
 * pf_state_key is two struct pf_addr followed immediately by two
 * 16-bit ports, with no padding in between — confirm against the
 * pf_state_key definition if that layout ever changes.
 */
static bool
pflow_is_natd(const struct pf_kstate *st)
{
	/* If ports or addresses are different we've been NAT-ed. */
	return (memcmp(st->key[PF_SK_WIRE], st->key[PF_SK_STACK],
	    sizeof(struct pf_addr) * 2 + sizeof(uint16_t) * 2) != 0);
}
974 
/*
 * Convert one pf state into IPFIX records and queue them for export:
 * up to two flow records (one per direction that carried traffic) and,
 * for NAT-ed IPv4 states, a NAT44 session-create plus session-delete
 * event per direction.  Address families other than INET/INET6 are
 * silently ignored (ret stays 0).
 */
static int
pflow_pack_flow_ipfix(const struct pf_kstate *st, struct pf_state_key *sk,
    struct pflow_softc *sc)
{
	struct pflow_ipfix_flow4	 flow4_1, flow4_2;
	struct pflow_ipfix_nat4		 nat4_1, nat4_2;
	struct pflow_ipfix_flow6	 flow6_1, flow6_2;
	int				 ret = 0;
	bool				 nat = false;

	switch (sk->af) {
	case AF_INET:
		bzero(&flow4_1, sizeof(flow4_1));
		bzero(&flow4_2, sizeof(flow4_2));

		nat = pflow_is_natd(st);

		if (st->direction == PF_OUT)
			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
			    1, 0);
		else
			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
			    0, 1);

		if (nat)
			copy_nat_ipfix_4_data(&nat4_1, &nat4_2, st, sk, sc, 1, 0);

		if (st->bytes[0] != 0) /* first flow from state */ {
			ret = copy_flow_ipfix_4_to_m(&flow4_1, sc);

			/* NAT events only if the flow itself was queued. */
			if (ret == 0 && nat) {
				ret = copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
				/*
				 * NOTE(review): OR-ing two errno values can
				 * produce a garbled error code; callers only
				 * appear to test for non-zero.
				 */
				ret |= copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
			}
		}

		if (st->bytes[1] != 0) /* second flow from state */ {
			ret = copy_flow_ipfix_4_to_m(&flow4_2, sc);

			if (ret == 0 && nat) {
				ret = copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
				ret |= copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
			}
		}
		break;
	case AF_INET6:
		bzero(&flow6_1, sizeof(flow6_1));
		bzero(&flow6_2, sizeof(flow6_2));

		if (st->direction == PF_OUT)
			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
			    1, 0);
		else
			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
			    0, 1);

		if (st->bytes[0] != 0) /* first flow from state */
			ret = copy_flow_ipfix_6_to_m(&flow6_1, sc);

		if (st->bytes[1] != 0) /* second flow from state */
			ret = copy_flow_ipfix_6_to_m(&flow6_2, sc);
		break;
	}
	return (ret);
}
1044 
1045 static void
1046 pflow_timeout(void *v)
1047 {
1048 	struct pflow_softc	*sc = v;
1049 
1050 	PFLOW_ASSERT(sc);
1051 	CURVNET_SET(sc->sc_vnet);
1052 
1053 	switch (sc->sc_version) {
1054 	case PFLOW_PROTO_5:
1055 		pflow_sendout_v5(sc);
1056 		break;
1057 	case PFLOW_PROTO_10:
1058 		pflow_sendout_ipfix(sc, PFLOW_INET);
1059 		break;
1060 	default: /* NOTREACHED */
1061 		panic("Unsupported version %d", sc->sc_version);
1062 		break;
1063 	}
1064 
1065 	CURVNET_RESTORE();
1066 }
1067 
1068 static void
1069 pflow_timeout6(void *v)
1070 {
1071 	struct pflow_softc	*sc = v;
1072 
1073 	PFLOW_ASSERT(sc);
1074 
1075 	if (sc->sc_version != PFLOW_PROTO_10)
1076 		return;
1077 
1078 	CURVNET_SET(sc->sc_vnet);
1079 	pflow_sendout_ipfix(sc, PFLOW_INET6);
1080 	CURVNET_RESTORE();
1081 }
1082 
1083 static void
1084 pflow_timeout_tmpl(void *v)
1085 {
1086 	struct pflow_softc	*sc = v;
1087 
1088 	PFLOW_ASSERT(sc);
1089 
1090 	if (sc->sc_version != PFLOW_PROTO_10)
1091 		return;
1092 
1093 	CURVNET_SET(sc->sc_vnet);
1094 	pflow_sendout_ipfix_tmpl(sc);
1095 	CURVNET_RESTORE();
1096 }
1097 
1098 static void
1099 pflow_timeout_nat4(void *v)
1100 {
1101 	struct pflow_softc	*sc = v;
1102 
1103 	PFLOW_ASSERT(sc);
1104 
1105 	if (sc->sc_version != PFLOW_PROTO_10)
1106 		return;
1107 
1108 	CURVNET_SET(sc->sc_vnet);
1109 	pflow_sendout_ipfix(sc, PFLOW_NAT4);
1110 	CURVNET_RESTORE();
1111 }
1112 
1113 static void
1114 pflow_flush(struct pflow_softc *sc)
1115 {
1116 	PFLOW_ASSERT(sc);
1117 
1118 	switch (sc->sc_version) {
1119 	case PFLOW_PROTO_5:
1120 		pflow_sendout_v5(sc);
1121 		break;
1122 	case PFLOW_PROTO_10:
1123 		pflow_sendout_ipfix(sc, PFLOW_INET);
1124 		pflow_sendout_ipfix(sc, PFLOW_INET6);
1125 		pflow_sendout_ipfix(sc, PFLOW_NAT4);
1126 		break;
1127 	default: /* NOTREACHED */
1128 		break;
1129 	}
1130 }
1131 
/*
 * Finalize the pending NetFlow v5 packet — fill in the record count
 * and timestamps in the already-reserved header — and hand it to the
 * software-interrupt output queue.  Consumes sc->sc_mbuf; a NULL mbuf
 * means there is nothing to send.  Always returns 0.
 */
static int
pflow_sendout_v5(struct pflow_softc *sc)
{
	struct mbuf		*m = sc->sc_mbuf;
	struct pflow_header	*h;
	struct timespec		tv;

	PFLOW_ASSERT(sc);

	if (m == NULL)
		return (0);

	/* Detach the mbuf so new records start a fresh packet. */
	sc->sc_mbuf = NULL;

	pflowstat_inc(pflow_packets);
	h = mtod(m, struct pflow_header *);
	h->count = htons(sc->sc_count);

	/* populate pflow_header */
	h->uptime_ms = htonl(time_uptime * 1000);

	getnanotime(&tv);
	h->time_sec = htonl(tv.tv_sec);			/* XXX 2038 */
	h->time_nanosec = htonl(tv.tv_nsec);
	/* Defer the actual transmit to the swi handler. */
	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
		swi_sched(sc->sc_swi_cookie, 0);

	return (0);
}
1161 
/*
 * Finalize and queue the pending IPFIX packet for one record type
 * (IPv4 flows, IPv6 flows, or NAT44 events): stop the corresponding
 * timeout, fill in the set header length, prepend the IPFIX message
 * header, and hand the packet to the swi output queue.  Consumes the
 * per-type pending mbuf; returns 0 if there was nothing to send, or
 * ENOBUFS if prepending the header failed (the mbuf is then gone).
 */
static int
pflow_sendout_ipfix(struct pflow_softc *sc, enum pflow_family_t af)
{
	struct mbuf			*m;
	struct pflow_v10_header		*h10;
	struct pflow_set_header		*set_hdr;
	u_int32_t			 count;
	int				 set_length;

	PFLOW_ASSERT(sc);

	/* Pick up (and detach) the pending mbuf for this record type. */
	switch (af) {
	case PFLOW_INET:
		m = sc->sc_mbuf;
		callout_stop(&sc->sc_tmo);
		if (m == NULL)
			return (0);
		sc->sc_mbuf = NULL;
		count = sc->sc_count4;
		set_length = sizeof(struct pflow_set_header)
		    + sc->sc_count4 * sizeof(struct pflow_ipfix_flow4);
		break;
	case PFLOW_INET6:
		m = sc->sc_mbuf6;
		callout_stop(&sc->sc_tmo6);
		if (m == NULL)
			return (0);
		sc->sc_mbuf6 = NULL;
		count = sc->sc_count6;
		set_length = sizeof(struct pflow_set_header)
		    + sc->sc_count6 * sizeof(struct pflow_ipfix_flow6);
		break;
	case PFLOW_NAT4:
		m = sc->sc_mbuf_nat4;
		callout_stop(&sc->sc_tmo_nat4);
		if (m == NULL)
			return (0);
		sc->sc_mbuf_nat4 = NULL;
		count = sc->sc_count_nat4;
		set_length = sizeof(struct pflow_set_header)
		    + sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4);
		break;
	default:
		panic("Unsupported AF %d", af);
	}

	pflowstat_inc(pflow_packets);

	/* The set header was reserved at the front of the mbuf. */
	set_hdr = mtod(m, struct pflow_set_header *);
	set_hdr->set_length = htons(set_length);

	/* populate pflow_header */
	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
	if (m == NULL) {
		/* M_PREPEND freed the mbuf on failure. */
		pflowstat_inc(pflow_onomem);
		return (ENOBUFS);
	}
	h10 = mtod(m, struct pflow_v10_header *);
	h10->version = htons(PFLOW_PROTO_10);
	h10->length = htons(PFLOW_IPFIX_HDRLEN + set_length);
	h10->time_sec = htonl(time_second);		/* XXX 2038 */
	h10->flow_sequence = htonl(sc->sc_sequence);
	/* IPFIX sequence counts exported data records, not packets. */
	sc->sc_sequence += count;
	h10->observation_dom = htonl(sc->sc_observation_dom);
	/* Defer the actual transmit to the swi handler. */
	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
		swi_sched(sc->sc_swi_cookie, 0);

	return (0);
}
1231 
/*
 * Build and queue an IPFIX template packet (sc_tmpl_ipfix) and re-arm
 * the periodic template retransmission.  Returns 0 if no mbuf was
 * available (silently skipping this round) or the packet was queued,
 * ENOBUFS if prepending the message header failed.
 *
 * NOTE(review): templates do not advance sc_sequence — sequence
 * numbers count data records only.
 */
static int
pflow_sendout_ipfix_tmpl(struct pflow_softc *sc)
{
	struct mbuf			*m;
	struct pflow_v10_header		*h10;

	PFLOW_ASSERT(sc);

	m = pflow_get_mbuf(sc, 0);
	if (m == NULL)
		return (0);
	m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl),
	    (caddr_t)&sc->sc_tmpl_ipfix);

	pflowstat_inc(pflow_packets);

	/* populate pflow_header */
	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
	if (m == NULL) {
		/* M_PREPEND freed the mbuf on failure. */
		pflowstat_inc(pflow_onomem);
		return (ENOBUFS);
	}
	h10 = mtod(m, struct pflow_v10_header *);
	h10->version = htons(PFLOW_PROTO_10);
	h10->length = htons(PFLOW_IPFIX_HDRLEN + sizeof(struct
	    pflow_ipfix_tmpl));
	h10->time_sec = htonl(time_second);		/* XXX 2038 */
	h10->flow_sequence = htonl(sc->sc_sequence);
	h10->observation_dom = htonl(sc->sc_observation_dom);

	/* Re-arm so templates are refreshed periodically. */
	callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
	    pflow_timeout_tmpl, sc);
	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
		swi_sched(sc->sc_swi_cookie, 0);

	return (0);
}
1269 
1270 static int
1271 pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
1272 {
1273 	if (sc->so == NULL) {
1274 		m_freem(m);
1275 		return (EINVAL);
1276 	}
1277 	return (sosend(sc->so, sc->sc_flowdst, NULL, m, NULL, 0, curthread));
1278 }
1279 
1280 static int
1281 sysctl_pflowstats(SYSCTL_HANDLER_ARGS)
1282 {
1283 	struct pflowstats pflowstats;
1284 
1285 	pflowstats.pflow_flows =
1286 	    counter_u64_fetch(V_pflowstats.c[pflow_flows]);
1287 	pflowstats.pflow_packets =
1288 	    counter_u64_fetch(V_pflowstats.c[pflow_packets]);
1289 	pflowstats.pflow_onomem =
1290 	    counter_u64_fetch(V_pflowstats.c[pflow_onomem]);
1291 	pflowstats.pflow_oerrors =
1292 	    counter_u64_fetch(V_pflowstats.c[pflow_oerrors]);
1293 
1294 	return (sysctl_handle_opaque(oidp, &pflowstats, sizeof(pflowstats), req));
1295 }
1296 
1297 static int
1298 pflow_nl_list(struct nlmsghdr *hdr, struct nl_pstate *npt)
1299 {
1300 	struct epoch_tracker	 et;
1301 	struct pflow_softc	*sc = NULL;
1302 	struct nl_writer	 *nw = npt->nw;
1303 	int			 error = 0;
1304 
1305 	hdr->nlmsg_flags |= NLM_F_MULTI;
1306 
1307 	NET_EPOCH_ENTER(et);
1308 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1309 		if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1310 			error = ENOMEM;
1311 			goto out;
1312 		}
1313 
1314 		struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1315 		ghdr_new->cmd = PFLOWNL_CMD_LIST;
1316 		ghdr_new->version = 0;
1317 		ghdr_new->reserved = 0;
1318 
1319 		nlattr_add_u32(nw, PFLOWNL_L_ID, sc->sc_id);
1320 
1321 		if (! nlmsg_end(nw)) {
1322 			error = ENOMEM;
1323 			goto out;
1324 		}
1325 	}
1326 
1327 out:
1328 	NET_EPOCH_EXIT(et);
1329 
1330 	if (error != 0)
1331 		nlmsg_abort(nw);
1332 
1333 	return (error);
1334 }
1335 
1336 static int
1337 pflow_nl_create(struct nlmsghdr *hdr, struct nl_pstate *npt)
1338 {
1339 	struct nl_writer	 *nw = npt->nw;
1340 	int			 error = 0;
1341 	int			 unit;
1342 
1343 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1344 		return (ENOMEM);
1345 	}
1346 
1347 	struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1348 	ghdr_new->cmd = PFLOWNL_CMD_CREATE;
1349 	ghdr_new->version = 0;
1350 	ghdr_new->reserved = 0;
1351 
1352 	unit = alloc_unr(V_pflow_unr);
1353 	if (unit == -1) {
1354 		nlmsg_abort(nw);
1355 		return (ENOMEM);
1356 	}
1357 
1358 	error = pflow_create(unit);
1359 	if (error != 0) {
1360 		free_unr(V_pflow_unr, unit);
1361 		nlmsg_abort(nw);
1362 		return (error);
1363 	}
1364 
1365 	nlattr_add_s32(nw, PFLOWNL_CREATE_ID, unit);
1366 
1367 	if (! nlmsg_end(nw)) {
1368 		pflow_destroy(unit, true);
1369 		return (ENOMEM);
1370 	}
1371 
1372 	return (0);
1373 }
1374 
/* Parsed form of a PFLOWNL_CMD_DEL request: the exporter id to remove. */
struct pflow_parsed_del {
	int id;
};
#define	_IN(_field)	offsetof(struct genlmsghdr, _field)
#define	_OUT(_field)	offsetof(struct pflow_parsed_del, _field)
/* Attribute layout for DEL requests. */
static const struct nlattr_parser nla_p_del[] = {
	{ .type = PFLOWNL_DEL_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
};
/* No fixed header fields are consumed from the genlmsghdr. */
static const struct nlfield_parser nlf_p_del[] = {};
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(del_parser, struct genlmsghdr, nlf_p_del, nla_p_del);
1387 
1388 static int
1389 pflow_nl_del(struct nlmsghdr *hdr, struct nl_pstate *npt)
1390 {
1391 	struct pflow_parsed_del d = {};
1392 	int error;
1393 
1394 	error = nl_parse_nlmsg(hdr, &del_parser, npt, &d);
1395 	if (error != 0)
1396 		return (error);
1397 
1398 	error = pflow_destroy(d.id, true);
1399 
1400 	return (error);
1401 }
1402 
/* Parsed form of a PFLOWNL_CMD_GET request: the exporter id to query. */
struct pflow_parsed_get {
	int id;
};
#define	_IN(_field)	offsetof(struct genlmsghdr, _field)
#define	_OUT(_field)	offsetof(struct pflow_parsed_get, _field)
/* Attribute layout for GET requests. */
static const struct nlattr_parser nla_p_get[] = {
	{ .type = PFLOWNL_GET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
};
/* No fixed header fields are consumed from the genlmsghdr. */
static const struct nlfield_parser nlf_p_get[] = {};
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(get_parser, struct genlmsghdr, nlf_p_get, nla_p_get);
1415 
/*
 * Serialize a sockaddr as a nested netlink attribute: family, port,
 * and the v4 or v6 address.  Returns false if the nested attribute
 * could not be opened.
 *
 * NOTE(review): the default case panics — callers pass sc_flowsrc /
 * sc_flowdst, which pflow_set() only ever populates with AF_INET or
 * AF_INET6, so the panic should be unreachable; confirm if new
 * callers are added.
 */
static bool
nlattr_add_sockaddr(struct nl_writer *nw, int attr, const struct sockaddr *s)
{
	int off = nlattr_add_nested(nw, attr);
	if (off == 0)
		return (false);

	nlattr_add_u8(nw, PFLOWNL_ADDR_FAMILY, s->sa_family);

	switch (s->sa_family) {
	case AF_INET: {
		const struct sockaddr_in *in = (const struct sockaddr_in *)s;
		/* Port is stored in network byte order already. */
		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in->sin_port);
		nlattr_add_in_addr(nw, PFLOWNL_ADDR_IP, &in->sin_addr);
		break;
	}
	case AF_INET6: {
		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)s;
		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in6->sin6_port);
		nlattr_add_in6_addr(nw, PFLOWNL_ADDR_IP6, &in6->sin6_addr);
		break;
	}
	default:
		panic("Unknown address family %d", s->sa_family);
	}

	/* Patch the nested attribute's length now that it is complete. */
	nlattr_set_len(nw, off);
	return (true);
}
1445 
/*
 * PFLOWNL_CMD_GET handler: look up an exporter by id under the net
 * epoch and reply with its full configuration (version, source and
 * destination addresses, observation domain, socket status).
 * Returns ENOENT if the id is unknown, ENOMEM on writer exhaustion.
 */
static int
pflow_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
	struct epoch_tracker et;
	struct pflow_parsed_get g = {};
	struct pflow_softc *sc = NULL;
	struct nl_writer *nw = npt->nw;
	struct genlmsghdr *ghdr_new;
	int error;

	error = nl_parse_nlmsg(hdr, &get_parser, npt, &g);
	if (error != 0)
		return (error);

	/* The exporter list is epoch-protected; hold it while we read sc. */
	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
		if (sc->sc_id == g.id)
			break;
	}
	if (sc == NULL) {
		error = ENOENT;
		goto out;
	}

	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
		nlmsg_abort(nw);
		error = ENOMEM;
		goto out;
	}

	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
	if (ghdr_new == NULL) {
		nlmsg_abort(nw);
		error = ENOMEM;
		goto out;
	}

	ghdr_new->cmd = PFLOWNL_CMD_GET;
	ghdr_new->version = 0;
	ghdr_new->reserved = 0;

	nlattr_add_u32(nw, PFLOWNL_GET_ID, sc->sc_id);
	nlattr_add_u16(nw, PFLOWNL_GET_VERSION, sc->sc_version);
	/* Source/destination are optional until configured via SET. */
	if (sc->sc_flowsrc)
		nlattr_add_sockaddr(nw, PFLOWNL_GET_SRC, sc->sc_flowsrc);
	if (sc->sc_flowdst)
		nlattr_add_sockaddr(nw, PFLOWNL_GET_DST, sc->sc_flowdst);
	nlattr_add_u32(nw, PFLOWNL_GET_OBSERVATION_DOMAIN,
	    sc->sc_observation_dom);
	nlattr_add_u8(nw, PFLOWNL_GET_SOCKET_STATUS, sc->so != NULL);

	if (! nlmsg_end(nw)) {
		nlmsg_abort(nw);
		error = ENOMEM;
	}

out:
	NET_EPOCH_EXIT(et);

	return (error);
}
1507 
/*
 * Scratch target for parsing a nested address attribute: the union
 * lets the v4/v6 attribute callbacks write into overlapping storage,
 * with ss_family/ss_len fixed up afterwards in the post-parse hook.
 */
struct pflow_sockaddr {
	union {
		struct sockaddr_in in;
		struct sockaddr_in6 in6;
		struct sockaddr_storage storage;
	};
};
1515 static bool
1516 pflow_postparse_sockaddr(void *parsed_args, struct nl_pstate *npt __unused)
1517 {
1518 	struct pflow_sockaddr *s = (struct pflow_sockaddr *)parsed_args;
1519 
1520 	if (s->storage.ss_family == AF_INET)
1521 		s->storage.ss_len = sizeof(struct sockaddr_in);
1522 	else if (s->storage.ss_family == AF_INET6)
1523 		s->storage.ss_len = sizeof(struct sockaddr_in6);
1524 	else
1525 		return (false);
1526 
1527 	return (true);
1528 }
1529 
#define	_OUT(_field)	offsetof(struct pflow_sockaddr, _field)
/*
 * Nested address attribute layout.  The v4 and v6 fields alias the
 * same union; pflow_postparse_sockaddr() validates the family and
 * fixes up ss_len.
 */
static struct nlattr_parser nla_p_sockaddr[] = {
	{ .type = PFLOWNL_ADDR_FAMILY, .off = _OUT(in.sin_family), .cb = nlattr_get_uint8 },
	{ .type = PFLOWNL_ADDR_PORT, .off = _OUT(in.sin_port), .cb = nlattr_get_uint16 },
	{ .type = PFLOWNL_ADDR_IP, .off = _OUT(in.sin_addr), .cb = nlattr_get_in_addr },
	{ .type = PFLOWNL_ADDR_IP6, .off = _OUT(in6.sin6_addr), .cb = nlattr_get_in6_addr },
};
NL_DECLARE_ATTR_PARSER_EXT(addr_parser, nla_p_sockaddr, pflow_postparse_sockaddr);
#undef _OUT
1539 
/*
 * Parsed form of a PFLOWNL_CMD_SET request.  Zero-valued fields
 * (version/observation_dom) and zero-length addresses mean "leave
 * unchanged" — see pflow_set().
 */
struct pflow_parsed_set {
	int id;
	uint16_t version;
	struct sockaddr_storage src;
	struct sockaddr_storage dst;
	uint32_t observation_dom;
};
#define	_IN(_field)	offsetof(struct genlmsghdr, _field)
#define	_OUT(_field)	offsetof(struct pflow_parsed_set, _field)
/* Attribute layout for SET requests; addresses use the nested parser. */
static const struct nlattr_parser nla_p_set[] = {
	{ .type = PFLOWNL_SET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
	{ .type = PFLOWNL_SET_VERSION, .off = _OUT(version), .cb = nlattr_get_uint16 },
	{ .type = PFLOWNL_SET_SRC, .off = _OUT(src), .arg = &addr_parser, .cb = nlattr_get_nested },
	{ .type = PFLOWNL_SET_DST, .off = _OUT(dst), .arg = &addr_parser, .cb = nlattr_get_nested },
	{ .type = PFLOWNL_SET_OBSERVATION_DOMAIN, .off = _OUT(observation_dom), .cb = nlattr_get_uint32 },
};
/* No fixed header fields are consumed from the genlmsghdr. */
static const struct nlfield_parser nlf_p_set[] = {};
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(set_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
1560 
/*
 * Apply a parsed SET request to an exporter.  Zero/empty fields leave
 * the current setting untouched.  Pending records are flushed first so
 * they go out under the old configuration; the export socket is
 * (re)created or torn down to match the new destination.  Called with
 * the softc lock held.
 *
 * Returns 0 on success, EINVAL for an unsupported version, ENOMEM on
 * allocation failure, or a socreate()/sobind() error.
 */
static int
pflow_set(struct pflow_softc *sc, const struct pflow_parsed_set *pflowr, struct ucred *cred)
{
	struct thread		*td;
	struct socket		*so;
	int			 error = 0;

	td = curthread;

	PFLOW_ASSERT(sc);

	/* Validate the requested version before touching any state. */
	if (pflowr->version != 0) {
		switch(pflowr->version) {
		case PFLOW_PROTO_5:
		case PFLOW_PROTO_10:
			break;
		default:
			return(EINVAL);
		}
	}

	pflow_flush(sc);

	if (pflowr->dst.ss_len != 0) {
		/*
		 * A change of address family invalidates both the stored
		 * destination and the existing socket.
		 */
		if (sc->sc_flowdst != NULL &&
		    sc->sc_flowdst->sa_family != pflowr->dst.ss_family) {
			free(sc->sc_flowdst, M_DEVBUF);
			sc->sc_flowdst = NULL;
			if (sc->so != NULL) {
				soclose(sc->so);
				sc->so = NULL;
			}
		}

		switch (pflowr->dst.ss_family) {
		case AF_INET:
			if (sc->sc_flowdst == NULL) {
				if ((sc->sc_flowdst = malloc(
				    sizeof(struct sockaddr_in),
				    M_DEVBUF,  M_NOWAIT)) == NULL)
					return (ENOMEM);
			}
			memcpy(sc->sc_flowdst, &pflowr->dst,
			    sizeof(struct sockaddr_in));
			sc->sc_flowdst->sa_len = sizeof(struct
			    sockaddr_in);
			break;
		case AF_INET6:
			if (sc->sc_flowdst == NULL) {
				if ((sc->sc_flowdst = malloc(
				    sizeof(struct sockaddr_in6),
				    M_DEVBUF, M_NOWAIT)) == NULL)
					return (ENOMEM);
			}
			memcpy(sc->sc_flowdst, &pflowr->dst,
			    sizeof(struct sockaddr_in6));
			sc->sc_flowdst->sa_len = sizeof(struct
			    sockaddr_in6);
			break;
		default:
			/* Unknown family: keep the old destination. */
			break;
		}
	}

	if (pflowr->src.ss_len != 0) {
		/* A new source always invalidates the bound socket. */
		if (sc->sc_flowsrc != NULL)
			free(sc->sc_flowsrc, M_DEVBUF);
		sc->sc_flowsrc = NULL;
		if (sc->so != NULL) {
			soclose(sc->so);
			sc->so = NULL;
		}
		switch(pflowr->src.ss_family) {
		case AF_INET:
			if ((sc->sc_flowsrc = malloc(
			    sizeof(struct sockaddr_in),
			    M_DEVBUF, M_NOWAIT)) == NULL)
				return (ENOMEM);
			memcpy(sc->sc_flowsrc, &pflowr->src,
			    sizeof(struct sockaddr_in));
			sc->sc_flowsrc->sa_len = sizeof(struct
			    sockaddr_in);
			break;
		case AF_INET6:
			if ((sc->sc_flowsrc = malloc(
			    sizeof(struct sockaddr_in6),
			    M_DEVBUF, M_NOWAIT)) == NULL)
				return (ENOMEM);
			memcpy(sc->sc_flowsrc, &pflowr->src,
			    sizeof(struct sockaddr_in6));
			sc->sc_flowsrc->sa_len = sizeof(struct
			    sockaddr_in6);
			break;
		default:
			/* Unknown family: leave the source unset. */
			break;
		}
	}

	/*
	 * (Re)create the UDP export socket when we now have a valid
	 * destination but no socket; tear it down if the destination
	 * became invalid.
	 */
	if (sc->so == NULL) {
		if (pflowvalidsockaddr(sc->sc_flowdst, 0)) {
			error = socreate(sc->sc_flowdst->sa_family,
			    &so, SOCK_DGRAM, IPPROTO_UDP, cred, td);
			if (error)
				return (error);
			if (pflowvalidsockaddr(sc->sc_flowsrc, 1)) {
				error = sobind(so, sc->sc_flowsrc, td);
				if (error) {
					soclose(so);
					return (error);
				}
			}
			sc->so = so;
		}
	} else if (!pflowvalidsockaddr(sc->sc_flowdst, 0)) {
		soclose(sc->so);
		sc->so = NULL;
	}

	if (pflowr->observation_dom != 0)
		sc->sc_observation_dom = pflowr->observation_dom;

	/* error check is above */
	if (pflowr->version != 0)
		sc->sc_version = pflowr->version;

	/* Recompute per-packet record limits for the new version. */
	pflow_setmtu(sc, ETHERMTU);

	switch (sc->sc_version) {
	case PFLOW_PROTO_5:
		/* v5 has no IPv6 sets or templates; stop their callouts. */
		callout_stop(&sc->sc_tmo6);
		callout_stop(&sc->sc_tmo_tmpl);
		break;
	case PFLOW_PROTO_10:
		/* IPFIX needs periodic template announcements. */
		callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
		    pflow_timeout_tmpl, sc);
		break;
	default: /* NOTREACHED */
		break;
	}

	return (0);
}
1703 
1704 static int
1705 pflow_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
1706 {
1707 	struct epoch_tracker et;
1708 	struct pflow_parsed_set s = {};
1709 	struct pflow_softc *sc = NULL;
1710 	int error;
1711 
1712 	error = nl_parse_nlmsg(hdr, &set_parser, npt, &s);
1713 	if (error != 0)
1714 		return (error);
1715 
1716 	NET_EPOCH_ENTER(et);
1717 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1718 		if (sc->sc_id == s.id)
1719 			break;
1720 	}
1721 	if (sc == NULL) {
1722 		error = ENOENT;
1723 		goto out;
1724 	}
1725 
1726 	PFLOW_LOCK(sc);
1727 	error = pflow_set(sc, &s, nlp_get_cred(npt->nlp));
1728 	PFLOW_UNLOCK(sc);
1729 
1730 out:
1731 	NET_EPOCH_EXIT(et);
1732 	return (error);
1733 }
1734 
/*
 * Generic netlink command table for the pflow family.  All commands
 * require PRIV_NETINET_PF; GET is read-only (no CAP_DO).
 */
static const struct genl_cmd pflow_cmds[] = {
	{
		.cmd_num = PFLOWNL_CMD_LIST,
		.cmd_name = "LIST",
		.cmd_cb = pflow_nl_list,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFLOWNL_CMD_CREATE,
		.cmd_name = "CREATE",
		.cmd_cb = pflow_nl_create,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFLOWNL_CMD_DEL,
		.cmd_name = "DEL",
		.cmd_cb = pflow_nl_del,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFLOWNL_CMD_GET,
		.cmd_name = "GET",
		.cmd_cb = pflow_nl_get,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFLOWNL_CMD_SET,
		.cmd_name = "SET",
		.cmd_cb = pflow_nl_set,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
};
1772 
/* All message parsers, verified at load time by NL_VERIFY_PARSERS(). */
static const struct nlhdr_parser *all_parsers[] = {
	&del_parser,
	&get_parser,
	&set_parser,
};
1778 
1779 static int
1780 pflow_init(void)
1781 {
1782 	bool ret;
1783 	int family_id __diagused;
1784 
1785 	NL_VERIFY_PARSERS(all_parsers);
1786 
1787 	family_id = genl_register_family(PFLOWNL_FAMILY_NAME, 0, 2, PFLOWNL_CMD_MAX);
1788 	MPASS(family_id != 0);
1789 	ret = genl_register_cmds(PFLOWNL_FAMILY_NAME, pflow_cmds, NL_ARRAY_LEN(pflow_cmds));
1790 
1791 	return (ret ? 0 : ENODEV);
1792 }
1793 
/* Module unload: unregister the netlink family set up in pflow_init(). */
static void
pflow_uninit(void)
{
	genl_unregister_family(PFLOWNL_FAMILY_NAME);
}
1799 
1800 static int
1801 pflow_modevent(module_t mod, int type, void *data)
1802 {
1803 	int error = 0;
1804 
1805 	switch (type) {
1806 	case MOD_LOAD:
1807 		error = pflow_init();
1808 		break;
1809 	case MOD_UNLOAD:
1810 		pflow_uninit();
1811 		break;
1812 	default:
1813 		error = EINVAL;
1814 		break;
1815 	}
1816 
1817 	return (error);
1818 }
1819 
/* Module glue: register pflow once the firewall subsystem is up. */
static moduledata_t pflow_mod = {
	pflowname,
	pflow_modevent,
	0
};

DECLARE_MODULE(pflow, pflow_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
MODULE_VERSION(pflow, 1);
/* pflow exports pf state data; require a matching pf module version. */
MODULE_DEPEND(pflow, pf, PF_MODVER, PF_MODVER, PF_MODVER);
1829