xref: /freebsd/sys/netpfil/pf/pflow.c (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 /*	$OpenBSD: if_pflow.c,v 1.100 2023/11/09 08:53:20 mvs Exp $	*/
2 
3 /*
4  * Copyright (c) 2023 Rubicon Communications, LLC (Netgate)
5  * Copyright (c) 2011 Florian Obser <florian@narrans.de>
6  * Copyright (c) 2011 Sebastian Benoit <benoit-lists@fb12.de>
7  * Copyright (c) 2008 Henning Brauer <henning@openbsd.org>
8  * Copyright (c) 2008 Joerg Goltermann <jg@osn.de>
9  *
10  * Permission to use, copy, modify, and distribute this software for any
11  * purpose with or without fee is hereby granted, provided that the above
12  * copyright notice and this permission notice appear in all copies.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
16  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
17  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
20  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21  */
22 
23 #include <sys/cdefs.h>
24 #include <sys/param.h>
25 #include <sys/bus.h>
26 #include <sys/callout.h>
27 #include <sys/endian.h>
28 #include <sys/interrupt.h>
29 #include <sys/kernel.h>
30 #include <sys/malloc.h>
31 #include <sys/module.h>
32 #include <sys/mbuf.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <sys/sockio.h>
36 #include <sys/sysctl.h>
37 #include <sys/systm.h>
38 #include <sys/priv.h>
39 
40 #include <net/if.h>
41 #include <net/if_types.h>
42 #include <net/bpf.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <netinet/if_ether.h>
46 #include <netinet/tcp.h>
47 
48 #include <netinet/ip.h>
49 #include <netinet/ip_icmp.h>
50 #include <netinet/ip_var.h>
51 #include <netinet/udp.h>
52 #include <netinet/udp_var.h>
53 #include <netinet/in_pcb.h>
54 
55 #include <netlink/netlink.h>
56 #include <netlink/netlink_ctl.h>
57 #include <netlink/netlink_generic.h>
58 #include <netlink/netlink_message_writer.h>
59 
60 #include <net/pfvar.h>
61 #include <net/pflow.h>
62 #include "net/if_var.h"
63 
/* Room needed for one pflow header followed by a single flow record. */
#define PFLOW_MINMTU							\
	(sizeof(struct pflow_header) + sizeof(struct pflow_flow))
66 
/*
 * Debug output helper.  The argument is a complete, parenthesised printf()
 * argument list, e.g. DPRINTF(("%s\n", str)).  Without PFLOWDEBUG the macro
 * expands to nothing, so its argument is never evaluated.
 */
#ifndef PFLOWDEBUG
#define DPRINTF(x)
#else
#define DPRINTF(x)	do { printf x ; } while (0)
#endif
72 
/* Selects which per-family IPFIX buffer pflow_sendout_ipfix() works on. */
enum pflow_family_t {
	PFLOW_INET = 0,		/* IPv4 flow records */
	PFLOW_INET6 = 1,	/* IPv6 flow records */
	PFLOW_NAT4 = 2,		/* IPv4 NAT records */
};
78 
79 static void	pflow_output_process(void *);
80 static int	pflow_create(int);
81 static int	pflow_destroy(int, bool);
82 static int	pflow_calc_mtu(struct pflow_softc *, int, int);
83 static void	pflow_setmtu(struct pflow_softc *, int);
84 static int	pflowvalidsockaddr(const struct sockaddr *, int);
85 
86 static struct mbuf	*pflow_get_mbuf(struct pflow_softc *, u_int16_t);
87 static void	pflow_flush(struct pflow_softc *);
88 static int	pflow_sendout_v5(struct pflow_softc *);
89 static int	pflow_sendout_ipfix(struct pflow_softc *, enum pflow_family_t);
90 static int	pflow_sendout_ipfix_tmpl(struct pflow_softc *);
91 static int	pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *);
92 static int	sysctl_pflowstats(SYSCTL_HANDLER_ARGS);
93 static void	pflow_timeout(void *);
94 static void	pflow_timeout6(void *);
95 static void	pflow_timeout_tmpl(void *);
96 static void	pflow_timeout_nat4(void *);
97 static void	copy_flow_data(struct pflow_flow *, struct pflow_flow *,
98 	const struct pf_kstate *, struct pf_state_key *, int, int);
99 static void	copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *,
100 	struct pflow_ipfix_flow4 *, const struct pf_kstate *, struct pf_state_key *,
101 	struct pflow_softc *, int, int);
102 static void	copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *,
103 	struct pflow_ipfix_flow6 *, const struct pf_kstate *, struct pf_state_key *,
104 	struct pflow_softc *, int, int);
105 static int	pflow_pack_flow(const struct pf_kstate *, struct pf_state_key *,
106 	struct pflow_softc *);
107 static int	pflow_pack_flow_ipfix(const struct pf_kstate *, struct pf_state_key *,
108 	struct pflow_softc *);
109 static void	export_pflow(const struct pf_kstate *);
110 static int	export_pflow_if(const struct pf_kstate*, struct pf_state_key *,
111 	struct pflow_softc *);
112 static int	copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc);
113 static int	copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow,
114 	struct pflow_softc *sc);
115 static int	copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow,
116 	struct pflow_softc *sc);
117 static int	copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *,
118 	const struct pf_kstate *, struct pflow_softc *,
119 	uint8_t, uint64_t);
120 
/* Name handed to swi_add() for the output software interrupt. */
static const char pflowname[] = { "pflow" };
122 
/*
 * Indices into the per-vnet statistics counter array.  pflow_ncounters is
 * not a counter; it is the number of entries and must stay last.
 */
enum pflowstat_counters {
	pflow_flows = 0,	/* flow records queued for export */
	pflow_packets,
	pflow_onomem,		/* mbuf or cluster allocation failures */
	pflow_oerrors,
	pflow_ncounters,
};
130 struct pflowstats_ctr {
131 	counter_u64_t	c[pflow_ncounters];
132 };
133 
134 /**
135  * Locking concept
136  *
137  * The list of pflow devices (V_pflowif_list) is managed through epoch.
138  * It is safe to read the list without locking (while in NET_EPOCH).
139  * There may only be one simultaneous modifier, hence we need V_pflowif_list_mtx
140  * on every add/delete.
141  *
142  * Each pflow interface protects its own data with the sc_lock mutex.
143  *
144  * We do not require any pf locks, and in fact expect to be called without
145  * hashrow locks held.
146  **/
147 
148 VNET_DEFINE(struct unrhdr *,	pflow_unr);
149 #define	V_pflow_unr	VNET(pflow_unr)
150 VNET_DEFINE(CK_LIST_HEAD(, pflow_softc), pflowif_list);
151 #define	V_pflowif_list	VNET(pflowif_list)
152 VNET_DEFINE(struct mtx, pflowif_list_mtx);
153 #define	V_pflowif_list_mtx	VNET(pflowif_list_mtx)
154 VNET_DEFINE(struct pflowstats_ctr,	 pflowstat);
155 #define	V_pflowstats	VNET(pflowstat)
156 
/* Per-instance lock helpers; all operate on the softc's sc_lock mutex. */
#define	PFLOW_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock)
#define	PFLOW_UNLOCK(_sc)	mtx_unlock(&(_sc)->sc_lock)
/* Assert that the caller owns the instance lock. */
#define	PFLOW_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED)
160 
161 SYSCTL_NODE(_net, OID_AUTO, pflow, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
162     "PFLOW");
163 SYSCTL_PROC(_net_pflow, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
164     0, 0, sysctl_pflowstats, "S,pflowstats",
165     "PFLOW statistics (struct pflowstats, net/if_pflow.h)");
166 
167 static inline void
168 pflowstat_inc(enum pflowstat_counters c)
169 {
170 	counter_u64_add(V_pflowstats.c[c], 1);
171 }
172 
173 static void
174 vnet_pflowattach(void)
175 {
176 	CK_LIST_INIT(&V_pflowif_list);
177 	mtx_init(&V_pflowif_list_mtx, "pflow interface list mtx", NULL, MTX_DEF);
178 
179 	V_pflow_unr = new_unrhdr(0, PFLOW_MAX_ENTRIES - 1, &V_pflowif_list_mtx);
180 
181 	for (int i = 0; i < pflow_ncounters; i++)
182 		V_pflowstats.c[i] = counter_u64_alloc(M_WAITOK);
183 }
184 VNET_SYSINIT(vnet_pflowattach, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
185     vnet_pflowattach, NULL);
186 
187 static void
188 vnet_pflowdetach(void)
189 {
190 	struct pflow_softc	*sc;
191 
192 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
193 		pflow_destroy(sc->sc_id, false);
194 	}
195 
196 	MPASS(CK_LIST_EMPTY(&V_pflowif_list));
197 	delete_unrhdr(V_pflow_unr);
198 	mtx_destroy(&V_pflowif_list_mtx);
199 
200 	for (int i = 0; i < pflow_ncounters; i++)
201 		counter_u64_free(V_pflowstats.c[i]);
202 }
203 VNET_SYSUNINIT(vnet_pflowdetach, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
204     vnet_pflowdetach, NULL);
205 
/*
 * Final per-vnet teardown step: wait for outstanding epoch callbacks so
 * that every softc queued for release by pflow_destroy() has been freed.
 */
static void
vnet_pflow_finalise(void)
{
	NET_EPOCH_DRAIN_CALLBACKS();
}
VNET_SYSUNINIT(vnet_pflow_finalise, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
    vnet_pflow_finalise, NULL);
217 
218 static void
219 pflow_output_process(void *arg)
220 {
221 	struct mbufq ml;
222 	struct pflow_softc *sc = arg;
223 	struct mbuf *m;
224 
225 	mbufq_init(&ml, 0);
226 
227 	PFLOW_LOCK(sc);
228 	mbufq_concat(&ml, &sc->sc_outputqueue);
229 	PFLOW_UNLOCK(sc);
230 
231 	CURVNET_SET(sc->sc_vnet);
232 	while ((m = mbufq_dequeue(&ml)) != NULL) {
233 		pflow_sendout_mbuf(sc, m);
234 	}
235 	CURVNET_RESTORE();
236 }
237 
238 static int
239 pflow_create(int unit)
240 {
241 	struct pflow_softc	*pflowif;
242 	int			 error;
243 
244 	pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
245 	mtx_init(&pflowif->sc_lock, "pflowlk", NULL, MTX_DEF);
246 	pflowif->sc_version = PFLOW_PROTO_DEFAULT;
247 	pflowif->sc_observation_dom = PFLOW_ENGINE_TYPE;
248 
249 	/* ipfix template init */
250 	bzero(&pflowif->sc_tmpl_ipfix,sizeof(pflowif->sc_tmpl_ipfix));
251 	pflowif->sc_tmpl_ipfix.set_header.set_id =
252 	    htons(PFLOW_IPFIX_TMPL_SET_ID);
253 	pflowif->sc_tmpl_ipfix.set_header.set_length =
254 	    htons(sizeof(struct pflow_ipfix_tmpl));
255 
256 	/* ipfix IPv4 template */
257 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.tmpl_id =
258 	    htons(PFLOW_IPFIX_TMPL_IPV4_ID);
259 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.field_count
260 	    = htons(PFLOW_IPFIX_TMPL_IPV4_FIELD_COUNT);
261 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.field_id =
262 	    htons(PFIX_IE_sourceIPv4Address);
263 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.len = htons(4);
264 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.field_id =
265 	    htons(PFIX_IE_destinationIPv4Address);
266 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.len = htons(4);
267 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.field_id =
268 	    htons(PFIX_IE_ingressInterface);
269 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.len = htons(4);
270 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.field_id =
271 	    htons(PFIX_IE_egressInterface);
272 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.len = htons(4);
273 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.field_id =
274 	    htons(PFIX_IE_packetDeltaCount);
275 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.len = htons(8);
276 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.field_id =
277 	    htons(PFIX_IE_octetDeltaCount);
278 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.len = htons(8);
279 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.field_id =
280 	    htons(PFIX_IE_flowStartMilliseconds);
281 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.len = htons(8);
282 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.field_id =
283 	    htons(PFIX_IE_flowEndMilliseconds);
284 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.len = htons(8);
285 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.field_id =
286 	    htons(PFIX_IE_sourceTransportPort);
287 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.len = htons(2);
288 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.field_id =
289 	    htons(PFIX_IE_destinationTransportPort);
290 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.len = htons(2);
291 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.field_id =
292 	    htons(PFIX_IE_ipClassOfService);
293 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.len = htons(1);
294 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.field_id =
295 	    htons(PFIX_IE_protocolIdentifier);
296 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.len = htons(1);
297 
298 	/* ipfix IPv6 template */
299 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.tmpl_id =
300 	    htons(PFLOW_IPFIX_TMPL_IPV6_ID);
301 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.field_count =
302 	    htons(PFLOW_IPFIX_TMPL_IPV6_FIELD_COUNT);
303 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.field_id =
304 	    htons(PFIX_IE_sourceIPv6Address);
305 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.len = htons(16);
306 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.field_id =
307 	    htons(PFIX_IE_destinationIPv6Address);
308 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.len = htons(16);
309 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.field_id =
310 	    htons(PFIX_IE_ingressInterface);
311 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.len = htons(4);
312 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.field_id =
313 	    htons(PFIX_IE_egressInterface);
314 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.len = htons(4);
315 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.field_id =
316 	    htons(PFIX_IE_packetDeltaCount);
317 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.len = htons(8);
318 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.field_id =
319 	    htons(PFIX_IE_octetDeltaCount);
320 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.len = htons(8);
321 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.field_id =
322 	    htons(PFIX_IE_flowStartMilliseconds);
323 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.len = htons(8);
324 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.field_id =
325 	    htons(PFIX_IE_flowEndMilliseconds);
326 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.len = htons(8);
327 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.field_id =
328 	    htons(PFIX_IE_sourceTransportPort);
329 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.len = htons(2);
330 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.field_id =
331 	    htons(PFIX_IE_destinationTransportPort);
332 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.len = htons(2);
333 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.field_id =
334 	    htons(PFIX_IE_ipClassOfService);
335 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.len = htons(1);
336 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.field_id =
337 	    htons(PFIX_IE_protocolIdentifier);
338 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.len = htons(1);
339 
340 	/* NAT44 create template */
341 	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.tmpl_id =
342 	    htons(PFLOW_IPFIX_TMPL_NAT44_ID);
343 	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.field_count =
344 	    htons(PFLOW_IPFIX_TMPL_NAT44_FIELD_COUNT);
345 	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.field_id =
346 	    htons(PFIX_IE_timeStamp);
347 	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.len =
348 	    htons(8);
349 	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.field_id =
350 	    htons(PFIX_IE_natEvent);
351 	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.len =
352 	    htons(1);
353 	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.field_id =
354 	    htons(PFIX_IE_protocolIdentifier);
355 	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.len = htons(1);
356 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.field_id =
357 	    htons(PFIX_IE_sourceIPv4Address);
358 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.len =
359 	    htons(4);
360 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.field_id =
361 	    htons(PFIX_IE_sourceTransportPort);
362 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.len = htons(2);
363 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.field_id =
364 	    htons(PFIX_IE_postNATSourceIPv4Address);
365 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.len =
366 	    htons(4);
367 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.field_id =
368 	    htons(PFIX_IE_postNAPTSourceTransportPort);
369 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.len =
370 	    htons(2);
371 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.field_id =
372 	    htons(PFIX_IE_destinationIPv4Address);
373 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.len =
374 	    htons(4);
375 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.field_id =
376 	    htons(PFIX_IE_destinationTransportPort);
377 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.len = htons(2);
378 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.field_id =
379 	    htons(PFIX_IE_postNATDestinationIPv4Address);
380 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.len =
381 	    htons(4);
382 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.field_id =
383 	    htons(PFIX_IE_postNAPTDestinationTransportPort);
384 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.len =
385 	    htons(2);
386 
387 	pflowif->sc_id = unit;
388 	pflowif->sc_vnet = curvnet;
389 
390 	mbufq_init(&pflowif->sc_outputqueue, 8192);
391 	pflow_setmtu(pflowif, ETHERMTU);
392 
393 	callout_init_mtx(&pflowif->sc_tmo, &pflowif->sc_lock, 0);
394 	callout_init_mtx(&pflowif->sc_tmo6, &pflowif->sc_lock, 0);
395 	callout_init_mtx(&pflowif->sc_tmo_nat4, &pflowif->sc_lock, 0);
396 	callout_init_mtx(&pflowif->sc_tmo_tmpl, &pflowif->sc_lock, 0);
397 
398 	error = swi_add(&pflowif->sc_swi_ie, pflowname, pflow_output_process,
399 	    pflowif, SWI_NET, INTR_MPSAFE, &pflowif->sc_swi_cookie);
400 	if (error) {
401 		free(pflowif, M_DEVBUF);
402 		return (error);
403 	}
404 
405 	/* Insert into list of pflows */
406 	mtx_lock(&V_pflowif_list_mtx);
407 	CK_LIST_INSERT_HEAD(&V_pflowif_list, pflowif, sc_next);
408 	mtx_unlock(&V_pflowif_list_mtx);
409 
410 	V_pflow_export_state_ptr = export_pflow;
411 
412 	return (0);
413 }
414 
415 static void
416 pflow_free_cb(struct epoch_context *ctx)
417 {
418 	struct pflow_softc *sc;
419 
420 	sc = __containerof(ctx, struct pflow_softc, sc_epoch_ctx);
421 
422 	free(sc, M_DEVBUF);
423 }
424 
425 static int
426 pflow_destroy(int unit, bool drain)
427 {
428 	struct pflow_softc	*sc;
429 	int			 error __diagused;
430 
431 	mtx_lock(&V_pflowif_list_mtx);
432 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
433 		if (sc->sc_id == unit)
434 			break;
435 	}
436 	if (sc == NULL) {
437 		mtx_unlock(&V_pflowif_list_mtx);
438 		return (ENOENT);
439 	}
440 	CK_LIST_REMOVE(sc, sc_next);
441 	if (CK_LIST_EMPTY(&V_pflowif_list))
442 		V_pflow_export_state_ptr = NULL;
443 	mtx_unlock(&V_pflowif_list_mtx);
444 
445 	sc->sc_dying = 1;
446 
447 	if (drain) {
448 		/* Let's be sure no one is using this interface any more. */
449 		NET_EPOCH_DRAIN_CALLBACKS();
450 	}
451 
452 	error = swi_remove(sc->sc_swi_cookie);
453 	MPASS(error == 0);
454 	error = intr_event_destroy(sc->sc_swi_ie);
455 	MPASS(error == 0);
456 
457 	callout_drain(&sc->sc_tmo);
458 	callout_drain(&sc->sc_tmo6);
459 	callout_drain(&sc->sc_tmo_nat4);
460 	callout_drain(&sc->sc_tmo_tmpl);
461 
462 	m_freem(sc->sc_mbuf);
463 	m_freem(sc->sc_mbuf6);
464 	m_freem(sc->sc_mbuf_nat4);
465 
466 	PFLOW_LOCK(sc);
467 	mbufq_drain(&sc->sc_outputqueue);
468 	if (sc->so != NULL) {
469 		soclose(sc->so);
470 		sc->so = NULL;
471 	}
472 	if (sc->sc_flowdst != NULL)
473 		free(sc->sc_flowdst, M_DEVBUF);
474 	if (sc->sc_flowsrc != NULL)
475 		free(sc->sc_flowsrc, M_DEVBUF);
476 	PFLOW_UNLOCK(sc);
477 
478 	mtx_destroy(&sc->sc_lock);
479 
480 	free_unr(V_pflow_unr, unit);
481 
482 	NET_EPOCH_CALL(pflow_free_cb, &sc->sc_epoch_ctx);
483 
484 	return (0);
485 }
486 
/*
 * Check whether 'sa' is a usable IPv4 or IPv6 endpoint.
 *
 * The address must not be the unspecified address and, unless
 * 'ignore_port' is non-zero, the port must be non-zero as well.
 *
 * Returns 1 if valid; 0 for NULL, other address families or a failed check.
 */
static int
pflowvalidsockaddr(const struct sockaddr *sa, int ignore_port)
{
	if (sa == NULL)
		return (0);

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *in4 = (const struct sockaddr_in *)sa;

		if (in4->sin_addr.s_addr == INADDR_ANY)
			return (0);
		return (ignore_port || in4->sin_port != 0);
	}

	if (sa->sa_family == AF_INET6) {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		if (IN6_IS_ADDR_UNSPECIFIED(&in6->sin6_addr))
			return (0);
		return (ignore_port || in6->sin6_port != 0);
	}

	return (0);
}
508 
509 int
510 pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
511 {
512 	size_t min;
513 
514 	sc->sc_maxcount4 = (mtu - hdrsz -
515 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
516 	sc->sc_maxcount6 = (mtu - hdrsz -
517 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow6);
518 	sc->sc_maxcount_nat4 = (mtu - hdrsz -
519 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_nat4);
520 	if (sc->sc_maxcount4 > PFLOW_MAXFLOWS)
521 		sc->sc_maxcount4 = PFLOW_MAXFLOWS;
522 	if (sc->sc_maxcount6 > PFLOW_MAXFLOWS)
523 		sc->sc_maxcount6 = PFLOW_MAXFLOWS;
524 	if (sc->sc_maxcount_nat4 > PFLOW_MAXFLOWS)
525 		sc->sc_maxcount_nat4 = PFLOW_MAXFLOWS;
526 
527 	min = MIN(sc->sc_maxcount4 * sizeof(struct pflow_ipfix_flow4),
528 	    sc->sc_maxcount6 * sizeof(struct pflow_ipfix_flow6));
529 	min = MIN(min, sc->sc_maxcount_nat4 * sizeof(struct pflow_ipfix_nat4));
530 
531 	return (hdrsz + sizeof(struct udpiphdr) + min);
532 }
533 
534 static void
535 pflow_setmtu(struct pflow_softc *sc, int mtu_req)
536 {
537 	int	mtu;
538 
539 	mtu = mtu_req;
540 
541 	switch (sc->sc_version) {
542 	case PFLOW_PROTO_5:
543 		sc->sc_maxcount = (mtu - sizeof(struct pflow_header) -
544 		    sizeof(struct udpiphdr)) / sizeof(struct pflow_flow);
545 		if (sc->sc_maxcount > PFLOW_MAXFLOWS)
546 		    sc->sc_maxcount = PFLOW_MAXFLOWS;
547 		break;
548 	case PFLOW_PROTO_10:
549 		pflow_calc_mtu(sc, mtu, sizeof(struct pflow_v10_header));
550 		break;
551 	default: /* NOTREACHED */
552 		break;
553 	}
554 }
555 
556 static struct mbuf *
557 pflow_get_mbuf(struct pflow_softc *sc, u_int16_t set_id)
558 {
559 	struct pflow_set_header	 set_hdr;
560 	struct pflow_header	 h;
561 	struct mbuf		*m;
562 
563 	MGETHDR(m, M_NOWAIT, MT_DATA);
564 	if (m == NULL) {
565 		pflowstat_inc(pflow_onomem);
566 		return (NULL);
567 	}
568 
569 	MCLGET(m, M_NOWAIT);
570 	if ((m->m_flags & M_EXT) == 0) {
571 		m_free(m);
572 		pflowstat_inc(pflow_onomem);
573 		return (NULL);
574 	}
575 
576 	m->m_len = m->m_pkthdr.len = 0;
577 
578 	if (sc == NULL)		/* get only a new empty mbuf */
579 		return (m);
580 
581 	switch (sc->sc_version) {
582 	case PFLOW_PROTO_5:
583 		/* populate pflow_header */
584 		h.reserved1 = 0;
585 		h.reserved2 = 0;
586 		h.count = 0;
587 		h.version = htons(PFLOW_PROTO_5);
588 		h.flow_sequence = htonl(sc->sc_gcounter);
589 		h.engine_type = PFLOW_ENGINE_TYPE;
590 		h.engine_id = PFLOW_ENGINE_ID;
591 		m_copyback(m, 0, PFLOW_HDRLEN, (caddr_t)&h);
592 
593 		sc->sc_count = 0;
594 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
595 		    pflow_timeout, sc);
596 		break;
597 	case PFLOW_PROTO_10:
598 		/* populate pflow_set_header */
599 		set_hdr.set_length = 0;
600 		set_hdr.set_id = htons(set_id);
601 		m_copyback(m, 0, PFLOW_SET_HDRLEN, (caddr_t)&set_hdr);
602 		break;
603 	default: /* NOTREACHED */
604 		break;
605 	}
606 
607 	return (m);
608 }
609 
610 static void
611 copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2,
612     const struct pf_kstate *st, struct pf_state_key *sk, int src, int dst)
613 {
614 	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
615 	flow1->src_port = flow2->dest_port = sk->port[src];
616 	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
617 	flow1->dest_port = flow2->src_port = sk->port[dst];
618 
619 	flow1->dest_as = flow2->src_as =
620 	    flow1->src_as = flow2->dest_as = 0;
621 	flow1->if_index_in = htons(st->if_index_in);
622 	flow1->if_index_out = htons(st->if_index_out);
623 	flow2->if_index_in = htons(st->if_index_out);
624 	flow2->if_index_out = htons(st->if_index_in);
625 	flow1->dest_mask = flow2->src_mask =
626 	    flow1->src_mask = flow2->dest_mask = 0;
627 
628 	flow1->flow_packets = htonl(st->packets[0]);
629 	flow2->flow_packets = htonl(st->packets[1]);
630 	flow1->flow_octets = htonl(st->bytes[0]);
631 	flow2->flow_octets = htonl(st->bytes[1]);
632 
633 	/*
634 	 * Pretend the flow was created or expired when the machine came up
635 	 * when creation is in the future of the last time a package was seen
636 	 * or was created / expired before this machine came up due to pfsync.
637 	 */
638 	flow1->flow_start = flow2->flow_start = st->creation < 0 ||
639 	    st->creation > st->expire ? htonl(0) : htonl(st->creation);
640 	flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? htonl(0) :
641 	    htonl(st->expire);
642 	flow1->tcp_flags = flow2->tcp_flags = 0;
643 	flow1->protocol = flow2->protocol = sk->proto;
644 	flow1->tos = flow2->tos = st->rule.ptr->tos;
645 }
646 
647 static void
648 copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *flow1,
649     struct pflow_ipfix_flow4 *flow2, const struct pf_kstate *st,
650     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
651 {
652 	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
653 	flow1->src_port = flow2->dest_port = sk->port[src];
654 	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
655 	flow1->dest_port = flow2->src_port = sk->port[dst];
656 
657 	flow1->if_index_in = htonl(st->if_index_in);
658 	flow1->if_index_out = htonl(st->if_index_out);
659 	flow2->if_index_in = htonl(st->if_index_out);
660 	flow2->if_index_out = htonl(st->if_index_in);
661 
662 	flow1->flow_packets = htobe64(st->packets[0]);
663 	flow2->flow_packets = htobe64(st->packets[1]);
664 	flow1->flow_octets = htobe64(st->bytes[0]);
665 	flow2->flow_octets = htobe64(st->bytes[1]);
666 
667 	/*
668 	 * Pretend the flow was created when the machine came up when creation
669 	 * is in the future of the last time a package was seen due to pfsync.
670 	 */
671 	if (st->creation > st->expire)
672 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
673 		    time_uptime)*1000);
674 	else
675 		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
676 		    (pf_get_uptime() - st->creation)));
677 	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
678 	    (pf_get_uptime() - st->expire)));
679 
680 	flow1->protocol = flow2->protocol = sk->proto;
681 	flow1->tos = flow2->tos = st->rule.ptr->tos;
682 }
683 
684 static void
685 copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *flow1,
686     struct pflow_ipfix_flow6 *flow2, const struct pf_kstate *st,
687     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
688 {
689 	bcopy(&sk->addr[src].v6, &flow1->src_ip, sizeof(flow1->src_ip));
690 	bcopy(&sk->addr[src].v6, &flow2->dest_ip, sizeof(flow2->dest_ip));
691 	flow1->src_port = flow2->dest_port = sk->port[src];
692 	bcopy(&sk->addr[dst].v6, &flow1->dest_ip, sizeof(flow1->dest_ip));
693 	bcopy(&sk->addr[dst].v6, &flow2->src_ip, sizeof(flow2->src_ip));
694 	flow1->dest_port = flow2->src_port = sk->port[dst];
695 
696 	flow1->if_index_in = htonl(st->if_index_in);
697 	flow1->if_index_out = htonl(st->if_index_out);
698 	flow2->if_index_in = htonl(st->if_index_out);
699 	flow2->if_index_out = htonl(st->if_index_in);
700 
701 	flow1->flow_packets = htobe64(st->packets[0]);
702 	flow2->flow_packets = htobe64(st->packets[1]);
703 	flow1->flow_octets = htobe64(st->bytes[0]);
704 	flow2->flow_octets = htobe64(st->bytes[1]);
705 
706 	/*
707 	 * Pretend the flow was created when the machine came up when creation
708 	 * is in the future of the last time a package was seen due to pfsync.
709 	 */
710 	if (st->creation > st->expire)
711 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
712 		    time_uptime)*1000);
713 	else
714 		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
715 		    (pf_get_uptime() - st->creation)));
716 	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
717 	    (pf_get_uptime() - st->expire)));
718 
719 	flow1->protocol = flow2->protocol = sk->proto;
720 	flow1->tos = flow2->tos = st->rule.ptr->tos;
721 }
722 
723 static void
724 copy_nat_ipfix_4_data(struct pflow_ipfix_nat4 *nat1,
725     struct pflow_ipfix_nat4 *nat2, const struct pf_kstate *st,
726     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
727 {
728 	nat1->src_ip = nat2->dest_ip = st->key[PF_SK_STACK]->addr[src].v4.s_addr;
729 	nat1->src_port = nat2->dest_port = st->key[PF_SK_STACK]->port[src];
730 	nat1->dest_ip = nat2->src_ip = st->key[PF_SK_STACK]->addr[dst].v4.s_addr;
731 	nat1->dest_port = nat2->src_port = st->key[PF_SK_STACK]->port[dst];
732 	nat1->postnat_src_ip = nat2->postnat_dest_ip = st->key[PF_SK_WIRE]->addr[src].v4.s_addr;
733 	nat1->postnat_src_port = nat2->postnat_dest_port = st->key[PF_SK_WIRE]->port[src];
734 	nat1->postnat_dest_ip = nat2->postnat_src_ip = st->key[PF_SK_WIRE]->addr[dst].v4.s_addr;
735 	nat1->postnat_dest_port = nat2->postnat_src_port = st->key[PF_SK_WIRE]->port[dst];
736 	nat1->protocol = nat2->protocol = sk->proto;
737 
738 	/*
739 	 * Because we have to generate a create and delete event we'll fill out the
740 	 * timestamp and nat_event fields when we transmit. As opposed to doing this
741 	 * work a second time.
742 	*/
743 }
744 
745 static void
746 export_pflow(const struct pf_kstate *st)
747 {
748 	struct pflow_softc	*sc = NULL;
749 	struct pf_state_key	*sk;
750 
751 	NET_EPOCH_ASSERT();
752 
753 	/* e.g. if pf_state_key_attach() fails. */
754 	if (st->key[PF_SK_STACK] == NULL || st->key[PF_SK_WIRE] == NULL)
755 		return;
756 
757 	sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
758 
759 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
760 		PFLOW_LOCK(sc);
761 		switch (sc->sc_version) {
762 		case PFLOW_PROTO_5:
763 			if (sk->af == AF_INET)
764 				export_pflow_if(st, sk, sc);
765 			break;
766 		case PFLOW_PROTO_10:
767 			if (sk->af == AF_INET || sk->af == AF_INET6)
768 				export_pflow_if(st, sk, sc);
769 			break;
770 		default: /* NOTREACHED */
771 			break;
772 		}
773 		PFLOW_UNLOCK(sc);
774 	}
775 }
776 
777 static int
778 export_pflow_if(const struct pf_kstate *st, struct pf_state_key *sk,
779     struct pflow_softc *sc)
780 {
781 	struct pf_kstate	 pfs_copy;
782 	u_int64_t		 bytes[2];
783 	int			 ret = 0;
784 
785 	if (sc->sc_version == PFLOW_PROTO_10)
786 		return (pflow_pack_flow_ipfix(st, sk, sc));
787 
788 	/* PFLOW_PROTO_5 */
789 	if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES)
790 	    && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES))
791 		return (pflow_pack_flow(st, sk, sc));
792 
793 	/* flow > PFLOW_MAXBYTES need special handling */
794 	bcopy(st, &pfs_copy, sizeof(pfs_copy));
795 	bytes[0] = pfs_copy.bytes[0];
796 	bytes[1] = pfs_copy.bytes[1];
797 
798 	while (bytes[0] > PFLOW_MAXBYTES) {
799 		pfs_copy.bytes[0] = PFLOW_MAXBYTES;
800 		pfs_copy.bytes[1] = 0;
801 
802 		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
803 			return (ret);
804 		if ((bytes[0] - PFLOW_MAXBYTES) > 0)
805 			bytes[0] -= PFLOW_MAXBYTES;
806 	}
807 
808 	while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) {
809 		pfs_copy.bytes[1] = PFLOW_MAXBYTES;
810 		pfs_copy.bytes[0] = 0;
811 
812 		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
813 			return (ret);
814 		if ((bytes[1] - PFLOW_MAXBYTES) > 0)
815 			bytes[1] -= PFLOW_MAXBYTES;
816 	}
817 
818 	pfs_copy.bytes[0] = bytes[0];
819 	pfs_copy.bytes[1] = bytes[1];
820 
821 	return (pflow_pack_flow(&pfs_copy, sk, sc));
822 }
823 
824 static int
825 copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc)
826 {
827 	int		ret = 0;
828 
829 	PFLOW_ASSERT(sc);
830 
831 	if (sc->sc_mbuf == NULL) {
832 		if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL)
833 			return (ENOBUFS);
834 	}
835 	m_copyback(sc->sc_mbuf, PFLOW_HDRLEN +
836 	    (sc->sc_count * sizeof(struct pflow_flow)),
837 	    sizeof(struct pflow_flow), (caddr_t)flow);
838 
839 	pflowstat_inc(pflow_flows);
840 	sc->sc_gcounter++;
841 	sc->sc_count++;
842 
843 	if (sc->sc_count >= sc->sc_maxcount)
844 		ret = pflow_sendout_v5(sc);
845 
846 	return(ret);
847 }
848 
849 static int
850 copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc)
851 {
852 	int		ret = 0;
853 
854 	PFLOW_ASSERT(sc);
855 
856 	if (sc->sc_mbuf == NULL) {
857 		if ((sc->sc_mbuf =
858 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) {
859 			return (ENOBUFS);
860 		}
861 		sc->sc_count4 = 0;
862 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
863 		    pflow_timeout, sc);
864 	}
865 	m_copyback(sc->sc_mbuf, PFLOW_SET_HDRLEN +
866 	    (sc->sc_count4 * sizeof(struct pflow_ipfix_flow4)),
867 	    sizeof(struct pflow_ipfix_flow4), (caddr_t)flow);
868 
869 	pflowstat_inc(pflow_flows);
870 	sc->sc_gcounter++;
871 	sc->sc_count4++;
872 
873 	if (sc->sc_count4 >= sc->sc_maxcount4)
874 		ret = pflow_sendout_ipfix(sc, PFLOW_INET);
875 	return(ret);
876 }
877 
878 static int
879 copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc)
880 {
881 	int		ret = 0;
882 
883 	PFLOW_ASSERT(sc);
884 
885 	if (sc->sc_mbuf6 == NULL) {
886 		if ((sc->sc_mbuf6 =
887 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) {
888 			return (ENOBUFS);
889 		}
890 		sc->sc_count6 = 0;
891 		callout_reset(&sc->sc_tmo6, PFLOW_TIMEOUT * hz,
892 		    pflow_timeout6, sc);
893 	}
894 	m_copyback(sc->sc_mbuf6, PFLOW_SET_HDRLEN +
895 	    (sc->sc_count6 * sizeof(struct pflow_ipfix_flow6)),
896 	    sizeof(struct pflow_ipfix_flow6), (caddr_t)flow);
897 
898 	pflowstat_inc(pflow_flows);
899 	sc->sc_gcounter++;
900 	sc->sc_count6++;
901 
902 	if (sc->sc_count6 >= sc->sc_maxcount6)
903 		ret = pflow_sendout_ipfix(sc, PFLOW_INET6);
904 
905 	return(ret);
906 }
907 
908 int
909 copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *nat, const struct pf_kstate *st,
910     struct pflow_softc *sc, uint8_t event, uint64_t timestamp)
911 {
912 	int		ret = 0;
913 
914 	PFLOW_ASSERT(sc);
915 
916 	if (sc->sc_mbuf_nat4 == NULL) {
917 		if ((sc->sc_mbuf_nat4 =
918 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_NAT44_ID)) == NULL) {
919 			return (ENOBUFS);
920 		}
921 		sc->sc_count_nat4 = 0;
922 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
923 		    pflow_timeout_nat4, sc);
924 	}
925 
926 	nat->nat_event = event;
927 	nat->timestamp = htobe64(pf_get_time() - (pf_get_uptime() - timestamp));
928 	m_copyback(sc->sc_mbuf_nat4, PFLOW_SET_HDRLEN +
929 	    (sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4)),
930 	    sizeof(struct pflow_ipfix_nat4), (caddr_t)nat);
931 	sc->sc_count_nat4++;
932 
933 	pflowstat_inc(pflow_flows);
934 	sc->sc_gcounter++;
935 
936 	if (sc->sc_count_nat4 >= sc->sc_maxcount_nat4)
937 		ret = pflow_sendout_ipfix(sc, PFLOW_NAT4);
938 
939 	return (ret);
940 }
941 
942 static int
943 pflow_pack_flow(const struct pf_kstate *st, struct pf_state_key *sk,
944     struct pflow_softc *sc)
945 {
946 	struct pflow_flow	 flow1;
947 	struct pflow_flow	 flow2;
948 	int			 ret = 0;
949 
950 	bzero(&flow1, sizeof(flow1));
951 	bzero(&flow2, sizeof(flow2));
952 
953 	if (st->direction == PF_OUT)
954 		copy_flow_data(&flow1, &flow2, st, sk, 1, 0);
955 	else
956 		copy_flow_data(&flow1, &flow2, st, sk, 0, 1);
957 
958 	if (st->bytes[0] != 0) /* first flow from state */
959 		ret = copy_flow_to_m(&flow1, sc);
960 
961 	if (st->bytes[1] != 0) /* second flow from state */
962 		ret = copy_flow_to_m(&flow2, sc);
963 
964 	return (ret);
965 }
966 
967 static bool
968 pflow_is_natd(const struct pf_kstate *st)
969 {
970 	/* If ports or addresses are different we've been NAT-ed. */
971 	return (memcmp(st->key[PF_SK_WIRE], st->key[PF_SK_STACK],
972 	    sizeof(struct pf_addr) * 2 + sizeof(uint16_t) * 2) != 0);
973 }
974 
975 static int
976 pflow_pack_flow_ipfix(const struct pf_kstate *st, struct pf_state_key *sk,
977     struct pflow_softc *sc)
978 {
979 	struct pflow_ipfix_flow4	 flow4_1, flow4_2;
980 	struct pflow_ipfix_nat4		 nat4_1, nat4_2;
981 	struct pflow_ipfix_flow6	 flow6_1, flow6_2;
982 	int				 ret = 0;
983 	bool				 nat = false;
984 
985 	if (sk->af == AF_INET) {
986 		bzero(&flow4_1, sizeof(flow4_1));
987 		bzero(&flow4_2, sizeof(flow4_2));
988 
989 		nat = pflow_is_natd(st);
990 
991 		if (st->direction == PF_OUT)
992 			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
993 			    1, 0);
994 		else
995 			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
996 			    0, 1);
997 
998 		if (nat)
999 			copy_nat_ipfix_4_data(&nat4_1, &nat4_2, st, sk, sc, 1, 0);
1000 
1001 		if (st->bytes[0] != 0) /* first flow from state */ {
1002 			ret = copy_flow_ipfix_4_to_m(&flow4_1, sc);
1003 
1004 			if (ret == 0 && nat) {
1005 				ret = copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
1006 				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
1007 				ret |= copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
1008 				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
1009 			}
1010 		}
1011 
1012 		if (st->bytes[1] != 0) /* second flow from state */ {
1013 			ret = copy_flow_ipfix_4_to_m(&flow4_2, sc);
1014 
1015 			if (ret == 0 && nat) {
1016 				ret = copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
1017 				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
1018 				ret |= copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
1019 				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
1020 			}
1021 		}
1022 	} else if (sk->af == AF_INET6) {
1023 		bzero(&flow6_1, sizeof(flow6_1));
1024 		bzero(&flow6_2, sizeof(flow6_2));
1025 
1026 		if (st->direction == PF_OUT)
1027 			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
1028 			    1, 0);
1029 		else
1030 			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
1031 			    0, 1);
1032 
1033 		if (st->bytes[0] != 0) /* first flow from state */
1034 			ret = copy_flow_ipfix_6_to_m(&flow6_1, sc);
1035 
1036 		if (st->bytes[1] != 0) /* second flow from state */
1037 			ret = copy_flow_ipfix_6_to_m(&flow6_2, sc);
1038 	}
1039 	return (ret);
1040 }
1041 
1042 static void
1043 pflow_timeout(void *v)
1044 {
1045 	struct pflow_softc	*sc = v;
1046 
1047 	PFLOW_ASSERT(sc);
1048 	CURVNET_SET(sc->sc_vnet);
1049 
1050 	switch (sc->sc_version) {
1051 	case PFLOW_PROTO_5:
1052 		pflow_sendout_v5(sc);
1053 		break;
1054 	case PFLOW_PROTO_10:
1055 		pflow_sendout_ipfix(sc, PFLOW_INET);
1056 		break;
1057 	default: /* NOTREACHED */
1058 		panic("Unsupported version %d", sc->sc_version);
1059 		break;
1060 	}
1061 
1062 	CURVNET_RESTORE();
1063 }
1064 
1065 static void
1066 pflow_timeout6(void *v)
1067 {
1068 	struct pflow_softc	*sc = v;
1069 
1070 	PFLOW_ASSERT(sc);
1071 
1072 	if (sc->sc_version != PFLOW_PROTO_10)
1073 		return;
1074 
1075 	CURVNET_SET(sc->sc_vnet);
1076 	pflow_sendout_ipfix(sc, PFLOW_INET6);
1077 	CURVNET_RESTORE();
1078 }
1079 
1080 static void
1081 pflow_timeout_tmpl(void *v)
1082 {
1083 	struct pflow_softc	*sc = v;
1084 
1085 	PFLOW_ASSERT(sc);
1086 
1087 	if (sc->sc_version != PFLOW_PROTO_10)
1088 		return;
1089 
1090 	CURVNET_SET(sc->sc_vnet);
1091 	pflow_sendout_ipfix_tmpl(sc);
1092 	CURVNET_RESTORE();
1093 }
1094 
1095 static void
1096 pflow_timeout_nat4(void *v)
1097 {
1098 	struct pflow_softc	*sc = v;
1099 
1100 	PFLOW_ASSERT(sc);
1101 
1102 	if (sc->sc_version != PFLOW_PROTO_10)
1103 		return;
1104 
1105 	CURVNET_SET(sc->sc_vnet);
1106 	pflow_sendout_ipfix(sc, PFLOW_NAT4);
1107 	CURVNET_RESTORE();
1108 }
1109 
1110 static void
1111 pflow_flush(struct pflow_softc *sc)
1112 {
1113 	PFLOW_ASSERT(sc);
1114 
1115 	switch (sc->sc_version) {
1116 	case PFLOW_PROTO_5:
1117 		pflow_sendout_v5(sc);
1118 		break;
1119 	case PFLOW_PROTO_10:
1120 		pflow_sendout_ipfix(sc, PFLOW_INET);
1121 		pflow_sendout_ipfix(sc, PFLOW_INET6);
1122 		pflow_sendout_ipfix(sc, PFLOW_NAT4);
1123 		break;
1124 	default: /* NOTREACHED */
1125 		break;
1126 	}
1127 }
1128 
1129 static int
1130 pflow_sendout_v5(struct pflow_softc *sc)
1131 {
1132 	struct mbuf		*m = sc->sc_mbuf;
1133 	struct pflow_header	*h;
1134 	struct timespec		tv;
1135 
1136 	PFLOW_ASSERT(sc);
1137 
1138 	if (m == NULL)
1139 		return (0);
1140 
1141 	sc->sc_mbuf = NULL;
1142 
1143 	pflowstat_inc(pflow_packets);
1144 	h = mtod(m, struct pflow_header *);
1145 	h->count = htons(sc->sc_count);
1146 
1147 	/* populate pflow_header */
1148 	h->uptime_ms = htonl(time_uptime * 1000);
1149 
1150 	getnanotime(&tv);
1151 	h->time_sec = htonl(tv.tv_sec);			/* XXX 2038 */
1152 	h->time_nanosec = htonl(tv.tv_nsec);
1153 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1154 		swi_sched(sc->sc_swi_cookie, 0);
1155 
1156 	return (0);
1157 }
1158 
1159 static int
1160 pflow_sendout_ipfix(struct pflow_softc *sc, enum pflow_family_t af)
1161 {
1162 	struct mbuf			*m;
1163 	struct pflow_v10_header		*h10;
1164 	struct pflow_set_header		*set_hdr;
1165 	u_int32_t			 count;
1166 	int				 set_length;
1167 
1168 	PFLOW_ASSERT(sc);
1169 
1170 	switch (af) {
1171 	case PFLOW_INET:
1172 		m = sc->sc_mbuf;
1173 		callout_stop(&sc->sc_tmo);
1174 		if (m == NULL)
1175 			return (0);
1176 		sc->sc_mbuf = NULL;
1177 		count = sc->sc_count4;
1178 		set_length = sizeof(struct pflow_set_header)
1179 		    + sc->sc_count4 * sizeof(struct pflow_ipfix_flow4);
1180 		break;
1181 	case PFLOW_INET6:
1182 		m = sc->sc_mbuf6;
1183 		callout_stop(&sc->sc_tmo6);
1184 		if (m == NULL)
1185 			return (0);
1186 		sc->sc_mbuf6 = NULL;
1187 		count = sc->sc_count6;
1188 		set_length = sizeof(struct pflow_set_header)
1189 		    + sc->sc_count6 * sizeof(struct pflow_ipfix_flow6);
1190 		break;
1191 	case PFLOW_NAT4:
1192 		m = sc->sc_mbuf_nat4;
1193 		callout_stop(&sc->sc_tmo_nat4);
1194 		if (m == NULL)
1195 			return (0);
1196 		sc->sc_mbuf_nat4 = NULL;
1197 		count = sc->sc_count_nat4;
1198 		set_length = sizeof(struct pflow_set_header)
1199 		    + sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4);
1200 		break;
1201 	default:
1202 		panic("Unsupported AF %d", af);
1203 	}
1204 
1205 	pflowstat_inc(pflow_packets);
1206 
1207 	set_hdr = mtod(m, struct pflow_set_header *);
1208 	set_hdr->set_length = htons(set_length);
1209 
1210 	/* populate pflow_header */
1211 	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
1212 	if (m == NULL) {
1213 		pflowstat_inc(pflow_onomem);
1214 		return (ENOBUFS);
1215 	}
1216 	h10 = mtod(m, struct pflow_v10_header *);
1217 	h10->version = htons(PFLOW_PROTO_10);
1218 	h10->length = htons(PFLOW_IPFIX_HDRLEN + set_length);
1219 	h10->time_sec = htonl(time_second);		/* XXX 2038 */
1220 	h10->flow_sequence = htonl(sc->sc_sequence);
1221 	sc->sc_sequence += count;
1222 	h10->observation_dom = htonl(sc->sc_observation_dom);
1223 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1224 		swi_sched(sc->sc_swi_cookie, 0);
1225 
1226 	return (0);
1227 }
1228 
1229 static int
1230 pflow_sendout_ipfix_tmpl(struct pflow_softc *sc)
1231 {
1232 	struct mbuf			*m;
1233 	struct pflow_v10_header		*h10;
1234 
1235 	PFLOW_ASSERT(sc);
1236 
1237 	m = pflow_get_mbuf(sc, 0);
1238 	if (m == NULL)
1239 		return (0);
1240 	m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl),
1241 	    (caddr_t)&sc->sc_tmpl_ipfix);
1242 
1243 	pflowstat_inc(pflow_packets);
1244 
1245 	/* populate pflow_header */
1246 	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
1247 	if (m == NULL) {
1248 		pflowstat_inc(pflow_onomem);
1249 		return (ENOBUFS);
1250 	}
1251 	h10 = mtod(m, struct pflow_v10_header *);
1252 	h10->version = htons(PFLOW_PROTO_10);
1253 	h10->length = htons(PFLOW_IPFIX_HDRLEN + sizeof(struct
1254 	    pflow_ipfix_tmpl));
1255 	h10->time_sec = htonl(time_second);		/* XXX 2038 */
1256 	h10->flow_sequence = htonl(sc->sc_sequence);
1257 	h10->observation_dom = htonl(sc->sc_observation_dom);
1258 
1259 	callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
1260 	    pflow_timeout_tmpl, sc);
1261 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1262 		swi_sched(sc->sc_swi_cookie, 0);
1263 
1264 	return (0);
1265 }
1266 
1267 static int
1268 pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
1269 {
1270 	if (sc->so == NULL) {
1271 		m_freem(m);
1272 		return (EINVAL);
1273 	}
1274 	return (sosend(sc->so, sc->sc_flowdst, NULL, m, NULL, 0, curthread));
1275 }
1276 
1277 static int
1278 sysctl_pflowstats(SYSCTL_HANDLER_ARGS)
1279 {
1280 	struct pflowstats pflowstats;
1281 
1282 	pflowstats.pflow_flows =
1283 	    counter_u64_fetch(V_pflowstats.c[pflow_flows]);
1284 	pflowstats.pflow_packets =
1285 	    counter_u64_fetch(V_pflowstats.c[pflow_packets]);
1286 	pflowstats.pflow_onomem =
1287 	    counter_u64_fetch(V_pflowstats.c[pflow_onomem]);
1288 	pflowstats.pflow_oerrors =
1289 	    counter_u64_fetch(V_pflowstats.c[pflow_oerrors]);
1290 
1291 	return (sysctl_handle_opaque(oidp, &pflowstats, sizeof(pflowstats), req));
1292 }
1293 
1294 static int
1295 pflow_nl_list(struct nlmsghdr *hdr, struct nl_pstate *npt)
1296 {
1297 	struct epoch_tracker	 et;
1298 	struct pflow_softc	*sc = NULL;
1299 	struct nl_writer	 *nw = npt->nw;
1300 	int			 error = 0;
1301 
1302 	hdr->nlmsg_flags |= NLM_F_MULTI;
1303 
1304 	NET_EPOCH_ENTER(et);
1305 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1306 		if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1307 			error = ENOMEM;
1308 			goto out;
1309 		}
1310 
1311 		struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1312 		ghdr_new->cmd = PFLOWNL_CMD_LIST;
1313 		ghdr_new->version = 0;
1314 		ghdr_new->reserved = 0;
1315 
1316 		nlattr_add_u32(nw, PFLOWNL_L_ID, sc->sc_id);
1317 
1318 		if (! nlmsg_end(nw)) {
1319 			error = ENOMEM;
1320 			goto out;
1321 		}
1322 	}
1323 
1324 out:
1325 	NET_EPOCH_EXIT(et);
1326 
1327 	if (error != 0)
1328 		nlmsg_abort(nw);
1329 
1330 	return (error);
1331 }
1332 
1333 static int
1334 pflow_nl_create(struct nlmsghdr *hdr, struct nl_pstate *npt)
1335 {
1336 	struct nl_writer	 *nw = npt->nw;
1337 	int			 error = 0;
1338 	int			 unit;
1339 
1340 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1341 		return (ENOMEM);
1342 	}
1343 
1344 	struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1345 	ghdr_new->cmd = PFLOWNL_CMD_CREATE;
1346 	ghdr_new->version = 0;
1347 	ghdr_new->reserved = 0;
1348 
1349 	unit = alloc_unr(V_pflow_unr);
1350 	if (unit == -1) {
1351 		nlmsg_abort(nw);
1352 		return (ENOMEM);
1353 	}
1354 
1355 	error = pflow_create(unit);
1356 	if (error != 0) {
1357 		free_unr(V_pflow_unr, unit);
1358 		nlmsg_abort(nw);
1359 		return (error);
1360 	}
1361 
1362 	nlattr_add_s32(nw, PFLOWNL_CREATE_ID, unit);
1363 
1364 	if (! nlmsg_end(nw)) {
1365 		pflow_destroy(unit, true);
1366 		return (ENOMEM);
1367 	}
1368 
1369 	return (0);
1370 }
1371 
1372 struct pflow_parsed_del {
1373 	int id;
1374 };
1375 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1376 #define	_OUT(_field)	offsetof(struct pflow_parsed_del, _field)
1377 static const struct nlattr_parser nla_p_del[] = {
1378 	{ .type = PFLOWNL_DEL_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1379 };
1380 static const struct nlfield_parser nlf_p_del[] = {};
1381 #undef _IN
1382 #undef _OUT
1383 NL_DECLARE_PARSER(del_parser, struct genlmsghdr, nlf_p_del, nla_p_del);
1384 
1385 static int
1386 pflow_nl_del(struct nlmsghdr *hdr, struct nl_pstate *npt)
1387 {
1388 	struct pflow_parsed_del d = {};
1389 	int error;
1390 
1391 	error = nl_parse_nlmsg(hdr, &del_parser, npt, &d);
1392 	if (error != 0)
1393 		return (error);
1394 
1395 	error = pflow_destroy(d.id, true);
1396 
1397 	return (error);
1398 }
1399 
1400 struct pflow_parsed_get {
1401 	int id;
1402 };
1403 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1404 #define	_OUT(_field)	offsetof(struct pflow_parsed_get, _field)
1405 static const struct nlattr_parser nla_p_get[] = {
1406 	{ .type = PFLOWNL_GET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1407 };
1408 static const struct nlfield_parser nlf_p_get[] = {};
1409 #undef _IN
1410 #undef _OUT
1411 NL_DECLARE_PARSER(get_parser, struct genlmsghdr, nlf_p_get, nla_p_get);
1412 
1413 static bool
1414 nlattr_add_sockaddr(struct nl_writer *nw, int attr, const struct sockaddr *s)
1415 {
1416 	int off = nlattr_add_nested(nw, attr);
1417 	if (off == 0)
1418 		return (false);
1419 
1420 	nlattr_add_u8(nw, PFLOWNL_ADDR_FAMILY, s->sa_family);
1421 
1422 	switch (s->sa_family) {
1423 	case AF_INET: {
1424 		const struct sockaddr_in *in = (const struct sockaddr_in *)s;
1425 		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in->sin_port);
1426 		nlattr_add_in_addr(nw, PFLOWNL_ADDR_IP, &in->sin_addr);
1427 		break;
1428 	}
1429 	case AF_INET6: {
1430 		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)s;
1431 		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in6->sin6_port);
1432 		nlattr_add_in6_addr(nw, PFLOWNL_ADDR_IP6, &in6->sin6_addr);
1433 		break;
1434 	}
1435 	default:
1436 		panic("Unknown address family %d", s->sa_family);
1437 	}
1438 
1439 	nlattr_set_len(nw, off);
1440 	return (true);
1441 }
1442 
1443 static int
1444 pflow_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
1445 {
1446 	struct epoch_tracker et;
1447 	struct pflow_parsed_get g = {};
1448 	struct pflow_softc *sc = NULL;
1449 	struct nl_writer *nw = npt->nw;
1450 	struct genlmsghdr *ghdr_new;
1451 	int error;
1452 
1453 	error = nl_parse_nlmsg(hdr, &get_parser, npt, &g);
1454 	if (error != 0)
1455 		return (error);
1456 
1457 	NET_EPOCH_ENTER(et);
1458 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1459 		if (sc->sc_id == g.id)
1460 			break;
1461 	}
1462 	if (sc == NULL) {
1463 		error = ENOENT;
1464 		goto out;
1465 	}
1466 
1467 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1468 		nlmsg_abort(nw);
1469 		error = ENOMEM;
1470 		goto out;
1471 	}
1472 
1473 	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1474 	if (ghdr_new == NULL) {
1475 		nlmsg_abort(nw);
1476 		error = ENOMEM;
1477 		goto out;
1478 	}
1479 
1480 	ghdr_new->cmd = PFLOWNL_CMD_GET;
1481 	ghdr_new->version = 0;
1482 	ghdr_new->reserved = 0;
1483 
1484 	nlattr_add_u32(nw, PFLOWNL_GET_ID, sc->sc_id);
1485 	nlattr_add_u16(nw, PFLOWNL_GET_VERSION, sc->sc_version);
1486 	if (sc->sc_flowsrc)
1487 		nlattr_add_sockaddr(nw, PFLOWNL_GET_SRC, sc->sc_flowsrc);
1488 	if (sc->sc_flowdst)
1489 		nlattr_add_sockaddr(nw, PFLOWNL_GET_DST, sc->sc_flowdst);
1490 	nlattr_add_u32(nw, PFLOWNL_GET_OBSERVATION_DOMAIN,
1491 	    sc->sc_observation_dom);
1492 	nlattr_add_u8(nw, PFLOWNL_GET_SOCKET_STATUS, sc->so != NULL);
1493 
1494 	if (! nlmsg_end(nw)) {
1495 		nlmsg_abort(nw);
1496 		error = ENOMEM;
1497 	}
1498 
1499 out:
1500 	NET_EPOCH_EXIT(et);
1501 
1502 	return (error);
1503 }
1504 
1505 struct pflow_sockaddr {
1506 	union {
1507 		struct sockaddr_in in;
1508 		struct sockaddr_in6 in6;
1509 		struct sockaddr_storage storage;
1510 	};
1511 };
1512 static bool
1513 pflow_postparse_sockaddr(void *parsed_args, struct nl_pstate *npt __unused)
1514 {
1515 	struct pflow_sockaddr *s = (struct pflow_sockaddr *)parsed_args;
1516 
1517 	if (s->storage.ss_family == AF_INET)
1518 		s->storage.ss_len = sizeof(struct sockaddr_in);
1519 	else if (s->storage.ss_family == AF_INET6)
1520 		s->storage.ss_len = sizeof(struct sockaddr_in6);
1521 	else
1522 		return (false);
1523 
1524 	return (true);
1525 }
1526 
1527 #define	_OUT(_field)	offsetof(struct pflow_sockaddr, _field)
1528 static struct nlattr_parser nla_p_sockaddr[] = {
1529 	{ .type = PFLOWNL_ADDR_FAMILY, .off = _OUT(in.sin_family), .cb = nlattr_get_uint8 },
1530 	{ .type = PFLOWNL_ADDR_PORT, .off = _OUT(in.sin_port), .cb = nlattr_get_uint16 },
1531 	{ .type = PFLOWNL_ADDR_IP, .off = _OUT(in.sin_addr), .cb = nlattr_get_in_addr },
1532 	{ .type = PFLOWNL_ADDR_IP6, .off = _OUT(in6.sin6_addr), .cb = nlattr_get_in6_addr },
1533 };
1534 NL_DECLARE_ATTR_PARSER_EXT(addr_parser, nla_p_sockaddr, pflow_postparse_sockaddr);
1535 #undef _OUT
1536 
1537 struct pflow_parsed_set {
1538 	int id;
1539 	uint16_t version;
1540 	struct sockaddr_storage src;
1541 	struct sockaddr_storage dst;
1542 	uint32_t observation_dom;
1543 };
1544 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1545 #define	_OUT(_field)	offsetof(struct pflow_parsed_set, _field)
1546 static const struct nlattr_parser nla_p_set[] = {
1547 	{ .type = PFLOWNL_SET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1548 	{ .type = PFLOWNL_SET_VERSION, .off = _OUT(version), .cb = nlattr_get_uint16 },
1549 	{ .type = PFLOWNL_SET_SRC, .off = _OUT(src), .arg = &addr_parser, .cb = nlattr_get_nested },
1550 	{ .type = PFLOWNL_SET_DST, .off = _OUT(dst), .arg = &addr_parser, .cb = nlattr_get_nested },
1551 	{ .type = PFLOWNL_SET_OBSERVATION_DOMAIN, .off = _OUT(observation_dom), .cb = nlattr_get_uint32 },
1552 };
1553 static const struct nlfield_parser nlf_p_set[] = {};
1554 #undef _IN
1555 #undef _OUT
1556 NL_DECLARE_PARSER(set_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
1557 
/*
 * Apply a parsed SET request to an exporter.
 *
 * sc:     exporter to reconfigure; asserted to be in the state that
 *         PFLOW_ASSERT() checks.
 * pflowr: requested settings.  A zero version, a zero ss_len in src or
 *         dst, or a zero observation domain leaves that setting alone.
 * cred:   credentials used when a new socket has to be created.
 *
 * Pending packets are flushed first, then the destination and source
 * addresses are updated, the socket is (re)created or closed as
 * needed, and finally the observation domain, version, MTU and the
 * template timer are brought in line with the new configuration.
 *
 * Returns 0 on success, EINVAL for an unsupported version, ENOMEM if
 * an address buffer could not be allocated, or the error returned by
 * socreate()/sobind().  NOTE(review): on the ENOMEM and socket error
 * paths earlier changes made by this call are not rolled back.
 */
static int
pflow_set(struct pflow_softc *sc, const struct pflow_parsed_set *pflowr, struct ucred *cred)
{
	struct thread		*td;
	struct socket		*so;
	int			 error = 0;

	td = curthread;

	PFLOW_ASSERT(sc);

	/* Validate the version up front, before anything is modified. */
	if (pflowr->version != 0) {
		switch(pflowr->version) {
		case PFLOW_PROTO_5:
		case PFLOW_PROTO_10:
			break;
		default:
			return(EINVAL);
		}
	}

	/* Send out whatever was collected under the old configuration. */
	pflow_flush(sc);

	if (pflowr->dst.ss_len != 0) {
		/*
		 * A change of address family invalidates both the stored
		 * destination buffer and the existing socket.
		 */
		if (sc->sc_flowdst != NULL &&
		    sc->sc_flowdst->sa_family != pflowr->dst.ss_family) {
			free(sc->sc_flowdst, M_DEVBUF);
			sc->sc_flowdst = NULL;
			if (sc->so != NULL) {
				soclose(sc->so);
				sc->so = NULL;
			}
		}

		/* Same family: the existing buffer is overwritten in place. */
		switch (pflowr->dst.ss_family) {
		case AF_INET:
			if (sc->sc_flowdst == NULL) {
				if ((sc->sc_flowdst = malloc(
				    sizeof(struct sockaddr_in),
				    M_DEVBUF,  M_NOWAIT)) == NULL)
					return (ENOMEM);
			}
			memcpy(sc->sc_flowdst, &pflowr->dst,
			    sizeof(struct sockaddr_in));
			sc->sc_flowdst->sa_len = sizeof(struct
			    sockaddr_in);
			break;
		case AF_INET6:
			if (sc->sc_flowdst == NULL) {
				if ((sc->sc_flowdst = malloc(
				    sizeof(struct sockaddr_in6),
				    M_DEVBUF, M_NOWAIT)) == NULL)
					return (ENOMEM);
			}
			memcpy(sc->sc_flowdst, &pflowr->dst,
			    sizeof(struct sockaddr_in6));
			sc->sc_flowdst->sa_len = sizeof(struct
			    sockaddr_in6);
			break;
		default:
			break;
		}
	}

	if (pflowr->src.ss_len != 0) {
		/*
		 * Unlike the destination, a new source always discards the
		 * old address and closes the socket; the socket is created
		 * again further down.
		 */
		if (sc->sc_flowsrc != NULL)
			free(sc->sc_flowsrc, M_DEVBUF);
		sc->sc_flowsrc = NULL;
		if (sc->so != NULL) {
			soclose(sc->so);
			sc->so = NULL;
		}
		switch(pflowr->src.ss_family) {
		case AF_INET:
			if ((sc->sc_flowsrc = malloc(
			    sizeof(struct sockaddr_in),
			    M_DEVBUF, M_NOWAIT)) == NULL)
				return (ENOMEM);
			memcpy(sc->sc_flowsrc, &pflowr->src,
			    sizeof(struct sockaddr_in));
			sc->sc_flowsrc->sa_len = sizeof(struct
			    sockaddr_in);
			break;
		case AF_INET6:
			if ((sc->sc_flowsrc = malloc(
			    sizeof(struct sockaddr_in6),
			    M_DEVBUF, M_NOWAIT)) == NULL)
				return (ENOMEM);
			memcpy(sc->sc_flowsrc, &pflowr->src,
			    sizeof(struct sockaddr_in6));
			sc->sc_flowsrc->sa_len = sizeof(struct
			    sockaddr_in6);
			break;
		default:
			break;
		}
	}

	/*
	 * Bring the socket in line with the addresses: create (and
	 * optionally bind) one when there is none and the destination
	 * passes pflowvalidsockaddr(), or close the existing one when the
	 * destination no longer passes.  NOTE(review): the meaning of the
	 * second pflowvalidsockaddr() argument is not visible here.
	 */
	if (sc->so == NULL) {
		if (pflowvalidsockaddr(sc->sc_flowdst, 0)) {
			error = socreate(sc->sc_flowdst->sa_family,
			    &so, SOCK_DGRAM, IPPROTO_UDP, cred, td);
			if (error)
				return (error);
			if (pflowvalidsockaddr(sc->sc_flowsrc, 1)) {
				error = sobind(so, sc->sc_flowsrc, td);
				if (error) {
					soclose(so);
					return (error);
				}
			}
			/* Publish the socket only once it is fully set up. */
			sc->so = so;
		}
	} else if (!pflowvalidsockaddr(sc->sc_flowdst, 0)) {
		soclose(sc->so);
		sc->so = NULL;
	}

	if (pflowr->observation_dom != 0)
		sc->sc_observation_dom = pflowr->observation_dom;

	/* error check is above */
	if (pflowr->version != 0)
		sc->sc_version = pflowr->version;

	pflow_setmtu(sc, ETHERMTU);

	/* v5 has no IPv6 or template packets; IPFIX needs the template timer. */
	switch (sc->sc_version) {
	case PFLOW_PROTO_5:
		callout_stop(&sc->sc_tmo6);
		callout_stop(&sc->sc_tmo_tmpl);
		break;
	case PFLOW_PROTO_10:
		callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
		    pflow_timeout_tmpl, sc);
		break;
	default: /* NOTREACHED */
		break;
	}

	return (0);
}
1700 
1701 static int
1702 pflow_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
1703 {
1704 	struct epoch_tracker et;
1705 	struct pflow_parsed_set s = {};
1706 	struct pflow_softc *sc = NULL;
1707 	int error;
1708 
1709 	error = nl_parse_nlmsg(hdr, &set_parser, npt, &s);
1710 	if (error != 0)
1711 		return (error);
1712 
1713 	NET_EPOCH_ENTER(et);
1714 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1715 		if (sc->sc_id == s.id)
1716 			break;
1717 	}
1718 	if (sc == NULL) {
1719 		error = ENOENT;
1720 		goto out;
1721 	}
1722 
1723 	PFLOW_LOCK(sc);
1724 	error = pflow_set(sc, &s, nlp_get_cred(npt->nlp));
1725 	PFLOW_UNLOCK(sc);
1726 
1727 out:
1728 	NET_EPOCH_EXIT(et);
1729 	return (error);
1730 }
1731 
1732 static const struct genl_cmd pflow_cmds[] = {
1733 	{
1734 		.cmd_num = PFLOWNL_CMD_LIST,
1735 		.cmd_name = "LIST",
1736 		.cmd_cb = pflow_nl_list,
1737 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1738 		.cmd_priv = PRIV_NETINET_PF,
1739 	},
1740 	{
1741 		.cmd_num = PFLOWNL_CMD_CREATE,
1742 		.cmd_name = "CREATE",
1743 		.cmd_cb = pflow_nl_create,
1744 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1745 		.cmd_priv = PRIV_NETINET_PF,
1746 	},
1747 	{
1748 		.cmd_num = PFLOWNL_CMD_DEL,
1749 		.cmd_name = "DEL",
1750 		.cmd_cb = pflow_nl_del,
1751 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1752 		.cmd_priv = PRIV_NETINET_PF,
1753 	},
1754 	{
1755 		.cmd_num = PFLOWNL_CMD_GET,
1756 		.cmd_name = "GET",
1757 		.cmd_cb = pflow_nl_get,
1758 		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1759 		.cmd_priv = PRIV_NETINET_PF,
1760 	},
1761 	{
1762 		.cmd_num = PFLOWNL_CMD_SET,
1763 		.cmd_name = "SET",
1764 		.cmd_cb = pflow_nl_set,
1765 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1766 		.cmd_priv = PRIV_NETINET_PF,
1767 	},
1768 };
1769 
1770 static const struct nlhdr_parser *all_parsers[] = {
1771 	&del_parser,
1772 	&get_parser,
1773 	&set_parser,
1774 };
1775 
1776 static int
1777 pflow_init(void)
1778 {
1779 	bool ret;
1780 	int family_id __diagused;
1781 
1782 	NL_VERIFY_PARSERS(all_parsers);
1783 
1784 	family_id = genl_register_family(PFLOWNL_FAMILY_NAME, 0, 2, PFLOWNL_CMD_MAX);
1785 	MPASS(family_id != 0);
1786 	ret = genl_register_cmds(PFLOWNL_FAMILY_NAME, pflow_cmds, NL_ARRAY_LEN(pflow_cmds));
1787 
1788 	return (ret ? 0 : ENODEV);
1789 }
1790 
1791 static void
1792 pflow_uninit(void)
1793 {
1794 	genl_unregister_family(PFLOWNL_FAMILY_NAME);
1795 }
1796 
1797 static int
1798 pflow_modevent(module_t mod, int type, void *data)
1799 {
1800 	int error = 0;
1801 
1802 	switch (type) {
1803 	case MOD_LOAD:
1804 		error = pflow_init();
1805 		break;
1806 	case MOD_UNLOAD:
1807 		pflow_uninit();
1808 		break;
1809 	default:
1810 		error = EINVAL;
1811 		break;
1812 	}
1813 
1814 	return (error);
1815 }
1816 
1817 static moduledata_t pflow_mod = {
1818 	pflowname,
1819 	pflow_modevent,
1820 	0
1821 };
1822 
1823 DECLARE_MODULE(pflow, pflow_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
1824 MODULE_VERSION(pflow, 1);
1825 MODULE_DEPEND(pflow, pf, PF_MODVER, PF_MODVER, PF_MODVER);
1826