xref: /freebsd/sys/netpfil/pf/pflow.c (revision f92d9b1aad73fc47f8f0b960808ca2c1a938e9e7)
1 /*	$OpenBSD: if_pflow.c,v 1.100 2023/11/09 08:53:20 mvs Exp $	*/
2 
3 /*
4  * Copyright (c) 2023 Rubicon Communications, LLC (Netgate)
5  * Copyright (c) 2011 Florian Obser <florian@narrans.de>
6  * Copyright (c) 2011 Sebastian Benoit <benoit-lists@fb12.de>
7  * Copyright (c) 2008 Henning Brauer <henning@openbsd.org>
8  * Copyright (c) 2008 Joerg Goltermann <jg@osn.de>
9  *
10  * Permission to use, copy, modify, and distribute this software for any
11  * purpose with or without fee is hereby granted, provided that the above
12  * copyright notice and this permission notice appear in all copies.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
16  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
17  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
20  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21  */
22 
23 #include <sys/cdefs.h>
24 #include <sys/param.h>
25 #include <sys/bus.h>
26 #include <sys/callout.h>
27 #include <sys/endian.h>
28 #include <sys/interrupt.h>
29 #include <sys/kernel.h>
30 #include <sys/malloc.h>
31 #include <sys/module.h>
32 #include <sys/mbuf.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <sys/sockio.h>
36 #include <sys/sysctl.h>
37 #include <sys/systm.h>
38 #include <sys/priv.h>
39 
40 #include <net/if.h>
41 #include <net/if_types.h>
42 #include <net/bpf.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <netinet/if_ether.h>
46 #include <netinet/tcp.h>
47 
48 #include <netinet/ip.h>
49 #include <netinet/ip_icmp.h>
50 #include <netinet/ip_var.h>
51 #include <netinet/udp.h>
52 #include <netinet/udp_var.h>
53 #include <netinet/in_pcb.h>
54 
55 #include <netlink/netlink.h>
56 #include <netlink/netlink_ctl.h>
57 #include <netlink/netlink_generic.h>
58 #include <netlink/netlink_message_writer.h>
59 
60 #include <net/pfvar.h>
61 #include <net/pflow.h>
62 #include <net/if_var.h>
63 
64 #define PFLOW_MINMTU	\
65     (sizeof(struct pflow_header) + sizeof(struct pflow_flow))
66 
67 #ifdef PFLOWDEBUG
68 #define DPRINTF(x)	do { printf x ; } while (0)
69 #else
70 #define DPRINTF(x)
71 #endif
72 
73 static void	pflow_output_process(void *);
74 static int	pflow_create(int);
75 static int	pflow_destroy(int, bool);
76 static int	pflow_calc_mtu(struct pflow_softc *, int, int);
77 static void	pflow_setmtu(struct pflow_softc *, int);
78 static int	pflowvalidsockaddr(const struct sockaddr *, int);
79 
80 static struct mbuf	*pflow_get_mbuf(struct pflow_softc *, u_int16_t);
81 static void	pflow_flush(struct pflow_softc *);
82 static int	pflow_sendout_v5(struct pflow_softc *);
83 static int	pflow_sendout_ipfix(struct pflow_softc *, sa_family_t);
84 static int	pflow_sendout_ipfix_tmpl(struct pflow_softc *);
85 static int	pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *);
86 static void	pflow_timeout(void *);
87 static void	pflow_timeout6(void *);
88 static void	pflow_timeout_tmpl(void *);
89 static void	copy_flow_data(struct pflow_flow *, struct pflow_flow *,
90 	struct pf_kstate *, struct pf_state_key *, int, int);
91 static void	copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *,
92 	struct pflow_ipfix_flow4 *, struct pf_kstate *, struct pf_state_key *,
93 	struct pflow_softc *, int, int);
94 static void	copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *,
95 	struct pflow_ipfix_flow6 *, struct pf_kstate *, struct pf_state_key *,
96 	struct pflow_softc *, int, int);
97 static int	pflow_pack_flow(struct pf_kstate *, struct pf_state_key *,
98 	struct pflow_softc *);
99 static int	pflow_pack_flow_ipfix(struct pf_kstate *, struct pf_state_key *,
100 	struct pflow_softc *);
101 static int	export_pflow_if(struct pf_kstate*, struct pf_state_key *,
102 	struct pflow_softc *);
103 static int	copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc);
104 static int	copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow,
105 	struct pflow_softc *sc);
106 static int	copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow,
107 	struct pflow_softc *sc);
108 
109 static const char pflowname[] = "pflow";
110 
111 /**
112  * Locking concept
113  *
114  * The list of pflow devices (V_pflowif_list) is managed through epoch.
115  * It is safe to read the list without locking (while in NET_EPOCH).
116  * There may only be one simultaneous modifier, hence we must hold
117  * V_pflowif_list_mtx for every add/delete.
118  *
119  * Each pflow interface protects its own data with the sc_lock mutex.
120  *
121  * We do not require any pf locks, and in fact expect to be called without
122  * hashrow locks held.
123  **/
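
/*
 * A minimal sketch of the two access patterns above, as used by the code
 * below (sc and the list/lock names are the ones defined in this file):
 *
 *	// reader: walk the exporter list from within a NET_EPOCH section
 *	NET_EPOCH_ASSERT();
 *	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
 *		PFLOW_LOCK(sc);
 *		// ... use sc ...
 *		PFLOW_UNLOCK(sc);
 *	}
 *
 *	// writer: add or remove entries only under the list mutex
 *	mtx_lock(&V_pflowif_list_mtx);
 *	CK_LIST_INSERT_HEAD(&V_pflowif_list, sc, sc_next);
 *	mtx_unlock(&V_pflowif_list_mtx);
 */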
124 
125 VNET_DEFINE(struct unrhdr *,	pflow_unr);
126 #define	V_pflow_unr	VNET(pflow_unr)
127 VNET_DEFINE(CK_LIST_HEAD(, pflow_softc), pflowif_list);
128 #define	V_pflowif_list	VNET(pflowif_list)
129 VNET_DEFINE(struct mtx, pflowif_list_mtx);
130 #define	V_pflowif_list_mtx	VNET(pflowif_list_mtx)
131 VNET_DEFINE(struct pflowstats,	 pflowstats);
132 #define	V_pflowstats	VNET(pflowstats)
133 
134 #define	PFLOW_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock)
135 #define	PFLOW_UNLOCK(_sc)	mtx_unlock(&(_sc)->sc_lock)
136 #define	PFLOW_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED)
137 
138 static void
139 vnet_pflowattach(void)
140 {
141 	CK_LIST_INIT(&V_pflowif_list);
142 	mtx_init(&V_pflowif_list_mtx, "pflow interface list mtx", NULL, MTX_DEF);
143 
144 	V_pflow_unr = new_unrhdr(0, INT_MAX, &V_pflowif_list_mtx);
145 }
146 VNET_SYSINIT(vnet_pflowattach, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
147     vnet_pflowattach, NULL);
148 
149 static void
150 vnet_pflowdetach(void)
151 {
152 	struct pflow_softc	*sc;
153 
154 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
155 		pflow_destroy(sc->sc_id, false);
156 	}
157 
158 	MPASS(CK_LIST_EMPTY(&V_pflowif_list));
159 	delete_unrhdr(V_pflow_unr);
160 	mtx_destroy(&V_pflowif_list_mtx);
161 }
162 VNET_SYSUNINIT(vnet_pflowdetach, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
163     vnet_pflowdetach, NULL);
164 
165 static void
166 vnet_pflow_finalise(void)
167 {
168 	/*
169 	 * Ensure we've freed all interfaces, and do not have pending
170 	 * epoch cleanup calls.
171 	 */
172 	NET_EPOCH_DRAIN_CALLBACKS();
173 }
174 VNET_SYSUNINIT(vnet_pflow_finalise, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
175     vnet_pflow_finalise, NULL);
176 
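/*
 * Software interrupt handler for deferred transmission: under sc_lock the
 * pending mbufs are moved off the per-softc output queue, then each packet
 * is handed to pflow_sendout_mbuf() (and thus sosend()) without holding
 * the mutex.
 */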
177 static void
178 pflow_output_process(void *arg)
179 {
180 	struct mbufq ml;
181 	struct pflow_softc *sc = arg;
182 	struct mbuf *m;
183 
184 	mbufq_init(&ml, 0);
185 
186 	PFLOW_LOCK(sc);
187 	mbufq_concat(&ml, &sc->sc_outputqueue);
188 	PFLOW_UNLOCK(sc);
189 
190 	CURVNET_SET(sc->sc_vnet);
191 	while ((m = mbufq_dequeue(&ml)) != NULL) {
192 		pflow_sendout_mbuf(sc, m);
193 	}
194 	CURVNET_RESTORE();
195 }
196 
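/*
 * Allocate and initialise a new exporter instance: set the default protocol
 * version, pre-build the IPFIX IPv4/IPv6 templates, set up the output queue,
 * callouts and SWI handler, and link the new softc into V_pflowif_list.
 */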
197 static int
198 pflow_create(int unit)
199 {
200 	struct pflow_softc	*pflowif;
201 	int			 error;
202 
203 	pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
204 	mtx_init(&pflowif->sc_lock, "pflowlk", NULL, MTX_DEF);
205 	pflowif->sc_version = PFLOW_PROTO_DEFAULT;
206 
207 	/* ipfix template init */
208 	bzero(&pflowif->sc_tmpl_ipfix,sizeof(pflowif->sc_tmpl_ipfix));
209 	pflowif->sc_tmpl_ipfix.set_header.set_id =
210 	    htons(PFLOW_IPFIX_TMPL_SET_ID);
211 	pflowif->sc_tmpl_ipfix.set_header.set_length =
212 	    htons(sizeof(struct pflow_ipfix_tmpl));
213 
214 	/* ipfix IPv4 template */
215 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.tmpl_id =
216 	    htons(PFLOW_IPFIX_TMPL_IPV4_ID);
217 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.field_count
218 	    = htons(PFLOW_IPFIX_TMPL_IPV4_FIELD_COUNT);
219 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.field_id =
220 	    htons(PFIX_IE_sourceIPv4Address);
221 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.len = htons(4);
222 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.field_id =
223 	    htons(PFIX_IE_destinationIPv4Address);
224 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.len = htons(4);
225 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.field_id =
226 	    htons(PFIX_IE_ingressInterface);
227 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.len = htons(4);
228 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.field_id =
229 	    htons(PFIX_IE_egressInterface);
230 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.len = htons(4);
231 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.field_id =
232 	    htons(PFIX_IE_packetDeltaCount);
233 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.len = htons(8);
234 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.field_id =
235 	    htons(PFIX_IE_octetDeltaCount);
236 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.len = htons(8);
237 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.field_id =
238 	    htons(PFIX_IE_flowStartMilliseconds);
239 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.len = htons(8);
240 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.field_id =
241 	    htons(PFIX_IE_flowEndMilliseconds);
242 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.len = htons(8);
243 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.field_id =
244 	    htons(PFIX_IE_sourceTransportPort);
245 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.len = htons(2);
246 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.field_id =
247 	    htons(PFIX_IE_destinationTransportPort);
248 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.len = htons(2);
249 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.field_id =
250 	    htons(PFIX_IE_ipClassOfService);
251 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.len = htons(1);
252 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.field_id =
253 	    htons(PFIX_IE_protocolIdentifier);
254 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.len = htons(1);
255 
256 	/* ipfix IPv6 template */
257 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.tmpl_id =
258 	    htons(PFLOW_IPFIX_TMPL_IPV6_ID);
259 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.field_count =
260 	    htons(PFLOW_IPFIX_TMPL_IPV6_FIELD_COUNT);
261 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.field_id =
262 	    htons(PFIX_IE_sourceIPv6Address);
263 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.len = htons(16);
264 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.field_id =
265 	    htons(PFIX_IE_destinationIPv6Address);
266 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.len = htons(16);
267 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.field_id =
268 	    htons(PFIX_IE_ingressInterface);
269 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.len = htons(4);
270 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.field_id =
271 	    htons(PFIX_IE_egressInterface);
272 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.len = htons(4);
273 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.field_id =
274 	    htons(PFIX_IE_packetDeltaCount);
275 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.len = htons(8);
276 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.field_id =
277 	    htons(PFIX_IE_octetDeltaCount);
278 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.len = htons(8);
279 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.field_id =
280 	    htons(PFIX_IE_flowStartMilliseconds);
281 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.len = htons(8);
282 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.field_id =
283 	    htons(PFIX_IE_flowEndMilliseconds);
284 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.len = htons(8);
285 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.field_id =
286 	    htons(PFIX_IE_sourceTransportPort);
287 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.len = htons(2);
288 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.field_id =
289 	    htons(PFIX_IE_destinationTransportPort);
290 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.len = htons(2);
291 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.field_id =
292 	    htons(PFIX_IE_ipClassOfService);
293 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.len = htons(1);
294 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.field_id =
295 	    htons(PFIX_IE_protocolIdentifier);
296 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.len = htons(1);
297 
298 	pflowif->sc_id = unit;
299 	pflowif->sc_vnet = curvnet;
300 
301 	mbufq_init(&pflowif->sc_outputqueue, 8192);
302 	pflow_setmtu(pflowif, ETHERMTU);
303 
304 	callout_init_mtx(&pflowif->sc_tmo, &pflowif->sc_lock, 0);
305 	callout_init_mtx(&pflowif->sc_tmo6, &pflowif->sc_lock, 0);
306 	callout_init_mtx(&pflowif->sc_tmo_tmpl, &pflowif->sc_lock, 0);
307 
308 	error = swi_add(&pflowif->sc_swi_ie, pflowname, pflow_output_process,
309 	    pflowif, SWI_NET, INTR_MPSAFE, &pflowif->sc_swi_cookie);
310 	if (error) {
311 		free(pflowif, M_DEVBUF);
312 		return (error);
313 	}
314 
315 	/* Insert into list of pflows */
316 	mtx_lock(&V_pflowif_list_mtx);
317 	CK_LIST_INSERT_HEAD(&V_pflowif_list, pflowif, sc_next);
318 	mtx_unlock(&V_pflowif_list_mtx);
319 
320 	return (0);
321 }
322 
323 static void
324 pflow_free_cb(struct epoch_context *ctx)
325 {
326 	struct pflow_softc *sc;
327 
328 	sc = __containerof(ctx, struct pflow_softc, sc_epoch_ctx);
329 
330 	free(sc, M_DEVBUF);
331 }
332 
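/*
 * Tear down an exporter: unlink it from V_pflowif_list, optionally wait for
 * concurrent NET_EPOCH readers to finish, remove the SWI handler, stop the
 * callouts, release pending mbufs and the socket, and defer freeing the
 * softc itself to an epoch callback.
 */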
333 static int
334 pflow_destroy(int unit, bool drain)
335 {
336 	struct pflow_softc	*sc;
337 	int			 error __diagused;
338 
339 	mtx_lock(&V_pflowif_list_mtx);
340 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
341 		if (sc->sc_id == unit)
342 			break;
343 	}
344 	if (sc == NULL) {
345 		mtx_unlock(&V_pflowif_list_mtx);
346 		return (ENOENT);
347 	}
348 	CK_LIST_REMOVE(sc, sc_next);
349 	mtx_unlock(&V_pflowif_list_mtx);
350 
351 	sc->sc_dying = 1;
352 
353 	if (drain) {
354 		/* Let's be sure no one is using this interface any more. */
355 		NET_EPOCH_DRAIN_CALLBACKS();
356 	}
357 
358 	error = swi_remove(sc->sc_swi_cookie);
359 	MPASS(error == 0);
360 	error = intr_event_destroy(sc->sc_swi_ie);
361 	MPASS(error == 0);
362 
363 	callout_drain(&sc->sc_tmo);
364 	callout_drain(&sc->sc_tmo6);
365 	callout_drain(&sc->sc_tmo_tmpl);
366 
367 	m_freem(sc->sc_mbuf);
368 	m_freem(sc->sc_mbuf6);
369 
370 	PFLOW_LOCK(sc);
371 	mbufq_drain(&sc->sc_outputqueue);
372 	if (sc->so != NULL) {
373 		soclose(sc->so);
374 		sc->so = NULL;
375 	}
376 	if (sc->sc_flowdst != NULL)
377 		free(sc->sc_flowdst, M_DEVBUF);
378 	if (sc->sc_flowsrc != NULL)
379 		free(sc->sc_flowsrc, M_DEVBUF);
380 	PFLOW_UNLOCK(sc);
381 
382 	mtx_destroy(&sc->sc_lock);
383 
384 	free_unr(V_pflow_unr, unit);
385 
386 	NET_EPOCH_CALL(pflow_free_cb, &sc->sc_epoch_ctx);
387 
388 	return (0);
389 }
390 
391 static int
392 pflowvalidsockaddr(const struct sockaddr *sa, int ignore_port)
393 {
394 	const struct sockaddr_in6	*sin6;
395 	const struct sockaddr_in	*sin;
396 
397 	if (sa == NULL)
398 		return (0);
399 	switch(sa->sa_family) {
400 	case AF_INET:
401 		sin = (const struct sockaddr_in *)sa;
402 		return (sin->sin_addr.s_addr != INADDR_ANY &&
403 		    (ignore_port || sin->sin_port != 0));
404 	case AF_INET6:
405 		sin6 = (const struct sockaddr_in6 *)sa;
406 		return (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
407 		    (ignore_port || sin6->sin6_port != 0));
408 	default:
409 		return (0);
410 	}
411 }
412 
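/*
 * Compute how many IPFIX records of each address family fit into a packet
 * of the given MTU and clamp the counts to PFLOW_MAXFLOWS.  The resulting
 * packet size is returned, although pflow_setmtu() currently ignores it.
 */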
413 static int
414 pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
415 {
416 
417 	sc->sc_maxcount4 = (mtu - hdrsz -
418 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
419 	sc->sc_maxcount6 = (mtu - hdrsz -
420 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow6);
421 	if (sc->sc_maxcount4 > PFLOW_MAXFLOWS)
422 		sc->sc_maxcount4 = PFLOW_MAXFLOWS;
423 	if (sc->sc_maxcount6 > PFLOW_MAXFLOWS)
424 		sc->sc_maxcount6 = PFLOW_MAXFLOWS;
425 	return (hdrsz + sizeof(struct udpiphdr) +
426 	    MIN(sc->sc_maxcount4 * sizeof(struct pflow_ipfix_flow4),
427 	    sc->sc_maxcount6 * sizeof(struct pflow_ipfix_flow6)));
428 }
429 
430 static void
431 pflow_setmtu(struct pflow_softc *sc, int mtu_req)
432 {
433 	int	mtu;
434 
435 	mtu = mtu_req;
436 
437 	switch (sc->sc_version) {
438 	case PFLOW_PROTO_5:
439 		sc->sc_maxcount = (mtu - sizeof(struct pflow_header) -
440 		    sizeof(struct udpiphdr)) / sizeof(struct pflow_flow);
441 		if (sc->sc_maxcount > PFLOW_MAXFLOWS)
442 		    sc->sc_maxcount = PFLOW_MAXFLOWS;
443 		break;
444 	case PFLOW_PROTO_10:
445 		pflow_calc_mtu(sc, mtu, sizeof(struct pflow_v10_header));
446 		break;
447 	default: /* NOTREACHED */
448 		break;
449 	}
450 }
451 
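/*
 * Allocate a cluster mbuf for a new export packet.  For NetFlow v5 the
 * packet header is written immediately and the flow timeout armed; for
 * IPFIX only the set header is written here and the message header is
 * prepended at sendout time.
 */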
452 static struct mbuf *
453 pflow_get_mbuf(struct pflow_softc *sc, u_int16_t set_id)
454 {
455 	struct pflow_set_header	 set_hdr;
456 	struct pflow_header	 h;
457 	struct mbuf		*m;
458 
459 	MGETHDR(m, M_NOWAIT, MT_DATA);
460 	if (m == NULL) {
461 		V_pflowstats.pflow_onomem++;
462 		return (NULL);
463 	}
464 
465 	MCLGET(m, M_NOWAIT);
466 	if ((m->m_flags & M_EXT) == 0) {
467 		m_free(m);
468 		V_pflowstats.pflow_onomem++;
469 		return (NULL);
470 	}
471 
472 	m->m_len = m->m_pkthdr.len = 0;
473 
474 	if (sc == NULL)		/* get only a new empty mbuf */
475 		return (m);
476 
477 	switch (sc->sc_version) {
478 	case PFLOW_PROTO_5:
479 		/* populate pflow_header */
480 		h.reserved1 = 0;
481 		h.reserved2 = 0;
482 		h.count = 0;
483 		h.version = htons(PFLOW_PROTO_5);
484 		h.flow_sequence = htonl(sc->sc_gcounter);
485 		h.engine_type = PFLOW_ENGINE_TYPE;
486 		h.engine_id = PFLOW_ENGINE_ID;
487 		m_copyback(m, 0, PFLOW_HDRLEN, (caddr_t)&h);
488 
489 		sc->sc_count = 0;
490 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
491 		    pflow_timeout, sc);
492 		break;
493 	case PFLOW_PROTO_10:
494 		/* populate pflow_set_header */
495 		set_hdr.set_length = 0;
496 		set_hdr.set_id = htons(set_id);
497 		m_copyback(m, 0, PFLOW_SET_HDRLEN, (caddr_t)&set_hdr);
498 		break;
499 	default: /* NOTREACHED */
500 		break;
501 	}
502 
503 	return (m);
504 }
505 
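/*
 * The copy_flow_*() helpers fill two export records from a single pf state,
 * one per traffic direction, with source and destination swapped between
 * flow1 and flow2.  Only directions with a non-zero byte count are actually
 * exported (see pflow_pack_flow() and pflow_pack_flow_ipfix()).
 */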
506 static void
507 copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2,
508     struct pf_kstate *st, struct pf_state_key *sk, int src, int dst)
509 {
510 	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
511 	flow1->src_port = flow2->dest_port = sk->port[src];
512 	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
513 	flow1->dest_port = flow2->src_port = sk->port[dst];
514 
515 	flow1->dest_as = flow2->src_as =
516 	    flow1->src_as = flow2->dest_as = 0;
517 	flow1->if_index_in = htons(st->if_index_in);
518 	flow1->if_index_out = htons(st->if_index_out);
519 	flow2->if_index_in = htons(st->if_index_out);
520 	flow2->if_index_out = htons(st->if_index_in);
521 	flow1->dest_mask = flow2->src_mask =
522 	    flow1->src_mask = flow2->dest_mask = 0;
523 
524 	flow1->flow_packets = htonl(st->packets[0]);
525 	flow2->flow_packets = htonl(st->packets[1]);
526 	flow1->flow_octets = htonl(st->bytes[0]);
527 	flow2->flow_octets = htonl(st->bytes[1]);
528 
529 	/*
530 	 * Pretend the flow was created or expired when the machine came up if
531 	 * the creation time is in the future of the last time a packet was
532 	 * seen, or if the state was created/expired before boot (e.g. pfsync).
533 	 */
534 	flow1->flow_start = flow2->flow_start = st->creation < 0 ||
535 	    st->creation > st->expire ? htonl(0) : htonl(st->creation * 1000);
536 	flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? htonl(0) :
537 	    htonl(st->expire * 1000);
538 	flow1->tcp_flags = flow2->tcp_flags = 0;
539 	flow1->protocol = flow2->protocol = sk->proto;
540 	flow1->tos = flow2->tos = st->rule.ptr->tos;
541 }
542 
543 static void
544 copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *flow1,
545     struct pflow_ipfix_flow4 *flow2, struct pf_kstate *st,
546     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
547 {
548 	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
549 	flow1->src_port = flow2->dest_port = sk->port[src];
550 	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
551 	flow1->dest_port = flow2->src_port = sk->port[dst];
552 
553 	flow1->if_index_in = htonl(st->if_index_in);
554 	flow1->if_index_out = htonl(st->if_index_out);
555 	flow2->if_index_in = htonl(st->if_index_out);
556 	flow2->if_index_out = htonl(st->if_index_in);
557 
558 	flow1->flow_packets = htobe64(st->packets[0]);
559 	flow2->flow_packets = htobe64(st->packets[1]);
560 	flow1->flow_octets = htobe64(st->bytes[0]);
561 	flow2->flow_octets = htobe64(st->bytes[1]);
562 
563 	/*
564 	 * Pretend the flow was created when the machine came up if the creation
565 	 * time is in the future of the last time a packet was seen (e.g. pfsync).
566 	 */
567 	if (st->creation > st->expire)
568 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
569 		    time_uptime)*1000);
570 	else
571 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
572 		    (time_uptime - st->creation))*1000);
573 	flow1->flow_finish = flow2->flow_finish = htobe64((time_second -
574 	    (time_uptime - st->expire))*1000);
575 
576 	flow1->protocol = flow2->protocol = sk->proto;
577 	flow1->tos = flow2->tos = st->rule.ptr->tos;
578 }
579 
580 static void
581 copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *flow1,
582     struct pflow_ipfix_flow6 *flow2, struct pf_kstate *st,
583     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
584 {
585 	bcopy(&sk->addr[src].v6, &flow1->src_ip, sizeof(flow1->src_ip));
586 	bcopy(&sk->addr[src].v6, &flow2->dest_ip, sizeof(flow2->dest_ip));
587 	flow1->src_port = flow2->dest_port = sk->port[src];
588 	bcopy(&sk->addr[dst].v6, &flow1->dest_ip, sizeof(flow1->dest_ip));
589 	bcopy(&sk->addr[dst].v6, &flow2->src_ip, sizeof(flow2->src_ip));
590 	flow1->dest_port = flow2->src_port = sk->port[dst];
591 
592 	flow1->if_index_in = htonl(st->if_index_in);
593 	flow1->if_index_out = htonl(st->if_index_out);
594 	flow2->if_index_in = htonl(st->if_index_out);
595 	flow2->if_index_out = htonl(st->if_index_in);
596 
597 	flow1->flow_packets = htobe64(st->packets[0]);
598 	flow2->flow_packets = htobe64(st->packets[1]);
599 	flow1->flow_octets = htobe64(st->bytes[0]);
600 	flow2->flow_octets = htobe64(st->bytes[1]);
601 
602 	/*
603 	 * Pretend the flow was created when the machine came up if the creation
604 	 * time is in the future of the last time a packet was seen (e.g. pfsync).
605 	 */
606 	if (st->creation > st->expire)
607 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
608 		    time_uptime)*1000);
609 	else
610 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
611 		    (time_uptime - st->creation))*1000);
612 	flow1->flow_finish = flow2->flow_finish = htobe64((time_second -
613 	    (time_uptime - st->expire))*1000);
614 
615 	flow1->protocol = flow2->protocol = sk->proto;
616 	flow1->tos = flow2->tos = st->rule.ptr->tos;
617 }
618 
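/*
 * Entry point called from pf (e.g. when a state is removed) to export a
 * state.  The caller must be inside a NET_EPOCH section and is expected not
 * to hold state hashrow locks; the state is offered to every exporter whose
 * protocol version supports its address family.
 */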
619 int
620 export_pflow(struct pf_kstate *st)
621 {
622 	struct pflow_softc	*sc = NULL;
623 	struct pf_state_key	*sk;
624 
625 	NET_EPOCH_ASSERT();
626 
627 	sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
628 
629 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
630 		PFLOW_LOCK(sc);
631 		switch (sc->sc_version) {
632 		case PFLOW_PROTO_5:
633 			if (sk->af == AF_INET)
634 				export_pflow_if(st, sk, sc);
635 			break;
636 		case PFLOW_PROTO_10:
637 			if (sk->af == AF_INET || sk->af == AF_INET6)
638 				export_pflow_if(st, sk, sc);
639 			break;
640 		default: /* NOTREACHED */
641 			break;
642 		}
643 		PFLOW_UNLOCK(sc);
644 	}
645 
646 	return (0);
647 }
648 
649 static int
650 export_pflow_if(struct pf_kstate *st, struct pf_state_key *sk,
651     struct pflow_softc *sc)
652 {
653 	struct pf_kstate	 pfs_copy;
654 	u_int64_t		 bytes[2];
655 	int			 ret = 0;
656 
657 	if (sc->sc_version == PFLOW_PROTO_10)
658 		return (pflow_pack_flow_ipfix(st, sk, sc));
659 
660 	/* PFLOW_PROTO_5 */
661 	if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES)
662 	    && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES))
663 		return (pflow_pack_flow(st, sk, sc));
664 
665 	/* flows larger than PFLOW_MAXBYTES need special handling */
666 	bcopy(st, &pfs_copy, sizeof(pfs_copy));
667 	bytes[0] = pfs_copy.bytes[0];
668 	bytes[1] = pfs_copy.bytes[1];
669 
670 	while (bytes[0] > PFLOW_MAXBYTES) {
671 		pfs_copy.bytes[0] = PFLOW_MAXBYTES;
672 		pfs_copy.bytes[1] = 0;
673 
674 		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
675 			return (ret);
676 		if ((bytes[0] - PFLOW_MAXBYTES) > 0)
677 			bytes[0] -= PFLOW_MAXBYTES;
678 	}
679 
680 	while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) {
681 		pfs_copy.bytes[1] = PFLOW_MAXBYTES;
682 		pfs_copy.bytes[0] = 0;
683 
684 		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
685 			return (ret);
686 		if ((bytes[1] - PFLOW_MAXBYTES) > 0)
687 			bytes[1] -= PFLOW_MAXBYTES;
688 	}
689 
690 	pfs_copy.bytes[0] = bytes[0];
691 	pfs_copy.bytes[1] = bytes[1];
692 
693 	return (pflow_pack_flow(&pfs_copy, sk, sc));
694 }
695 
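/*
 * Append a NetFlow v5 record to the current export packet, allocating a new
 * one if needed, and flush the packet once it holds sc_maxcount records.
 * The IPFIX variants below follow the same pattern per address family.
 */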
696 static int
697 copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc)
698 {
699 	int		ret = 0;
700 
701 	PFLOW_ASSERT(sc);
702 
703 	if (sc->sc_mbuf == NULL) {
704 		if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL)
705 			return (ENOBUFS);
706 	}
707 	m_copyback(sc->sc_mbuf, PFLOW_HDRLEN +
708 	    (sc->sc_count * sizeof(struct pflow_flow)),
709 	    sizeof(struct pflow_flow), (caddr_t)flow);
710 
711 	if (V_pflowstats.pflow_flows == sc->sc_gcounter)
712 		V_pflowstats.pflow_flows++;
713 	sc->sc_gcounter++;
714 	sc->sc_count++;
715 
716 	if (sc->sc_count >= sc->sc_maxcount)
717 		ret = pflow_sendout_v5(sc);
718 
719 	return(ret);
720 }
721 
722 static int
723 copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc)
724 {
725 	int		ret = 0;
726 
727 	PFLOW_ASSERT(sc);
728 
729 	if (sc->sc_mbuf == NULL) {
730 		if ((sc->sc_mbuf =
731 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) {
732 			return (ENOBUFS);
733 		}
734 		sc->sc_count4 = 0;
735 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
736 		    pflow_timeout, sc);
737 	}
738 	m_copyback(sc->sc_mbuf, PFLOW_SET_HDRLEN +
739 	    (sc->sc_count4 * sizeof(struct pflow_ipfix_flow4)),
740 	    sizeof(struct pflow_ipfix_flow4), (caddr_t)flow);
741 
742 	if (V_pflowstats.pflow_flows == sc->sc_gcounter)
743 		V_pflowstats.pflow_flows++;
744 	sc->sc_gcounter++;
745 	sc->sc_count4++;
746 
747 	if (sc->sc_count4 >= sc->sc_maxcount4)
748 		ret = pflow_sendout_ipfix(sc, AF_INET);
749 	return(ret);
750 }
751 
752 static int
753 copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc)
754 {
755 	int		ret = 0;
756 
757 	PFLOW_ASSERT(sc);
758 
759 	if (sc->sc_mbuf6 == NULL) {
760 		if ((sc->sc_mbuf6 =
761 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) {
762 			return (ENOBUFS);
763 		}
764 		sc->sc_count6 = 0;
765 		callout_reset(&sc->sc_tmo6, PFLOW_TIMEOUT * hz,
766 		    pflow_timeout6, sc);
767 	}
768 	m_copyback(sc->sc_mbuf6, PFLOW_SET_HDRLEN +
769 	    (sc->sc_count6 * sizeof(struct pflow_ipfix_flow6)),
770 	    sizeof(struct pflow_ipfix_flow6), (caddr_t)flow);
771 
772 	if (V_pflowstats.pflow_flows == sc->sc_gcounter)
773 		V_pflowstats.pflow_flows++;
774 	sc->sc_gcounter++;
775 	sc->sc_count6++;
776 
777 	if (sc->sc_count6 >= sc->sc_maxcount6)
778 		ret = pflow_sendout_ipfix(sc, AF_INET6);
779 
780 	return(ret);
781 }
782 
783 static int
784 pflow_pack_flow(struct pf_kstate *st, struct pf_state_key *sk,
785     struct pflow_softc *sc)
786 {
787 	struct pflow_flow	 flow1;
788 	struct pflow_flow	 flow2;
789 	int			 ret = 0;
790 
791 	bzero(&flow1, sizeof(flow1));
792 	bzero(&flow2, sizeof(flow2));
793 
794 	if (st->direction == PF_OUT)
795 		copy_flow_data(&flow1, &flow2, st, sk, 1, 0);
796 	else
797 		copy_flow_data(&flow1, &flow2, st, sk, 0, 1);
798 
799 	if (st->bytes[0] != 0) /* first flow from state */
800 		ret = copy_flow_to_m(&flow1, sc);
801 
802 	if (st->bytes[1] != 0) /* second flow from state */
803 		ret = copy_flow_to_m(&flow2, sc);
804 
805 	return (ret);
806 }
807 
808 static int
809 pflow_pack_flow_ipfix(struct pf_kstate *st, struct pf_state_key *sk,
810     struct pflow_softc *sc)
811 {
812 	struct pflow_ipfix_flow4	 flow4_1, flow4_2;
813 	struct pflow_ipfix_flow6	 flow6_1, flow6_2;
814 	int				 ret = 0;
815 	if (sk->af == AF_INET) {
816 		bzero(&flow4_1, sizeof(flow4_1));
817 		bzero(&flow4_2, sizeof(flow4_2));
818 
819 		if (st->direction == PF_OUT)
820 			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
821 			    1, 0);
822 		else
823 			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
824 			    0, 1);
825 
826 		if (st->bytes[0] != 0) /* first flow from state */
827 			ret = copy_flow_ipfix_4_to_m(&flow4_1, sc);
828 
829 		if (st->bytes[1] != 0) /* second flow from state */
830 			ret = copy_flow_ipfix_4_to_m(&flow4_2, sc);
831 	} else if (sk->af == AF_INET6) {
832 		bzero(&flow6_1, sizeof(flow6_1));
833 		bzero(&flow6_2, sizeof(flow6_2));
834 
835 		if (st->direction == PF_OUT)
836 			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
837 			    1, 0);
838 		else
839 			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
840 			    0, 1);
841 
842 		if (st->bytes[0] != 0) /* first flow from state */
843 			ret = copy_flow_ipfix_6_to_m(&flow6_1, sc);
844 
845 		if (st->bytes[1] != 0) /* second flow from state */
846 			ret = copy_flow_ipfix_6_to_m(&flow6_2, sc);
847 	}
848 	return (ret);
849 }
850 
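/*
 * Callout handlers: a partially filled export packet is flushed after
 * PFLOW_TIMEOUT seconds, and for IPFIX the template is re-announced every
 * PFLOW_TMPL_TIMEOUT seconds.
 */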
851 static void
852 pflow_timeout(void *v)
853 {
854 	struct pflow_softc	*sc = v;
855 
856 	PFLOW_ASSERT(sc);
857 	CURVNET_SET(sc->sc_vnet);
858 
859 	switch (sc->sc_version) {
860 	case PFLOW_PROTO_5:
861 		pflow_sendout_v5(sc);
862 		break;
863 	case PFLOW_PROTO_10:
864 		pflow_sendout_ipfix(sc, AF_INET);
865 		break;
866 	default: /* NOTREACHED */
867 		panic("Unsupported version %d", sc->sc_version);
868 		break;
869 	}
870 
871 	CURVNET_RESTORE();
872 }
873 
874 static void
875 pflow_timeout6(void *v)
876 {
877 	struct pflow_softc	*sc = v;
878 
879 	PFLOW_ASSERT(sc);
880 
881 	if (sc->sc_version != PFLOW_PROTO_10)
882 		return;
883 
884 	CURVNET_SET(sc->sc_vnet);
885 	pflow_sendout_ipfix(sc, AF_INET6);
886 	CURVNET_RESTORE();
887 }
888 
889 static void
890 pflow_timeout_tmpl(void *v)
891 {
892 	struct pflow_softc	*sc = v;
893 
894 	PFLOW_ASSERT(sc);
895 
896 	if (sc->sc_version != PFLOW_PROTO_10)
897 		return;
898 
899 	CURVNET_SET(sc->sc_vnet);
900 	pflow_sendout_ipfix_tmpl(sc);
901 	CURVNET_RESTORE();
902 }
903 
904 static void
905 pflow_flush(struct pflow_softc *sc)
906 {
907 	PFLOW_ASSERT(sc);
908 
909 	switch (sc->sc_version) {
910 	case PFLOW_PROTO_5:
911 		pflow_sendout_v5(sc);
912 		break;
913 	case PFLOW_PROTO_10:
914 		pflow_sendout_ipfix(sc, AF_INET);
915 		pflow_sendout_ipfix(sc, AF_INET6);
916 		break;
917 	default: /* NOTREACHED */
918 		break;
919 	}
920 }
921 
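/*
 * Finalise and queue a NetFlow v5 packet: fill in the record count, uptime
 * and current time, then enqueue the mbuf and schedule the SWI handler,
 * which performs the actual send.
 */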
922 static int
923 pflow_sendout_v5(struct pflow_softc *sc)
924 {
925 	struct mbuf		*m = sc->sc_mbuf;
926 	struct pflow_header	*h;
927 	struct timespec		tv;
928 
929 	PFLOW_ASSERT(sc);
930 
931 	if (m == NULL)
932 		return (0);
933 
934 	sc->sc_mbuf = NULL;
935 
936 	V_pflowstats.pflow_packets++;
937 	h = mtod(m, struct pflow_header *);
938 	h->count = htons(sc->sc_count);
939 
940 	/* populate pflow_header */
941 	h->uptime_ms = htonl(time_uptime * 1000);
942 
943 	getnanotime(&tv);
944 	h->time_sec = htonl(tv.tv_sec);			/* XXX 2038 */
945 	h->time_nanosec = htonl(tv.tv_nsec);
946 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
947 		swi_sched(sc->sc_swi_cookie, 0);
948 
949 	return (0);
950 }
951 
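/*
 * Finalise and queue an IPFIX data set for the given address family: patch
 * the set length, prepend the IPFIX message header, advance the flow
 * sequence by the number of records and hand the mbuf to the SWI handler.
 */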
952 static int
953 pflow_sendout_ipfix(struct pflow_softc *sc, sa_family_t af)
954 {
955 	struct mbuf			*m;
956 	struct pflow_v10_header		*h10;
957 	struct pflow_set_header		*set_hdr;
958 	u_int32_t			 count;
959 	int				 set_length;
960 
961 	PFLOW_ASSERT(sc);
962 
963 	switch (af) {
964 	case AF_INET:
965 		m = sc->sc_mbuf;
966 		callout_stop(&sc->sc_tmo);
967 		if (m == NULL)
968 			return (0);
969 		sc->sc_mbuf = NULL;
970 		count = sc->sc_count4;
971 		set_length = sizeof(struct pflow_set_header)
972 		    + sc->sc_count4 * sizeof(struct pflow_ipfix_flow4);
973 		break;
974 	case AF_INET6:
975 		m = sc->sc_mbuf6;
976 		callout_stop(&sc->sc_tmo6);
977 		if (m == NULL)
978 			return (0);
979 		sc->sc_mbuf6 = NULL;
980 		count = sc->sc_count6;
981 		set_length = sizeof(struct pflow_set_header)
982 		    + sc->sc_count6 * sizeof(struct pflow_ipfix_flow6);
983 		break;
984 	default:
985 		panic("Unsupported AF %d", af);
986 	}
987 
988 	V_pflowstats.pflow_packets++;
989 	set_hdr = mtod(m, struct pflow_set_header *);
990 	set_hdr->set_length = htons(set_length);
991 
992 	/* populate pflow_v10_header */
993 	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
994 	if (m == NULL) {
995 		V_pflowstats.pflow_onomem++;
996 		return (ENOBUFS);
997 	}
998 	h10 = mtod(m, struct pflow_v10_header *);
999 	h10->version = htons(PFLOW_PROTO_10);
1000 	h10->length = htons(PFLOW_IPFIX_HDRLEN + set_length);
1001 	h10->time_sec = htonl(time_second);		/* XXX 2038 */
1002 	h10->flow_sequence = htonl(sc->sc_sequence);
1003 	sc->sc_sequence += count;
1004 	h10->observation_dom = htonl(PFLOW_ENGINE_TYPE);
1005 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1006 		swi_sched(sc->sc_swi_cookie, 0);
1007 
1008 	return (0);
1009 }
1010 
1011 static int
1012 pflow_sendout_ipfix_tmpl(struct pflow_softc *sc)
1013 {
1014 	struct mbuf			*m;
1015 	struct pflow_v10_header		*h10;
1016 
1017 	PFLOW_ASSERT(sc);
1018 
1019 	m = pflow_get_mbuf(sc, 0);
1020 	if (m == NULL)
1021 		return (0);
1022 	m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl),
1023 	    (caddr_t)&sc->sc_tmpl_ipfix);
1024 
1025 	V_pflowstats.pflow_packets++;
1026 
1027 	/* populate pflow_v10_header */
1028 	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
1029 	if (m == NULL) {
1030 		V_pflowstats.pflow_onomem++;
1031 		return (ENOBUFS);
1032 	}
1033 	h10 = mtod(m, struct pflow_v10_header *);
1034 	h10->version = htons(PFLOW_PROTO_10);
1035 	h10->length = htons(PFLOW_IPFIX_HDRLEN + sizeof(struct
1036 	    pflow_ipfix_tmpl));
1037 	h10->time_sec = htonl(time_second);		/* XXX 2038 */
1038 	h10->flow_sequence = htonl(sc->sc_sequence);
1039 	h10->observation_dom = htonl(PFLOW_ENGINE_TYPE);
1040 
1041 	callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
1042 	    pflow_timeout_tmpl, sc);
1043 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1044 		swi_sched(sc->sc_swi_cookie, 0);
1045 
1046 	return (0);
1047 }
1048 
1049 static int
1050 pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
1051 {
1052 	if (sc->so == NULL) {
1053 		m_freem(m);
1054 		return (EINVAL);
1055 	}
1056 	return (sosend(sc->so, sc->sc_flowdst, NULL, m, NULL, 0, curthread));
1057 }
1058 
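/*
 * Netlink control plane.  Exporters are managed through the "pflow" generic
 * netlink family (see pflow_cmds[] below) with LIST, CREATE, DEL, GET and
 * SET commands; userland tools such as pflowctl(8) are expected to drive
 * creation and configuration through these commands.
 */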
1059 static int
1060 pflow_nl_list(struct nlmsghdr *hdr, struct nl_pstate *npt)
1061 {
1062 	struct epoch_tracker	 et;
1063 	struct pflow_softc	*sc = NULL;
1064 	struct nl_writer	 *nw = npt->nw;
1065 	int			 error = 0;
1066 
1067 	hdr->nlmsg_flags |= NLM_F_MULTI;
1068 
1069 	NET_EPOCH_ENTER(et);
1070 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1071 		if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1072 			error = ENOMEM;
1073 			goto out;
1074 		}
1075 
1076 		struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1077 		ghdr_new->cmd = PFLOWNL_CMD_LIST;
1078 		ghdr_new->version = 0;
1079 		ghdr_new->reserved = 0;
1080 
1081 		nlattr_add_u32(nw, PFLOWNL_L_ID, sc->sc_id);
1082 
1083 		if (! nlmsg_end(nw)) {
1084 			error = ENOMEM;
1085 			goto out;
1086 		}
1087 	}
1088 
1089 out:
1090 	NET_EPOCH_EXIT(et);
1091 
1092 	if (error != 0)
1093 		nlmsg_abort(nw);
1094 
1095 	return (error);
1096 }
1097 
1098 static int
1099 pflow_nl_create(struct nlmsghdr *hdr, struct nl_pstate *npt)
1100 {
1101 	struct nl_writer	 *nw = npt->nw;
1102 	int			 error = 0;
1103 	int			 unit;
1104 
1105 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1106 		return (ENOMEM);
1107 	}
1108 
1109 	struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1110 	ghdr_new->cmd = PFLOWNL_CMD_CREATE;
1111 	ghdr_new->version = 0;
1112 	ghdr_new->reserved = 0;
1113 
1114 	unit = alloc_unr(V_pflow_unr);
1115 
1116 	error = pflow_create(unit);
1117 	if (error != 0) {
1118 		free_unr(V_pflow_unr, unit);
1119 		nlmsg_abort(nw);
1120 		return (error);
1121 	}
1122 
1123 	nlattr_add_s32(nw, PFLOWNL_CREATE_ID, unit);
1124 
1125 	if (! nlmsg_end(nw)) {
1126 		pflow_destroy(unit, true);
1127 		return (ENOMEM);
1128 	}
1129 
1130 	return (0);
1131 }
1132 
1133 struct pflow_parsed_del {
1134 	int id;
1135 };
1136 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1137 #define	_OUT(_field)	offsetof(struct pflow_parsed_del, _field)
1138 static const struct nlattr_parser nla_p_del[] = {
1139 	{ .type = PFLOWNL_DEL_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1140 };
1141 static const struct nlfield_parser nlf_p_del[] = {};
1142 #undef _IN
1143 #undef _OUT
1144 NL_DECLARE_PARSER(del_parser, struct genlmsghdr, nlf_p_del, nla_p_del);
1145 
1146 static int
1147 pflow_nl_del(struct nlmsghdr *hdr, struct nl_pstate *npt)
1148 {
1149 	struct pflow_parsed_del d = {};
1150 	int error;
1151 
1152 	error = nl_parse_nlmsg(hdr, &del_parser, npt, &d);
1153 	if (error != 0)
1154 		return (error);
1155 
1156 	error = pflow_destroy(d.id, true);
1157 
1158 	return (error);
1159 }
1160 
1161 struct pflow_parsed_get {
1162 	int id;
1163 };
1164 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1165 #define	_OUT(_field)	offsetof(struct pflow_parsed_get, _field)
1166 static const struct nlattr_parser nla_p_get[] = {
1167 	{ .type = PFLOWNL_GET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1168 };
1169 static const struct nlfield_parser nlf_p_get[] = {};
1170 #undef _IN
1171 #undef _OUT
1172 NL_DECLARE_PARSER(get_parser, struct genlmsghdr, nlf_p_get, nla_p_get);
1173 
1174 static bool
1175 nlattr_add_sockaddr(struct nl_writer *nw, int attr, const struct sockaddr *s)
1176 {
1177 	int off = nlattr_add_nested(nw, attr);
1178 	if (off == 0)
1179 		return (false);
1180 
1181 	nlattr_add_u8(nw, PFLOWNL_ADDR_FAMILY, s->sa_family);
1182 
1183 	switch (s->sa_family) {
1184 	case AF_INET: {
1185 		const struct sockaddr_in *in = (const struct sockaddr_in *)s;
1186 		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in->sin_port);
1187 		nlattr_add_in_addr(nw, PFLOWNL_ADDR_IP, &in->sin_addr);
1188 		break;
1189 	}
1190 	case AF_INET6: {
1191 		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)s;
1192 		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in6->sin6_port);
1193 		nlattr_add_in6_addr(nw, PFLOWNL_ADDR_IP6, &in6->sin6_addr);
1194 		break;
1195 	}
1196 	default:
1197 		panic("Unknown address family %d", s->sa_family);
1198 	}
1199 
1200 	nlattr_set_len(nw, off);
1201 	return (true);
1202 }
1203 
1204 static int
1205 pflow_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
1206 {
1207 	struct epoch_tracker et;
1208 	struct pflow_parsed_get g = {};
1209 	struct pflow_softc *sc = NULL;
1210 	struct nl_writer *nw = npt->nw;
1211 	struct genlmsghdr *ghdr_new;
1212 	int error;
1213 
1214 	error = nl_parse_nlmsg(hdr, &get_parser, npt, &g);
1215 	if (error != 0)
1216 		return (error);
1217 
1218 	NET_EPOCH_ENTER(et);
1219 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1220 		if (sc->sc_id == g.id)
1221 			break;
1222 	}
1223 	if (sc == NULL) {
1224 		error = ENOENT;
1225 		goto out;
1226 	}
1227 
1228 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1229 		nlmsg_abort(nw);
1230 		error = ENOMEM;
1231 		goto out;
1232 	}
1233 
1234 	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1235 	if (ghdr_new == NULL) {
1236 		nlmsg_abort(nw);
1237 		error = ENOMEM;
1238 		goto out;
1239 	}
1240 
1241 	ghdr_new->cmd = PFLOWNL_CMD_GET;
1242 	ghdr_new->version = 0;
1243 	ghdr_new->reserved = 0;
1244 
1245 	nlattr_add_u32(nw, PFLOWNL_GET_ID, sc->sc_id);
1246 	nlattr_add_u16(nw, PFLOWNL_GET_VERSION, sc->sc_version);
1247 	if (sc->sc_flowsrc)
1248 		nlattr_add_sockaddr(nw, PFLOWNL_GET_SRC, sc->sc_flowsrc);
1249 	if (sc->sc_flowdst)
1250 		nlattr_add_sockaddr(nw, PFLOWNL_GET_DST, sc->sc_flowdst);
1251 
1252 	if (! nlmsg_end(nw)) {
1253 		nlmsg_abort(nw);
1254 		error = ENOMEM;
1255 	}
1256 
1257 out:
1258 	NET_EPOCH_EXIT(et);
1259 
1260 	return (error);
1261 }
1262 
1263 struct pflow_sockaddr {
1264 	union {
1265 		struct sockaddr_in in;
1266 		struct sockaddr_in6 in6;
1267 		struct sockaddr_storage storage;
1268 	};
1269 };
1270 static bool
1271 pflow_postparse_sockaddr(void *parsed_args, struct nl_pstate *npt __unused)
1272 {
1273 	struct pflow_sockaddr *s = (struct pflow_sockaddr *)parsed_args;
1274 
1275 	if (s->storage.ss_family == AF_INET)
1276 		s->storage.ss_len = sizeof(struct sockaddr_in);
1277 	else if (s->storage.ss_family == AF_INET6)
1278 		s->storage.ss_len = sizeof(struct sockaddr_in6);
1279 	else
1280 		return (false);
1281 
1282 	return (true);
1283 }
1284 
1285 #define	_OUT(_field)	offsetof(struct pflow_sockaddr, _field)
1286 static struct nlattr_parser nla_p_sockaddr[] = {
1287 	{ .type = PFLOWNL_ADDR_FAMILY, .off = _OUT(in.sin_family), .cb = nlattr_get_uint8 },
1288 	{ .type = PFLOWNL_ADDR_PORT, .off = _OUT(in.sin_port), .cb = nlattr_get_uint16 },
1289 	{ .type = PFLOWNL_ADDR_IP, .off = _OUT(in.sin_addr), .cb = nlattr_get_in_addr },
1290 	{ .type = PFLOWNL_ADDR_IP6, .off = _OUT(in6.sin6_addr), .cb = nlattr_get_in6_addr },
1291 };
1292 NL_DECLARE_ATTR_PARSER_EXT(addr_parser, nla_p_sockaddr, pflow_postparse_sockaddr);
1293 #undef _OUT
1294 
1295 struct pflow_parsed_set {
1296 	int id;
1297 	uint16_t version;
1298 	struct sockaddr_storage src;
1299 	struct sockaddr_storage dst;
1300 };
1301 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1302 #define	_OUT(_field)	offsetof(struct pflow_parsed_set, _field)
1303 static const struct nlattr_parser nla_p_set[] = {
1304 	{ .type = PFLOWNL_SET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1305 	{ .type = PFLOWNL_SET_VERSION, .off = _OUT(version), .cb = nlattr_get_uint16 },
1306 	{ .type = PFLOWNL_SET_SRC, .off = _OUT(src), .arg = &addr_parser, .cb = nlattr_get_nested },
1307 	{ .type = PFLOWNL_SET_DST, .off = _OUT(dst), .arg = &addr_parser, .cb = nlattr_get_nested },
1308 };
1309 static const struct nlfield_parser nlf_p_set[] = {};
1310 #undef _IN
1311 #undef _OUT
1312 NL_DECLARE_PARSER(set_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
1313 
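/*
 * Apply a parsed SET request to an exporter: validate the requested protocol
 * version, flush pending export data, install new source/destination
 * addresses, (re)create the UDP socket as needed and re-arm the callouts
 * appropriate for the selected version.
 */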
1314 static int
1315 pflow_set(struct pflow_softc *sc, const struct pflow_parsed_set *pflowr, struct ucred *cred)
1316 {
1317 	struct thread		*td;
1318 	struct socket		*so;
1319 	int			 error = 0;
1320 
1321 	td = curthread;
1322 
1323 	PFLOW_ASSERT(sc);
1324 
1325 	if (pflowr->version != 0) {
1326 		switch(pflowr->version) {
1327 		case PFLOW_PROTO_5:
1328 		case PFLOW_PROTO_10:
1329 			break;
1330 		default:
1331 			return(EINVAL);
1332 		}
1333 	}
1334 
1335 	pflow_flush(sc);
1336 
1337 	if (pflowr->dst.ss_len != 0) {
1338 		if (sc->sc_flowdst != NULL &&
1339 		    sc->sc_flowdst->sa_family != pflowr->dst.ss_family) {
1340 			free(sc->sc_flowdst, M_DEVBUF);
1341 			sc->sc_flowdst = NULL;
1342 			if (sc->so != NULL) {
1343 				soclose(sc->so);
1344 				sc->so = NULL;
1345 			}
1346 		}
1347 
1348 		switch (pflowr->dst.ss_family) {
1349 		case AF_INET:
1350 			if (sc->sc_flowdst == NULL) {
1351 				if ((sc->sc_flowdst = malloc(
1352 				    sizeof(struct sockaddr_in),
1353 				    M_DEVBUF,  M_NOWAIT)) == NULL)
1354 					return (ENOMEM);
1355 			}
1356 			memcpy(sc->sc_flowdst, &pflowr->dst,
1357 			    sizeof(struct sockaddr_in));
1358 			sc->sc_flowdst->sa_len = sizeof(struct
1359 			    sockaddr_in);
1360 			break;
1361 		case AF_INET6:
1362 			if (sc->sc_flowdst == NULL) {
1363 				if ((sc->sc_flowdst = malloc(
1364 				    sizeof(struct sockaddr_in6),
1365 				    M_DEVBUF, M_NOWAIT)) == NULL)
1366 					return (ENOMEM);
1367 			}
1368 			memcpy(sc->sc_flowdst, &pflowr->dst,
1369 			    sizeof(struct sockaddr_in6));
1370 			sc->sc_flowdst->sa_len = sizeof(struct
1371 			    sockaddr_in6);
1372 			break;
1373 		default:
1374 			break;
1375 		}
1376 	}
1377 
1378 	if (pflowr->src.ss_len != 0) {
1379 		if (sc->sc_flowsrc != NULL)
1380 			free(sc->sc_flowsrc, M_DEVBUF);
1381 		sc->sc_flowsrc = NULL;
1382 		if (sc->so != NULL) {
1383 			soclose(sc->so);
1384 			sc->so = NULL;
1385 		}
1386 		switch(pflowr->src.ss_family) {
1387 		case AF_INET:
1388 			if ((sc->sc_flowsrc = malloc(
1389 			    sizeof(struct sockaddr_in),
1390 			    M_DEVBUF, M_NOWAIT)) == NULL)
1391 				return (ENOMEM);
1392 			memcpy(sc->sc_flowsrc, &pflowr->src,
1393 			    sizeof(struct sockaddr_in));
1394 			sc->sc_flowsrc->sa_len = sizeof(struct
1395 			    sockaddr_in);
1396 			break;
1397 		case AF_INET6:
1398 			if ((sc->sc_flowsrc = malloc(
1399 			    sizeof(struct sockaddr_in6),
1400 			    M_DEVBUF, M_NOWAIT)) == NULL)
1401 				return (ENOMEM);
1402 			memcpy(sc->sc_flowsrc, &pflowr->src,
1403 			    sizeof(struct sockaddr_in6));
1404 			sc->sc_flowsrc->sa_len = sizeof(struct
1405 			    sockaddr_in6);
1406 			break;
1407 		default:
1408 			break;
1409 		}
1410 	}
1411 
1412 	if (sc->so == NULL) {
1413 		if (pflowvalidsockaddr(sc->sc_flowdst, 0)) {
1414 			error = socreate(sc->sc_flowdst->sa_family,
1415 			    &so, SOCK_DGRAM, IPPROTO_UDP, cred, td);
1416 			if (error)
1417 				return (error);
1418 			if (pflowvalidsockaddr(sc->sc_flowsrc, 1)) {
1419 				error = sobind(so, sc->sc_flowsrc, td);
1420 				if (error) {
1421 					soclose(so);
1422 					return (error);
1423 				}
1424 			}
1425 			sc->so = so;
1426 		}
1427 	} else if (!pflowvalidsockaddr(sc->sc_flowdst, 0)) {
1428 		soclose(sc->so);
1429 		sc->so = NULL;
1430 	}
1431 
1432 	/* the requested version was validated above */
1433 	if (pflowr->version != 0)
1434 		sc->sc_version = pflowr->version;
1435 
1436 	pflow_setmtu(sc, ETHERMTU);
1437 
1438 	switch (sc->sc_version) {
1439 	case PFLOW_PROTO_5:
1440 		callout_stop(&sc->sc_tmo6);
1441 		callout_stop(&sc->sc_tmo_tmpl);
1442 		break;
1443 	case PFLOW_PROTO_10:
1444 		callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
1445 		    pflow_timeout_tmpl, sc);
1446 		break;
1447 	default: /* NOTREACHED */
1448 		break;
1449 	}
1450 
1451 	return (0);
1452 }
1453 
1454 static int
1455 pflow_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
1456 {
1457 	struct epoch_tracker et;
1458 	struct pflow_parsed_set s = {};
1459 	struct pflow_softc *sc = NULL;
1460 	int error;
1461 
1462 	error = nl_parse_nlmsg(hdr, &set_parser, npt, &s);
1463 	if (error != 0)
1464 		return (error);
1465 
1466 	NET_EPOCH_ENTER(et);
1467 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1468 		if (sc->sc_id == s.id)
1469 			break;
1470 	}
1471 	if (sc == NULL) {
1472 		error = ENOENT;
1473 		goto out;
1474 	}
1475 
1476 	PFLOW_LOCK(sc);
1477 	error = pflow_set(sc, &s, nlp_get_cred(npt->nlp));
1478 	PFLOW_UNLOCK(sc);
1479 
1480 out:
1481 	NET_EPOCH_EXIT(et);
1482 	return (error);
1483 }
1484 
1485 static const struct genl_cmd pflow_cmds[] = {
1486 	{
1487 		.cmd_num = PFLOWNL_CMD_LIST,
1488 		.cmd_name = "LIST",
1489 		.cmd_cb = pflow_nl_list,
1490 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1491 		.cmd_priv = PRIV_NETINET_PF,
1492 	},
1493 	{
1494 		.cmd_num = PFLOWNL_CMD_CREATE,
1495 		.cmd_name = "CREATE",
1496 		.cmd_cb = pflow_nl_create,
1497 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1498 		.cmd_priv = PRIV_NETINET_PF,
1499 	},
1500 	{
1501 		.cmd_num = PFLOWNL_CMD_DEL,
1502 		.cmd_name = "DEL",
1503 		.cmd_cb = pflow_nl_del,
1504 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1505 		.cmd_priv = PRIV_NETINET_PF,
1506 	},
1507 	{
1508 		.cmd_num = PFLOWNL_CMD_GET,
1509 		.cmd_name = "GET",
1510 		.cmd_cb = pflow_nl_get,
1511 		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1512 		.cmd_priv = PRIV_NETINET_PF,
1513 	},
1514 	{
1515 		.cmd_num = PFLOWNL_CMD_SET,
1516 		.cmd_name = "SET",
1517 		.cmd_cb = pflow_nl_set,
1518 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1519 		.cmd_priv = PRIV_NETINET_PF,
1520 	},
1521 };
1522 
1523 static const struct nlhdr_parser *all_parsers[] = {
1524 	&del_parser,
1525 	&get_parser,
1526 	&set_parser,
1527 };
1528 
1529 static int
1530 pflow_init(void)
1531 {
1532 	bool ret;
1533 	int family_id __diagused;
1534 
1535 	NL_VERIFY_PARSERS(all_parsers);
1536 
1537 	family_id = genl_register_family(PFLOWNL_FAMILY_NAME, 0, 2, PFLOWNL_CMD_MAX);
1538 	MPASS(family_id != 0);
1539 	ret = genl_register_cmds(PFLOWNL_FAMILY_NAME, pflow_cmds, NL_ARRAY_LEN(pflow_cmds));
1540 
1541 	return (ret ? 0 : ENODEV);
1542 }
1543 
1544 static void
1545 pflow_uninit(void)
1546 {
1547 	genl_unregister_family(PFLOWNL_FAMILY_NAME);
1548 }
1549 
1550 static int
1551 pflow_modevent(module_t mod, int type, void *data)
1552 {
1553 	int error = 0;
1554 
1555 	switch (type) {
1556 	case MOD_LOAD:
1557 		error = pflow_init();
1558 		break;
1559 	case MOD_UNLOAD:
1560 		pflow_uninit();
1561 		break;
1562 	default:
1563 		error = EINVAL;
1564 		break;
1565 	}
1566 
1567 	return (error);
1568 }
1569 
1570 static moduledata_t pflow_mod = {
1571 	pflowname,
1572 	pflow_modevent,
1573 	0
1574 };
1575 
1576 DECLARE_MODULE(pflow, pflow_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
1577 MODULE_VERSION(pflow, 1);
1578 MODULE_DEPEND(pflow, pf, PF_MODVER, PF_MODVER, PF_MODVER);
1579