xref: /freebsd/sys/netpfil/pf/pf.c (revision 9ba7351fcfd7ccd6cdb5ca2b774a338ab12b1ee8)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2008 Henning Brauer
 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 *	$OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
 */

#include <sys/cdefs.h>
#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_pf.h"
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/gsb_crc32.h>
#include <sys/hash.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/md5.h>
#include <sys/random.h>
#include <sys/refcount.h>
#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/ucred.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/vnet.h>

#include <net/pfil.h>
#include <net/pfvar.h>
#include <net/if_pflog.h>
#include <net/if_pfsync.h>

#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
#include <netinet/ip.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

/* dummynet */
#include <netinet/ip_dummynet.h>
#include <netinet/ip_fw.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/ip_dn_private.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
#endif /* INET6 */

#include <netinet/sctp_header.h>
#include <netinet/sctp_crc32.h>

#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>

#define	DPFPRINTF(n, x)	do {						\
	if (V_pf_status.debug >= (n))					\
		printf x;						\
} while (0)
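/*
 * Example (hypothetical caller): the printf arguments are passed in their
 * own set of parentheses, following the usual pf convention:
 *
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: something happened\n"));
 */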

SDT_PROVIDER_DEFINE(pf);
SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *",
    "struct pf_kstate *");
SDT_PROBE_DEFINE5(pf, ip, state, lookup, "struct pfi_kkif *",
    "struct pf_state_key_cmp *", "int", "struct pf_pdesc *",
    "struct pf_kstate *");
SDT_PROBE_DEFINE2(pf, ip, , bound_iface, "struct pf_kstate *",
    "struct pfi_kkif *");
SDT_PROBE_DEFINE4(pf, ip, route_to, entry, "struct mbuf *",
    "struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *");
SDT_PROBE_DEFINE1(pf, ip, route_to, drop, "int");
SDT_PROBE_DEFINE2(pf, ip, route_to, output, "struct ifnet *", "int");
SDT_PROBE_DEFINE4(pf, ip6, route_to, entry, "struct mbuf *",
    "struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *");
SDT_PROBE_DEFINE1(pf, ip6, route_to, drop, "int");
SDT_PROBE_DEFINE2(pf, ip6, route_to, output, "struct ifnet *", "int");
SDT_PROBE_DEFINE4(pf, sctp, multihome, test, "struct pfi_kkif *",
    "struct pf_krule *", "struct mbuf *", "int");
SDT_PROBE_DEFINE2(pf, sctp, multihome, add, "uint32_t",
    "struct pf_sctp_source *");
SDT_PROBE_DEFINE3(pf, sctp, multihome, remove, "uint32_t",
    "struct pf_kstate *", "struct pf_sctp_source *");

SDT_PROBE_DEFINE3(pf, eth, test_rule, entry, "int", "struct ifnet *",
    "struct mbuf *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, test, "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE3(pf, eth, test_rule, mismatch,
    "int", "struct pf_keth_rule *", "char *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE2(pf, eth, test_rule, final_match,
    "int", "struct pf_keth_rule *");
SDT_PROBE_DEFINE2(pf, purge, state, rowcount, "int", "size_t");

/*
 * Global variables
 */

/* state tables */
VNET_DEFINE(struct pf_altqqueue,	 pf_altqs[4]);
VNET_DEFINE(struct pf_kpalist,		 pf_pabuf[2]);
VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_active);
VNET_DEFINE(struct pf_altqqueue *,	 pf_altq_ifs_active);
VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_inactive);
VNET_DEFINE(struct pf_altqqueue *,	 pf_altq_ifs_inactive);
VNET_DEFINE(struct pf_kstatus,		 pf_status);

VNET_DEFINE(u_int32_t,			 ticket_altqs_active);
VNET_DEFINE(u_int32_t,			 ticket_altqs_inactive);
VNET_DEFINE(int,			 altqs_inactive_open);
VNET_DEFINE(u_int32_t,			 ticket_pabuf);

VNET_DEFINE(MD5_CTX,			 pf_tcp_secret_ctx);
#define	V_pf_tcp_secret_ctx		 VNET(pf_tcp_secret_ctx)
VNET_DEFINE(u_char,			 pf_tcp_secret[16]);
#define	V_pf_tcp_secret			 VNET(pf_tcp_secret)
VNET_DEFINE(int,			 pf_tcp_secret_init);
#define	V_pf_tcp_secret_init		 VNET(pf_tcp_secret_init)
VNET_DEFINE(int,			 pf_tcp_iss_off);
#define	V_pf_tcp_iss_off		 VNET(pf_tcp_iss_off)
VNET_DECLARE(int,			 pf_vnet_active);
#define	V_pf_vnet_active		 VNET(pf_vnet_active)

VNET_DEFINE_STATIC(uint32_t, pf_purge_idx);
#define V_pf_purge_idx	VNET(pf_purge_idx)

#ifdef PF_WANT_32_TO_64_COUNTER
VNET_DEFINE_STATIC(uint32_t, pf_counter_periodic_iter);
#define	V_pf_counter_periodic_iter	VNET(pf_counter_periodic_iter)

VNET_DEFINE(struct allrulelist_head, pf_allrulelist);
VNET_DEFINE(size_t, pf_allrulecount);
VNET_DEFINE(struct pf_krule *, pf_rulemarker);
#endif

struct pf_sctp_endpoint;
RB_HEAD(pf_sctp_endpoints, pf_sctp_endpoint);
struct pf_sctp_source {
	sa_family_t			af;
	struct pf_addr			addr;
	TAILQ_ENTRY(pf_sctp_source)	entry;
};
TAILQ_HEAD(pf_sctp_sources, pf_sctp_source);
struct pf_sctp_endpoint
{
	uint32_t		 v_tag;
	struct pf_sctp_sources	 sources;
	RB_ENTRY(pf_sctp_endpoint)	entry;
};
static int
pf_sctp_endpoint_compare(struct pf_sctp_endpoint *a, struct pf_sctp_endpoint *b)
{
	return (a->v_tag - b->v_tag);
}
RB_PROTOTYPE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
RB_GENERATE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
VNET_DEFINE_STATIC(struct pf_sctp_endpoints, pf_sctp_endpoints);
#define V_pf_sctp_endpoints	VNET(pf_sctp_endpoints)
static struct mtx_padalign pf_sctp_endpoints_mtx;
MTX_SYSINIT(pf_sctp_endpoints_mtx, &pf_sctp_endpoints_mtx, "SCTP endpoints", MTX_DEF);
#define	PF_SCTP_ENDPOINTS_LOCK()	mtx_lock(&pf_sctp_endpoints_mtx)
#define	PF_SCTP_ENDPOINTS_UNLOCK()	mtx_unlock(&pf_sctp_endpoints_mtx)

/*
 * Queue for pf_intr() sends.
 */
static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
struct pf_send_entry {
	STAILQ_ENTRY(pf_send_entry)	pfse_next;
	struct mbuf			*pfse_m;
	enum {
		PFSE_IP,
		PFSE_IP6,
		PFSE_ICMP,
		PFSE_ICMP6,
	}				pfse_type;
	struct {
		int		type;
		int		code;
		int		mtu;
	} icmpopts;
};

STAILQ_HEAD(pf_send_head, pf_send_entry);
VNET_DEFINE_STATIC(struct pf_send_head, pf_sendqueue);
#define	V_pf_sendqueue	VNET(pf_sendqueue)

static struct mtx_padalign pf_sendqueue_mtx;
MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF);
#define	PF_SENDQ_LOCK()		mtx_lock(&pf_sendqueue_mtx)
#define	PF_SENDQ_UNLOCK()	mtx_unlock(&pf_sendqueue_mtx)

/*
 * Queue for pf_overload_task() tasks.
 */
struct pf_overload_entry {
	SLIST_ENTRY(pf_overload_entry)	next;
	struct pf_addr			addr;
	sa_family_t			af;
	uint8_t				dir;
	struct pf_krule			*rule;
};

SLIST_HEAD(pf_overload_head, pf_overload_entry);
VNET_DEFINE_STATIC(struct pf_overload_head, pf_overloadqueue);
#define V_pf_overloadqueue	VNET(pf_overloadqueue)
VNET_DEFINE_STATIC(struct task, pf_overloadtask);
#define	V_pf_overloadtask	VNET(pf_overloadtask)

static struct mtx_padalign pf_overloadqueue_mtx;
MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx,
    "pf overload/flush queue", MTX_DEF);
#define	PF_OVERLOADQ_LOCK()	mtx_lock(&pf_overloadqueue_mtx)
#define	PF_OVERLOADQ_UNLOCK()	mtx_unlock(&pf_overloadqueue_mtx)

VNET_DEFINE(struct pf_krulequeue, pf_unlinked_rules);
struct mtx_padalign pf_unlnkdrules_mtx;
MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
    MTX_DEF);

struct sx pf_config_lock;
SX_SYSINIT(pf_config_lock, &pf_config_lock, "pf config");

struct mtx_padalign pf_table_stats_lock;
MTX_SYSINIT(pf_table_stats_lock, &pf_table_stats_lock, "pf table stats",
    MTX_DEF);

VNET_DEFINE_STATIC(uma_zone_t,	pf_sources_z);
#define	V_pf_sources_z	VNET(pf_sources_z)
uma_zone_t		pf_mtag_z;
VNET_DEFINE(uma_zone_t,	 pf_state_z);
VNET_DEFINE(uma_zone_t,	 pf_state_key_z);
VNET_DEFINE(uma_zone_t,	 pf_udp_mapping_z);

VNET_DEFINE(struct unrhdr64, pf_stateid);

static void		 pf_src_tree_remove_state(struct pf_kstate *);
static void		 pf_init_threshold(struct pf_threshold *, u_int32_t,
			    u_int32_t);
static void		 pf_add_threshold(struct pf_threshold *);
static int		 pf_check_threshold(struct pf_threshold *);

static void		 pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
			    u_int16_t *, u_int16_t *, struct pf_addr *,
			    u_int16_t, u_int8_t, sa_family_t, sa_family_t);
static int		 pf_modulate_sack(struct pf_pdesc *,
			    struct tcphdr *, struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    int *, u_int16_t *, u_int16_t *);
static void		 pf_change_icmp(struct pf_addr *, u_int16_t *,
			    struct pf_addr *, struct pf_addr *, u_int16_t,
			    u_int16_t *, u_int16_t *, u_int16_t *,
			    u_int16_t *, u_int8_t, sa_family_t);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_icmp_af(int, void *);
static void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
			    sa_family_t, struct pf_krule *, int);
static void		 pf_detach_state(struct pf_kstate *);
static int		 pf_state_key_attach(struct pf_state_key *,
			    struct pf_state_key *, struct pf_kstate *);
static void		 pf_state_key_detach(struct pf_kstate *, int);
static int		 pf_state_key_ctor(void *, int, void *, int);
static u_int32_t	 pf_tcp_iss(struct pf_pdesc *);
static __inline void	 pf_dummynet_flag_remove(struct mbuf *m,
			    struct pf_mtag *pf_mtag);
static int		 pf_dummynet(struct pf_pdesc *, struct pf_kstate *,
			    struct pf_krule *, struct mbuf **);
static int		 pf_dummynet_route(struct pf_pdesc *,
			    struct pf_kstate *, struct pf_krule *,
			    struct ifnet *, struct sockaddr *, struct mbuf **);
static int		 pf_test_eth_rule(int, struct pfi_kkif *,
			    struct mbuf **);
static int		 pf_test_rule(struct pf_krule **, struct pf_kstate **,
			    struct pf_pdesc *, struct pf_krule **,
			    struct pf_kruleset **, struct inpcb *);
static int		 pf_create_state(struct pf_krule *, struct pf_krule *,
			    struct pf_krule *, struct pf_pdesc *,
			    struct pf_state_key *, struct pf_state_key *, int *,
			    struct pf_kstate **, int, u_int16_t, u_int16_t,
			    struct pf_krule_slist *, struct pf_udp_mapping *);
static int		 pf_state_key_addr_setup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, int);
static int		 pf_tcp_track_full(struct pf_kstate **,
			    struct pf_pdesc *, u_short *, int *);
static int		 pf_tcp_track_sloppy(struct pf_kstate **,
			    struct pf_pdesc *, u_short *);
static int		 pf_test_state_tcp(struct pf_kstate **,
			    struct pf_pdesc *, u_short *);
static int		 pf_test_state_udp(struct pf_kstate **,
			    struct pf_pdesc *);
int			 pf_icmp_state_lookup(struct pf_state_key_cmp *,
			    struct pf_pdesc *, struct pf_kstate **,
			    int, u_int16_t, u_int16_t,
			    int, int *, int, int);
static int		 pf_test_state_icmp(struct pf_kstate **,
			    struct pf_pdesc *, u_short *);
static void		 pf_sctp_multihome_detach_addr(const struct pf_kstate *);
static void		 pf_sctp_multihome_delayed(struct pf_pdesc *,
			    struct pfi_kkif *, struct pf_kstate *, int);
static int		 pf_test_state_sctp(struct pf_kstate **,
			    struct pf_pdesc *, u_short *);
static int		 pf_test_state_other(struct pf_kstate **,
			    struct pf_pdesc *);
static u_int16_t	 pf_calc_mss(struct pf_addr *, sa_family_t,
				int, u_int16_t);
static int		 pf_check_proto_cksum(struct mbuf *, int, int,
			    u_int8_t, sa_family_t);
static int		 pf_walk_option6(struct mbuf *, int, int, uint32_t *,
			    u_short *);
static void		 pf_print_state_parts(struct pf_kstate *,
			    struct pf_state_key *, struct pf_state_key *);
static void		 pf_patch_8(struct mbuf *, u_int16_t *, u_int8_t *, u_int8_t,
			    bool, u_int8_t);
static struct pf_kstate	*pf_find_state(struct pfi_kkif *,
			    const struct pf_state_key_cmp *, u_int);
static bool		 pf_src_connlimit(struct pf_kstate *);
static int		 pf_match_rcvif(struct mbuf *, struct pf_krule *);
static void		 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_kstate *, struct pf_krule *,
			    struct pf_krule *);
static void		 pf_overload_task(void *v, int pending);
static u_short		 pf_insert_src_node(struct pf_ksrc_node **,
			    struct pf_srchash **, struct pf_krule *,
			    struct pf_addr *, sa_family_t, struct pf_addr *,
			    struct pfi_kkif *);
static u_int		 pf_purge_expired_states(u_int, int);
static void		 pf_purge_unlinked_rules(void);
static int		 pf_mtag_uminit(void *, int, int);
static void		 pf_mtag_free(struct m_tag *);
static void		 pf_packet_rework_nat(struct mbuf *, struct pf_pdesc *,
			    int, struct pf_state_key *);
#ifdef INET
static void		 pf_route(struct mbuf **, struct pf_krule *,
			    struct ifnet *, struct pf_kstate *,
			    struct pf_pdesc *, struct inpcb *);
#endif /* INET */
#ifdef INET6
static void		 pf_change_a6(struct pf_addr *, u_int16_t *,
			    struct pf_addr *, u_int8_t);
static void		 pf_route6(struct mbuf **, struct pf_krule *,
			    struct ifnet *, struct pf_kstate *,
			    struct pf_pdesc *, struct inpcb *);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t);

int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);

extern int pf_end_threads;
extern struct proc *pf_purge_proc;

VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);

enum { PF_ICMP_MULTI_NONE, PF_ICMP_MULTI_LINK };

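/*
 * Undo NAT on a packet: rewrite it back to the stack-side key for outbound
 * traffic and to the wire-side key for inbound traffic.
 */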
#define	PACKET_UNDO_NAT(_m, _pd, _off, _s)		\
	do {								\
		struct pf_state_key *nk;				\
		if ((_pd)->dir == PF_OUT)				\
			nk = (_s)->key[PF_SK_STACK];			\
		else							\
			nk = (_s)->key[PF_SK_WIRE];			\
		pf_packet_rework_nat(_m, _pd, _off, nk);		\
	} while (0)

#define	PACKET_LOOPED(pd)	((pd)->pf_mtag &&			\
				 (pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED)

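/*
 * Look up the state for a packet and return it locked, dropping the packet
 * if no state exists.  Packets that have already been looped through pf
 * once (PF_MTAG_FLAG_PACKET_LOOPED) are passed without re-inspection.
 */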
#define	STATE_LOOKUP(k, s, pd)						\
	do {								\
		(s) = pf_find_state((pd->kif), (k), (pd->dir));		\
		SDT_PROBE5(pf, ip, state, lookup, pd->kif, k, (pd->dir), pd, (s));	\
		if ((s) == NULL)					\
			return (PF_DROP);				\
		if (PACKET_LOOPED(pd))					\
			return (PF_PASS);				\
	} while (0)

static struct pfi_kkif *
BOUND_IFACE(struct pf_kstate *st, struct pf_pdesc *pd)
{
	struct pfi_kkif *k = pd->kif;

	SDT_PROBE2(pf, ip, , bound_iface, st, k);

	/* Floating unless otherwise specified. */
	if (!(st->rule->rule_flag & PFRULE_IFBOUND))
		return (V_pfi_all);

	/*
	 * Initially set to all, because we don't know which interface
	 * we'll be sending this out on when we create the state.
	 */
	if (st->rule->rt == PF_REPLYTO || (pd->af != pd->naf))
		return (V_pfi_all);

	/* Don't overrule the interface for states created on incoming packets. */
	if (st->direction == PF_IN)
		return (k);

	/* No route-to, so don't overrule. */
	if (st->act.rt != PF_ROUTETO)
		return (k);

	/* Bind to the route-to interface. */
	return (st->act.rt_kif);
}

#define	STATE_INC_COUNTERS(s)						\
	do {								\
		struct pf_krule_item *mrm;				\
		counter_u64_add(s->rule->states_cur, 1);		\
		counter_u64_add(s->rule->states_tot, 1);		\
		if (s->anchor != NULL) {				\
			counter_u64_add(s->anchor->states_cur, 1);	\
			counter_u64_add(s->anchor->states_tot, 1);	\
		}							\
		if (s->nat_rule != NULL) {				\
			counter_u64_add(s->nat_rule->states_cur, 1);\
			counter_u64_add(s->nat_rule->states_tot, 1);\
		}							\
		SLIST_FOREACH(mrm, &s->match_rules, entry) {		\
			counter_u64_add(mrm->r->states_cur, 1);		\
			counter_u64_add(mrm->r->states_tot, 1);		\
		}							\
	} while (0)

#define	STATE_DEC_COUNTERS(s)						\
	do {								\
		struct pf_krule_item *mrm;				\
		if (s->nat_rule != NULL)				\
			counter_u64_add(s->nat_rule->states_cur, -1);\
		if (s->anchor != NULL)				\
			counter_u64_add(s->anchor->states_cur, -1);	\
		counter_u64_add(s->rule->states_cur, -1);		\
		SLIST_FOREACH(mrm, &s->match_rules, entry)		\
			counter_u64_add(mrm->r->states_cur, -1);	\
	} while (0)

MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items");
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash);
VNET_DEFINE(struct pf_udpendpointmapping *, pf_udpendpointmapping);

SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "pf(4)");

VNET_DEFINE(u_long, pf_hashmask);
VNET_DEFINE(u_long, pf_srchashmask);
VNET_DEFINE(u_long, pf_udpendpointhashmask);
VNET_DEFINE_STATIC(u_long, pf_hashsize);
#define V_pf_hashsize	VNET(pf_hashsize)
VNET_DEFINE_STATIC(u_long, pf_srchashsize);
#define V_pf_srchashsize	VNET(pf_srchashsize)
VNET_DEFINE_STATIC(u_long, pf_udpendpointhashsize);
#define V_pf_udpendpointhashsize	VNET(pf_udpendpointhashsize)
u_long	pf_ioctl_maxcount = 65535;

SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
    &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
    &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, udpendpoint_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
    &VNET_NAME(pf_udpendpointhashsize), 0, "Size of pf(4) endpoint hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN,
    &pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call");

VNET_DEFINE(void *, pf_swi_cookie);
VNET_DEFINE(struct intr_event *, pf_swi_ie);

VNET_DEFINE(uint32_t, pf_hashseed);
#define	V_pf_hashseed	VNET(pf_hashseed)

static void
pf_sctp_checksum(struct mbuf *m, int off)
{
	uint32_t sum = 0;

	/* Zero out the checksum, to enable recalculation. */
	m_copyback(m, off + offsetof(struct sctphdr, checksum),
	    sizeof(sum), (caddr_t)&sum);

	sum = sctp_calculate_cksum(m, off);

	m_copyback(m, off + offsetof(struct sctphdr, checksum),
	    sizeof(sum), (caddr_t)&sum);
}

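/*
 * Three-way comparison of addresses.  Note that for IPv6 the 32-bit words
 * are compared starting from addr32[3], so this is not a numeric comparison
 * of the whole address; the callers appear to rely only on it providing a
 * consistent total order.
 */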
int
pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{

	switch (af) {
#ifdef INET
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static bool
pf_is_loopback(sa_family_t af, struct pf_addr *addr)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		return IN_LOOPBACK(ntohl(addr->v4.s_addr));
#endif
	case AF_INET6:
		return IN6_IS_ADDR_LOOPBACK(&addr->v6);
	default:
		panic("Unknown af %d", af);
	}
}

static void
pf_packet_rework_nat(struct mbuf *m, struct pf_pdesc *pd, int off,
	struct pf_state_key *nk)
{

	switch (pd->proto) {
	case IPPROTO_TCP: {
		struct tcphdr *th = &pd->hdr.tcp;

		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
			pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum,
			    &th->th_sum, &nk->addr[pd->sidx],
			    nk->port[pd->sidx], 0, pd->af, pd->naf);
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
			pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum,
			    &th->th_sum, &nk->addr[pd->didx],
			    nk->port[pd->didx], 0, pd->af, pd->naf);
		m_copyback(m, off, sizeof(*th), (caddr_t)th);
		break;
	}
	case IPPROTO_UDP: {
		struct udphdr *uh = &pd->hdr.udp;

		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
			pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
			    &uh->uh_sum, &nk->addr[pd->sidx],
			    nk->port[pd->sidx], 1, pd->af, pd->naf);
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
			pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
			    &uh->uh_sum, &nk->addr[pd->didx],
			    nk->port[pd->didx], 1, pd->af, pd->naf);
		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
		break;
	}
	case IPPROTO_SCTP: {
		struct sctphdr *sh = &pd->hdr.sctp;
		uint16_t checksum = 0;

		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
			pf_change_ap(m, pd->src, &sh->src_port, pd->ip_sum,
			    &checksum, &nk->addr[pd->sidx],
			    nk->port[pd->sidx], 1, pd->af, pd->naf);
		}
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
			pf_change_ap(m, pd->dst, &sh->dest_port, pd->ip_sum,
			    &checksum, &nk->addr[pd->didx],
			    nk->port[pd->didx], 1, pd->af, pd->naf);
		}

		break;
	}
	case IPPROTO_ICMP: {
		struct icmp *ih = &pd->hdr.icmp;

		if (nk->port[pd->sidx] != ih->icmp_id) {
			pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
			    ih->icmp_cksum, ih->icmp_id,
			    nk->port[pd->sidx], 0);
			ih->icmp_id = nk->port[pd->sidx];
			pd->sport = &ih->icmp_id;

			m_copyback(m, off, ICMP_MINLEN, (caddr_t)ih);
		}
		/* FALLTHROUGH */
	}
	default:
		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
			switch (pd->af) {
			case AF_INET:
				pf_change_a(&pd->src->v4.s_addr,
				    pd->ip_sum, nk->addr[pd->sidx].v4.s_addr,
				    0);
				break;
			case AF_INET6:
				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
				break;
			}
		}
		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
			switch (pd->af) {
			case AF_INET:
				pf_change_a(&pd->dst->v4.s_addr,
				    pd->ip_sum, nk->addr[pd->didx].v4.s_addr,
				    0);
				break;
			case AF_INET6:
				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
				break;
			}
		}
		break;
	}
}

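/*
 * The state key is hashed as an array of 32-bit words, so
 * sizeof(struct pf_state_key_cmp) must be a multiple of sizeof(uint32_t)
 * for all of its bytes to be covered.
 */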
static __inline uint32_t
pf_hashkey(const struct pf_state_key *sk)
{
	uint32_t h;

	h = murmur3_32_hash32((const uint32_t *)sk,
	    sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
	    V_pf_hashseed);

	return (h & V_pf_hashmask);
}

__inline uint32_t
pf_hashsrc(struct pf_addr *addr, sa_family_t af)
{
	uint32_t h;

	switch (af) {
	case AF_INET:
		h = murmur3_32_hash32((uint32_t *)&addr->v4,
		    sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
		break;
	case AF_INET6:
		h = murmur3_32_hash32((uint32_t *)&addr->v6,
		    sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
		break;
	}

	return (h & V_pf_srchashmask);
}

static inline uint32_t
pf_hashudpendpoint(struct pf_udp_endpoint *endpoint)
{
	uint32_t h;

	h = murmur3_32_hash32((uint32_t *)endpoint,
	    sizeof(struct pf_udp_endpoint_cmp)/sizeof(uint32_t),
	    V_pf_hashseed);
	return (h & V_pf_udpendpointhashmask);
}

#ifdef ALTQ
static int
pf_state_hash(struct pf_kstate *s)
{
	u_int32_t hv = (intptr_t)s / sizeof(*s);

	hv ^= crc32(&s->src, sizeof(s->src));
	hv ^= crc32(&s->dst, sizeof(s->dst));
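	/* Zero appears to be reserved to mean "no hash", so avoid returning it. */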
	if (hv == 0)
		hv = 1;
	return (hv);
}
#endif

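/*
 * Update one or both peers' protocol state and keep
 * V_pf_status.states_halfopen in sync: when a locally-created TCP state
 * leaves the half-open range (i.e. becomes established or closed), the
 * counter is decremented.
 */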
static __inline void
pf_set_protostate(struct pf_kstate *s, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		s->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;
	if (s->src.state == newstate)
		return;
	if (s->creatorid == V_pf_status.hostid &&
	    s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(s->src.state) ||
	    s->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		atomic_add_32(&V_pf_status.states_halfopen, -1);

	s->src.state = newstate;
}

#ifdef INET6
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		memcpy(&dst->v4, &src->v4, sizeof(dst->v4));
		break;
#endif /* INET */
	case AF_INET6:
		memcpy(&dst->v6, &src->v6, sizeof(dst->v6));
		break;
	}
}
#endif /* INET6 */

static void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = time_uptime;
}

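/*
 * Record one event against the threshold.  The count is kept in fixed
 * point (PF_THRESHOLD_MULT units per event) and decays linearly over the
 * configured interval; e.g. if half the interval has elapsed, the old
 * count is halved before the new event is added.
 */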
static void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = time_uptime, diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

static int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

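/*
 * Check per-source connection limits for a freshly established connection.
 * Returns true when a limit was hit; in that case the state is scheduled
 * for purging and, if an overload table is configured, the offending
 * source address is queued for the overload task.
 */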
static bool
pf_src_connlimit(struct pf_kstate *state)
{
	struct pf_overload_entry *pfoe;
	bool limited = false;

	PF_STATE_LOCK_ASSERT(state);
	PF_SRC_NODE_LOCK(state->src_node);

	state->src_node->conn++;
	state->src.tcp_est = 1;
	pf_add_threshold(&state->src_node->conn_rate);

	if (state->rule->max_src_conn &&
	    state->rule->max_src_conn <
	    state->src_node->conn) {
		counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1);
		limited = true;
	}

	if (state->rule->max_src_conn_rate.limit &&
	    pf_check_threshold(&state->src_node->conn_rate)) {
		counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1);
		limited = true;
	}

	if (!limited)
		goto done;

	/* Kill this state. */
	state->timeout = PFTM_PURGE;
	pf_set_protostate(state, PF_PEER_BOTH, TCPS_CLOSED);

	if (state->rule->overload_tbl == NULL)
		goto done;

	/* Schedule overloading and flushing task. */
	pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
	if (pfoe == NULL)
		goto done;  /* too bad :( */

	bcopy(&state->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
	pfoe->af = state->key[PF_SK_WIRE]->af;
	pfoe->rule = state->rule;
	pfoe->dir = state->direction;
	PF_OVERLOADQ_LOCK();
	SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
	PF_OVERLOADQ_UNLOCK();
	taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);

done:
	PF_SRC_NODE_UNLOCK(state->src_node);
	return (limited);
}

static void
pf_overload_task(void *v, int pending)
{
	struct pf_overload_head queue;
	struct pfr_addr p;
	struct pf_overload_entry *pfoe, *pfoe1;
	uint32_t killed = 0;

	CURVNET_SET((struct vnet *)v);

	PF_OVERLOADQ_LOCK();
	queue = V_pf_overloadqueue;
	SLIST_INIT(&V_pf_overloadqueue);
	PF_OVERLOADQ_UNLOCK();

	bzero(&p, sizeof(p));
	SLIST_FOREACH(pfoe, &queue, next) {
		counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1);
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			printf("%s: blocking address ", __func__);
			pf_print_host(&pfoe->addr, 0, pfoe->af);
			printf("\n");
		}

		p.pfra_af = pfoe->af;
		switch (pfoe->af) {
#ifdef INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = pfoe->addr.v4;
			break;
#endif
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = pfoe->addr.v6;
			break;
#endif
		}

		PF_RULES_WLOCK();
		pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
		PF_RULES_WUNLOCK();
	}
	/*
	 * Remove the entries that don't need flushing.
	 */
	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
		if (pfoe->rule->flush == 0) {
			SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
			free(pfoe, M_PFTEMP);
		} else
			counter_u64_add(
			    V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1);

	/* If nothing to flush, return. */
	if (SLIST_EMPTY(&queue)) {
		CURVNET_RESTORE();
		return;
	}

	for (int i = 0; i <= V_pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
		struct pf_state_key *sk;
		struct pf_kstate *s;

		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
		    sk = s->key[PF_SK_WIRE];
		    SLIST_FOREACH(pfoe, &queue, next)
			if (sk->af == pfoe->af &&
			    ((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
			    pfoe->rule == s->rule) &&
			    ((pfoe->dir == PF_OUT &&
			    PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
			    (pfoe->dir == PF_IN &&
			    PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
				s->timeout = PFTM_PURGE;
				pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
				killed++;
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}
	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
		free(pfoe, M_PFTEMP);
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("%s: %u states killed\n", __func__, killed);

	CURVNET_RESTORE();
}

/*
 * When the node is found, this always returns with the hash row locked.
 * When it is not found, whether the row stays locked is controlled by
 * 'returnlocked'.
 */
struct pf_ksrc_node *
pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af,
	struct pf_srchash **sh, bool returnlocked)
{
	struct pf_ksrc_node *n;

	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);

	*sh = &V_pf_srchash[pf_hashsrc(src, af)];
	PF_HASHROW_LOCK(*sh);
	LIST_FOREACH(n, &(*sh)->nodes, entry)
		if (n->rule == rule && n->af == af &&
		    ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
		    (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
			break;

	if (n == NULL && !returnlocked)
		PF_HASHROW_UNLOCK(*sh);

	return (n);
}

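/*
 * Check that *sn is still linked into its hash row.  Returns with the row
 * locked when the node exists; otherwise the row is unlocked and *sn is
 * set to NULL.
 */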
bool
pf_src_node_exists(struct pf_ksrc_node **sn, struct pf_srchash *sh)
{
	struct pf_ksrc_node	*cur;

	if ((*sn) == NULL)
		return (false);

	KASSERT(sh != NULL, ("%s: sh is NULL", __func__));

	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
	PF_HASHROW_LOCK(sh);
	LIST_FOREACH(cur, &(sh->nodes), entry) {
		if (cur == (*sn) &&
		    cur->expire != 1) /* Ignore nodes being killed */
			return (true);
	}
	PF_HASHROW_UNLOCK(sh);
	(*sn) = NULL;
	return (false);
}

static void
pf_free_src_node(struct pf_ksrc_node *sn)
{

	for (int i = 0; i < 2; i++) {
		counter_u64_free(sn->bytes[i]);
		counter_u64_free(sn->packets[i]);
	}
	uma_zfree(V_pf_sources_z, sn);
}

static u_short
pf_insert_src_node(struct pf_ksrc_node **sn, struct pf_srchash **sh,
    struct pf_krule *rule, struct pf_addr *src, sa_family_t af,
    struct pf_addr *raddr, struct pfi_kkif *rkif)
{
	u_short			 reason = 0;

	KASSERT((rule->rule_flag & PFRULE_SRCTRACK ||
	    rule->rdr.opts & PF_POOL_STICKYADDR),
	    ("%s for non-tracking rule %p", __func__, rule));

	/*
	 * Ask for the hash row to be locked in any case, as we might
	 * insert a new node.
	 */
1043 		*sn = pf_find_src_node(src, rule, af, sh, true);
1044 
1045 	if (*sn == NULL) {
1046 		PF_HASHROW_ASSERT(*sh);
1047 
1048 		if (rule->max_src_nodes &&
1049 		    counter_u64_fetch(rule->src_nodes) >= rule->max_src_nodes) {
1050 			counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1);
1051 			reason = PFRES_SRCLIMIT;
1052 			goto done;
1053 		}
1054 
1055 		(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
1056 		if ((*sn) == NULL) {
1057 			reason = PFRES_MEMORY;
1058 			goto done;
1059 		}
1060 
1061 		for (int i = 0; i < 2; i++) {
1062 			(*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT);
1063 			(*sn)->packets[i] = counter_u64_alloc(M_NOWAIT);
1064 
1065 			if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) {
1066 				pf_free_src_node(*sn);
1067 				reason = PFRES_MEMORY;
1068 				goto done;
1069 			}
1070 		}
1071 
1072 		pf_init_threshold(&(*sn)->conn_rate,
1073 		    rule->max_src_conn_rate.limit,
1074 		    rule->max_src_conn_rate.seconds);
1075 
1076 		MPASS((*sn)->lock == NULL);
1077 		(*sn)->lock = &(*sh)->lock;
1078 
1079 		(*sn)->af = af;
1080 		(*sn)->rule = rule;
1081 		PF_ACPY(&(*sn)->addr, src, af);
1082 		PF_ACPY(&(*sn)->raddr, raddr, af);
1083 		(*sn)->rkif = rkif;
1084 		LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry);
1085 		(*sn)->creation = time_uptime;
1086 		(*sn)->ruletype = rule->action;
1087 		if ((*sn)->rule != NULL)
1088 			counter_u64_add((*sn)->rule->src_nodes, 1);
1089 		counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
1090 	} else {
1091 		if (rule->max_src_states &&
1092 		    (*sn)->states >= rule->max_src_states) {
1093 			counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES],
1094 			    1);
1095 			reason = PFRES_SRCLIMIT;
1096 			goto done;
1097 		}
1098 	}
1099 done:
1100 	if (reason == 0)
1101 		(*sn)->states++;
1102 	else
1103 		(*sn) = NULL;
1104 
1105 	PF_HASHROW_UNLOCK(*sh);
1106 	return (reason);
1107 }
1108 
1109 void
1110 pf_unlink_src_node(struct pf_ksrc_node *src)
1111 {
1112 	PF_SRC_NODE_LOCK_ASSERT(src);
1113 
1114 	LIST_REMOVE(src, entry);
1115 	if (src->rule)
1116 		counter_u64_add(src->rule->src_nodes, -1);
1117 }
1118 
1119 u_int
1120 pf_free_src_nodes(struct pf_ksrc_node_list *head)
1121 {
1122 	struct pf_ksrc_node *sn, *tmp;
1123 	u_int count = 0;
1124 
1125 	LIST_FOREACH_SAFE(sn, head, entry, tmp) {
1126 		pf_free_src_node(sn);
1127 		count++;
1128 	}
1129 
1130 	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count);
1131 
1132 	return (count);
1133 }
1134 
1135 void
1136 pf_mtag_initialize(void)
1137 {
1138 
1139 	pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
1140 	    sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL,
1141 	    UMA_ALIGN_PTR, 0);
1142 }
1143 
1144 /* Per-vnet data storage structures initialization. */
1145 void
1146 pf_initialize(void)
1147 {
1148 	struct pf_keyhash	*kh;
1149 	struct pf_idhash	*ih;
1150 	struct pf_srchash	*sh;
1151 	struct pf_udpendpointhash	*uh;
1152 	u_int i;
1153 
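
	/*
	 * The hash table sizes must be powers of two, as the masks used
	 * for indexing below are computed as size - 1.
	 */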
	if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
		V_pf_hashsize = PF_HASHSIZ;
	if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
		V_pf_srchashsize = PF_SRCHASHSIZ;
	if (V_pf_udpendpointhashsize == 0 || !powerof2(V_pf_udpendpointhashsize))
		V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;

	V_pf_hashseed = arc4random();

	/* States and state keys storage. */
	V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_kstate),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
	uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
	uma_zone_set_warning(V_pf_state_z, "PF states limit reached");

	V_pf_state_key_z = uma_zcreate("pf state keys",
	    sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);

	V_pf_keyhash = mallocarray(V_pf_hashsize, sizeof(struct pf_keyhash),
	    M_PFHASH, M_NOWAIT | M_ZERO);
	V_pf_idhash = mallocarray(V_pf_hashsize, sizeof(struct pf_idhash),
	    M_PFHASH, M_NOWAIT | M_ZERO);
	if (V_pf_keyhash == NULL || V_pf_idhash == NULL) {
		printf("pf: Unable to allocate memory for "
		    "state_hashsize %lu.\n", V_pf_hashsize);

		free(V_pf_keyhash, M_PFHASH);
		free(V_pf_idhash, M_PFHASH);

		V_pf_hashsize = PF_HASHSIZ;
		V_pf_keyhash = mallocarray(V_pf_hashsize,
		    sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO);
		V_pf_idhash = mallocarray(V_pf_hashsize,
		    sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO);
	}

	V_pf_hashmask = V_pf_hashsize - 1;
	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
	    i++, kh++, ih++) {
		mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
		mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
	}

	/* Source nodes. */
	V_pf_sources_z = uma_zcreate("pf source nodes",
	    sizeof(struct pf_ksrc_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
	    0);
	V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
	uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");

	V_pf_srchash = mallocarray(V_pf_srchashsize,
	    sizeof(struct pf_srchash), M_PFHASH, M_NOWAIT | M_ZERO);
	if (V_pf_srchash == NULL) {
		printf("pf: Unable to allocate memory for "
		    "source_hashsize %lu.\n", V_pf_srchashsize);

		V_pf_srchashsize = PF_SRCHASHSIZ;
		V_pf_srchash = mallocarray(V_pf_srchashsize,
		    sizeof(struct pf_srchash), M_PFHASH, M_WAITOK | M_ZERO);
	}

	V_pf_srchashmask = V_pf_srchashsize - 1;
	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);

	/* UDP endpoint mappings. */
	V_pf_udp_mapping_z = uma_zcreate("pf UDP mappings",
	    sizeof(struct pf_udp_mapping), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
	    sizeof(struct pf_udpendpointhash), M_PFHASH, M_NOWAIT | M_ZERO);
	if (V_pf_udpendpointhash == NULL) {
		printf("pf: Unable to allocate memory for "
		    "udpendpoint_hashsize %lu.\n", V_pf_udpendpointhashsize);

		V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
		V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
		    sizeof(struct pf_udpendpointhash), M_PFHASH, M_WAITOK | M_ZERO);
	}

	V_pf_udpendpointhashmask = V_pf_udpendpointhashsize - 1;
	for (i = 0, uh = V_pf_udpendpointhash;
	    i <= V_pf_udpendpointhashmask;
	    i++, uh++) {
		mtx_init(&uh->lock, "pf_udpendpointhash", NULL,
		    MTX_DEF | MTX_DUPOK);
	}

	/* ALTQ */
	TAILQ_INIT(&V_pf_altqs[0]);
	TAILQ_INIT(&V_pf_altqs[1]);
	TAILQ_INIT(&V_pf_altqs[2]);
	TAILQ_INIT(&V_pf_altqs[3]);
	TAILQ_INIT(&V_pf_pabuf[0]);
	TAILQ_INIT(&V_pf_pabuf[1]);
	V_pf_altqs_active = &V_pf_altqs[0];
	V_pf_altq_ifs_active = &V_pf_altqs[1];
	V_pf_altqs_inactive = &V_pf_altqs[2];
	V_pf_altq_ifs_inactive = &V_pf_altqs[3];

	/* Send & overload+flush queues. */
	STAILQ_INIT(&V_pf_sendqueue);
	SLIST_INIT(&V_pf_overloadqueue);
	TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet);

1264 	TAILQ_INIT(&V_pf_unlinked_rules);
1265 }
1266 
1267 void
1268 pf_mtag_cleanup(void)
1269 {
1270 
1271 	uma_zdestroy(pf_mtag_z);
1272 }
1273 
1274 void
1275 pf_cleanup(void)
1276 {
1277 	struct pf_keyhash	*kh;
1278 	struct pf_idhash	*ih;
1279 	struct pf_srchash	*sh;
1280 	struct pf_udpendpointhash	*uh;
1281 	struct pf_send_entry	*pfse, *next;
1282 	u_int i;
1283 
1284 	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash;
1285 	    i <= V_pf_hashmask;
1286 	    i++, kh++, ih++) {
1287 		KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
1288 		    __func__));
1289 		KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
1290 		    __func__));
1291 		mtx_destroy(&kh->lock);
1292 		mtx_destroy(&ih->lock);
1293 	}
1294 	free(V_pf_keyhash, M_PFHASH);
1295 	free(V_pf_idhash, M_PFHASH);
1296 
1297 	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
1298 		KASSERT(LIST_EMPTY(&sh->nodes),
1299 		    ("%s: source node hash not empty", __func__));
1300 		mtx_destroy(&sh->lock);
1301 	}
1302 	free(V_pf_srchash, M_PFHASH);
1303 
1304 	for (i = 0, uh = V_pf_udpendpointhash;
1305 	    i <= V_pf_udpendpointhashmask;
1306 	    i++, uh++) {
1307 		KASSERT(LIST_EMPTY(&uh->endpoints),
1308 		    ("%s: udp endpoint hash not empty", __func__));
1309 		mtx_destroy(&uh->lock);
1310 	}
1311 	free(V_pf_udpendpointhash, M_PFHASH);
1312 
1313 	STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
1314 		m_freem(pfse->pfse_m);
1315 		free(pfse, M_PFTEMP);
1316 	}
1317 	MPASS(RB_EMPTY(&V_pf_sctp_endpoints));
1318 
1319 	uma_zdestroy(V_pf_sources_z);
1320 	uma_zdestroy(V_pf_state_z);
1321 	uma_zdestroy(V_pf_state_key_z);
1322 	uma_zdestroy(V_pf_udp_mapping_z);
1323 }
1324 
1325 static int
1326 pf_mtag_uminit(void *mem, int size, int how)
1327 {
1328 	struct m_tag *t;
1329 
1330 	t = (struct m_tag *)mem;
1331 	t->m_tag_cookie = MTAG_ABI_COMPAT;
1332 	t->m_tag_id = PACKET_TAG_PF;
1333 	t->m_tag_len = sizeof(struct pf_mtag);
1334 	t->m_tag_free = pf_mtag_free;
1335 
1336 	return (0);
1337 }
1338 
1339 static void
1340 pf_mtag_free(struct m_tag *t)
1341 {
1342 
1343 	uma_zfree(pf_mtag_z, t);
1344 }
1345 
1346 struct pf_mtag *
1347 pf_get_mtag(struct mbuf *m)
1348 {
1349 	struct m_tag *mtag;
1350 
1351 	if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL)
1352 		return ((struct pf_mtag *)(mtag + 1));
1353 
1354 	mtag = uma_zalloc(pf_mtag_z, M_NOWAIT);
1355 	if (mtag == NULL)
1356 		return (NULL);
1357 	bzero(mtag + 1, sizeof(struct pf_mtag));
1358 	m_tag_prepend(m, mtag);
1359 
1360 	return ((struct pf_mtag *)(mtag + 1));
1361 }
1362 
1363 static int
1364 pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
1365     struct pf_kstate *s)
1366 {
1367 	struct pf_keyhash	*khs, *khw, *kh;
1368 	struct pf_state_key	*sk, *cur;
1369 	struct pf_kstate	*si, *olds = NULL;
1370 	int idx;
1371 
1372 	NET_EPOCH_ASSERT();
1373 	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
1374 	KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
1375 	KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));
1376 
1377 	/*
1378 	 * We need to lock hash slots of both keys. To avoid deadlock
1379 	 * we always lock the slot with lower address first. Unlock order
1380 	 * isn't important.
1381 	 *
1382 	 * We also need to lock ID hash slot before dropping key
1383 	 * locks. On success we return with ID hash slot locked.
1384 	 */
1385 
1386 	if (skw == sks) {
1387 		khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
1388 		PF_HASHROW_LOCK(khs);
1389 	} else {
1390 		khs = &V_pf_keyhash[pf_hashkey(sks)];
1391 		khw = &V_pf_keyhash[pf_hashkey(skw)];
1392 		if (khs == khw) {
1393 			PF_HASHROW_LOCK(khs);
1394 		} else if (khs < khw) {
1395 			PF_HASHROW_LOCK(khs);
1396 			PF_HASHROW_LOCK(khw);
1397 		} else {
1398 			PF_HASHROW_LOCK(khw);
1399 			PF_HASHROW_LOCK(khs);
1400 		}
1401 	}
1402 
1403 #define	KEYS_UNLOCK()	do {			\
1404 	if (khs != khw) {			\
1405 		PF_HASHROW_UNLOCK(khs);		\
1406 		PF_HASHROW_UNLOCK(khw);		\
1407 	} else					\
1408 		PF_HASHROW_UNLOCK(khs);		\
1409 } while (0)
1410 
1411 	/*
1412 	 * First run: start with wire key.
1413 	 */
1414 	sk = skw;
1415 	kh = khw;
1416 	idx = PF_SK_WIRE;
1417 
1418 	MPASS(s->lock == NULL);
1419 	s->lock = &V_pf_idhash[PF_IDHASH(s)].lock;
1420 
1421 keyattach:
1422 	LIST_FOREACH(cur, &kh->keys, entry)
1423 		if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
1424 			break;
1425 
1426 	if (cur != NULL) {
1427 		/* Key exists. Check for same kif, if none, add to key. */
1428 		TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
1429 			struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];
1430 
1431 			PF_HASHROW_LOCK(ih);
1432 			if (si->kif == s->kif &&
1433 			    ((si->key[PF_SK_WIRE]->af == sk->af &&
1434 			    si->direction == s->direction) ||
1435 			    (si->key[PF_SK_WIRE]->af !=
1436 			    si->key[PF_SK_STACK]->af &&
1437 			    sk->af == si->key[PF_SK_STACK]->af &&
1438 			    si->direction != s->direction))) {
1439 				if (sk->proto == IPPROTO_TCP &&
1440 				    si->src.state >= TCPS_FIN_WAIT_2 &&
1441 				    si->dst.state >= TCPS_FIN_WAIT_2) {
1442 					/*
1443 					 * New state matches an old >FIN_WAIT_2
1444 					 * state. We can't drop key hash locks,
1445 					 * thus we can't unlink it properly.
1446 					 *
1447 					 * As a workaround we drop it into
1448 					 * TCPS_CLOSED state, schedule purge
1449 					 * ASAP and push it into the very end
1450 					 * of the slot TAILQ, so that it won't
1451 					 * conflict with our new state.
1452 					 */
1453 					pf_set_protostate(si, PF_PEER_BOTH,
1454 					    TCPS_CLOSED);
1455 					si->timeout = PFTM_PURGE;
1456 					olds = si;
1457 				} else {
1458 					if (V_pf_status.debug >= PF_DEBUG_MISC) {
1459 						printf("pf: %s key attach "
1460 						    "failed on %s: ",
1461 						    (idx == PF_SK_WIRE) ?
1462 						    "wire" : "stack",
1463 						    s->kif->pfik_name);
1464 						pf_print_state_parts(s,
1465 						    (idx == PF_SK_WIRE) ?
1466 						    sk : NULL,
1467 						    (idx == PF_SK_STACK) ?
1468 						    sk : NULL);
1469 						printf(", existing: ");
1470 						pf_print_state_parts(si,
1471 						    (idx == PF_SK_WIRE) ?
1472 						    sk : NULL,
1473 						    (idx == PF_SK_STACK) ?
1474 						    sk : NULL);
1475 						printf("\n");
1476 					}
1477 					s->timeout = PFTM_UNLINKED;
1478 					PF_HASHROW_UNLOCK(ih);
1479 					KEYS_UNLOCK();
1480 					if (idx == PF_SK_WIRE) {
1481 						uma_zfree(V_pf_state_key_z, skw);
1482 						if (skw != sks)
1483 							uma_zfree(V_pf_state_key_z, sks);
1484 					} else {
1485 						pf_detach_state(s);
1486 					}
1487 					return (EEXIST); /* collision! */
1488 				}
1489 			}
1490 			PF_HASHROW_UNLOCK(ih);
1491 		}
1492 		uma_zfree(V_pf_state_key_z, sk);
1493 		s->key[idx] = cur;
1494 	} else {
1495 		LIST_INSERT_HEAD(&kh->keys, sk, entry);
1496 		s->key[idx] = sk;
1497 	}
1498 
1499 stateattach:
1500 	/* List is sorted, if-bound states before floating. */
1501 	if (s->kif == V_pfi_all)
1502 		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
1503 	else
1504 		TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);
1505 
1506 	if (olds) {
1507 		TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
1508 		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
1509 		    key_list[idx]);
1510 		olds = NULL;
1511 	}
1512 
	/*
	 * Attach done. Now decide whether (and how) a second key
	 * should be attached.
	 */
	if (sks == skw) {
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
		idx = PF_SK_STACK;
		sks = NULL;
		goto stateattach;
	} else if (sks != NULL) {
		/*
		 * Continue attaching with stack key.
		 */
		sk = sks;
		kh = khs;
		idx = PF_SK_STACK;
		sks = NULL;
		goto keyattach;
	}

	PF_STATE_LOCK(s);
	KEYS_UNLOCK();

	KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
	    ("%s failure", __func__));

	return (0);
#undef	KEYS_UNLOCK
}

static void
pf_detach_state(struct pf_kstate *s)
{
	struct pf_state_key *sks = s->key[PF_SK_STACK];
	struct pf_keyhash *kh;

	NET_EPOCH_ASSERT();
	MPASS(s->timeout >= PFTM_MAX);

	pf_sctp_multihome_detach_addr(s);

	if ((s->state_flags & PFSTATE_PFLOW) && V_pflow_export_state_ptr)
		V_pflow_export_state_ptr(s);

	if (sks != NULL) {
		kh = &V_pf_keyhash[pf_hashkey(sks)];
		PF_HASHROW_LOCK(kh);
		if (s->key[PF_SK_STACK] != NULL)
			pf_state_key_detach(s, PF_SK_STACK);
		/*
		 * If both point to the same key, then we are done.
		 */
		if (sks == s->key[PF_SK_WIRE]) {
			pf_state_key_detach(s, PF_SK_WIRE);
			PF_HASHROW_UNLOCK(kh);
			return;
		}
		PF_HASHROW_UNLOCK(kh);
	}

	if (s->key[PF_SK_WIRE] != NULL) {
		kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
		PF_HASHROW_LOCK(kh);
		if (s->key[PF_SK_WIRE] != NULL)
			pf_state_key_detach(s, PF_SK_WIRE);
		PF_HASHROW_UNLOCK(kh);
	}
}

static void
pf_state_key_detach(struct pf_kstate *s, int idx)
{
	struct pf_state_key *sk = s->key[idx];
#ifdef INVARIANTS
	struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)];

	PF_HASHROW_ASSERT(kh);
#endif
	TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]);
	s->key[idx] = NULL;

	if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) {
		LIST_REMOVE(sk, entry);
		uma_zfree(V_pf_state_key_z, sk);
	}
}

static int
pf_state_key_ctor(void *mem, int size, void *arg, int flags)
{
	struct pf_state_key *sk = mem;

	bzero(sk, sizeof(struct pf_state_key_cmp));
	TAILQ_INIT(&sk->states[PF_SK_WIRE]);
	TAILQ_INIT(&sk->states[PF_SK_STACK]);

	return (0);
}

static int
pf_state_key_addr_setup(struct pf_pdesc *pd,
    struct pf_state_key_cmp *key, int multi)
{
	struct pf_addr *saddr = pd->src;
	struct pf_addr *daddr = pd->dst;
#ifdef INET6
	struct nd_neighbor_solicit nd;
	struct pf_addr *target;
	u_short action, reason;

	if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af))
			return (-1);
		target = (struct pf_addr *)&nd.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af))
			return (-1);
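		/*
		 * nd_neighbor_advert has the same layout for the target
		 * address as nd_neighbor_solicit, so reusing the NS
		 * structure here works.
		 */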
		target = (struct pf_addr *)&nd.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[pd->didx].addr32[0] = 0;
			key->addr[pd->didx].addr32[1] = 0;
			key->addr[pd->didx].addr32[2] = 0;
			key->addr[pd->didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi == PF_ICMP_MULTI_LINK) {
			key->addr[pd->sidx].addr32[0] = IPV6_ADDR_INT32_MLL;
			key->addr[pd->sidx].addr32[1] = 0;
			key->addr[pd->sidx].addr32[2] = 0;
			key->addr[pd->sidx].addr32[3] = IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
copy:
#endif
	if (saddr)
		PF_ACPY(&key->addr[pd->sidx], saddr, pd->af);
	if (daddr)
		PF_ACPY(&key->addr[pd->didx], daddr, pd->af);

	return (0);
}

int
pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport,
    struct pf_state_key **sk, struct pf_state_key **nk)
{
	*sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
	if (*sk == NULL)
		return (ENOMEM);

	if (pf_state_key_addr_setup(pd, (struct pf_state_key_cmp *)*sk,
	    0)) {
		uma_zfree(V_pf_state_key_z, *sk);
		*sk = NULL;
		return (ENOMEM);
	}

	(*sk)->port[pd->sidx] = sport;
	(*sk)->port[pd->didx] = dport;
	(*sk)->proto = pd->proto;
	(*sk)->af = pd->af;

	*nk = pf_state_key_clone(*sk);
	if (*nk == NULL) {
		uma_zfree(V_pf_state_key_z, *sk);
		*sk = NULL;
		return (ENOMEM);
	}

	if (pd->af != pd->naf) {
		(*sk)->port[pd->sidx] = pd->osport;
		(*sk)->port[pd->didx] = pd->odport;

		(*nk)->af = pd->naf;

		/*
		 * We're overwriting an address here, so there may be bits of
		 * an IPv6 address left behind. Clear those out first.
		 */
		bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0]));
		bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1]));

		PF_ACPY(&(*nk)->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
		    &pd->nsaddr, pd->naf);
		PF_ACPY(&(*nk)->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
		    &pd->ndaddr, pd->naf);
		(*nk)->port[pd->af == pd->naf ? pd->sidx : pd->didx] = pd->nsport;
		(*nk)->port[pd->af == pd->naf ? pd->didx : pd->sidx] = pd->ndport;
		switch (pd->proto) {
		case IPPROTO_ICMP:
			(*nk)->proto = IPPROTO_ICMPV6;
			break;
		case IPPROTO_ICMPV6:
			(*nk)->proto = IPPROTO_ICMP;
			break;
		default:
			(*nk)->proto = pd->proto;
		}
	}

	return (0);
}

struct pf_state_key *
pf_state_key_clone(const struct pf_state_key *orig)
{
	struct pf_state_key *sk;

	sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
	if (sk == NULL)
		return (NULL);

	bcopy(orig, sk, sizeof(struct pf_state_key_cmp));

	return (sk);
}

int
pf_state_insert(struct pfi_kkif *kif, struct pfi_kkif *orig_kif,
    struct pf_state_key *skw, struct pf_state_key *sks, struct pf_kstate *s)
{
	struct pf_idhash *ih;
	struct pf_kstate *cur;
	int error;

	NET_EPOCH_ASSERT();

	KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
	    ("%s: sks not pristine", __func__));
	KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
	    ("%s: skw not pristine", __func__));
	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));

	s->kif = kif;
	s->orig_kif = orig_kif;

	if (s->id == 0 && s->creatorid == 0) {
		s->id = alloc_unr64(&V_pf_stateid);
		s->id = htobe64(s->id);
		s->creatorid = V_pf_status.hostid;
	}

	/* Returns with ID locked on success. */
	if ((error = pf_state_key_attach(skw, sks, s)) != 0)
		return (error);

	ih = &V_pf_idhash[PF_IDHASH(s)];
	PF_HASHROW_ASSERT(ih);
	LIST_FOREACH(cur, &ih->states, entry)
		if (cur->id == s->id && cur->creatorid == s->creatorid)
			break;

	if (cur != NULL) {
		s->timeout = PFTM_UNLINKED;
		PF_HASHROW_UNLOCK(ih);
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state ID collision: "
			    "id: %016llx creatorid: %08x\n",
			    (unsigned long long)be64toh(s->id),
			    ntohl(s->creatorid));
		}
		pf_detach_state(s);
		return (EEXIST);
	}
	LIST_INSERT_HEAD(&ih->states, s, entry);
	/* One for keys, one for ID hash. */
	refcount_init(&s->refs, 2);

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_INSERT], 1);
	if (V_pfsync_insert_state_ptr != NULL)
		V_pfsync_insert_state_ptr(s);

	/* Returns locked. */
	return (0);
}

/*
 * Find state by ID: returns with locked row on success.
 */
struct pf_kstate *
pf_find_state_byid(uint64_t id, uint32_t creatorid)
{
	struct pf_idhash *ih;
	struct pf_kstate *s;

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	ih = &V_pf_idhash[(be64toh(id) % (V_pf_hashmask + 1))];

	PF_HASHROW_LOCK(ih);
	LIST_FOREACH(s, &ih->states, entry)
		if (s->id == id && s->creatorid == creatorid)
			break;

	if (s == NULL)
		PF_HASHROW_UNLOCK(ih);

	return (s);
}

/*
 * Find state by key.
 * Returns with ID hash slot locked on success.
 */
static struct pf_kstate *
pf_find_state(struct pfi_kkif *kif, const struct pf_state_key_cmp *key,
    u_int dir)
{
	struct pf_keyhash	*kh;
	struct pf_state_key	*sk;
	struct pf_kstate	*s;
	int idx;

	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)];

	PF_HASHROW_LOCK(kh);
	LIST_FOREACH(sk, &kh->keys, entry)
1846 		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
1847 			break;
1848 	if (sk == NULL) {
1849 		PF_HASHROW_UNLOCK(kh);
1850 		return (NULL);
1851 	}
1852 
1853 	idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);
1854 
1855 	/* List is sorted, if-bound states before floating ones. */
1856 	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
1857 		if (s->kif == V_pfi_all || s->kif == kif || s->orig_kif == kif) {
1858 			PF_STATE_LOCK(s);
1859 			PF_HASHROW_UNLOCK(kh);
1860 			if (__predict_false(s->timeout >= PFTM_MAX)) {
1861 				/*
1862 				 * State is either being processed by
1863 				 * pf_unlink_state() in another thread, or
1864 				 * is scheduled for immediate expiry.
1865 				 */
1866 				PF_STATE_UNLOCK(s);
1867 				return (NULL);
1868 			}
1869 			return (s);
1870 		}
1871 
1872 	/*
	 * Look through the other list, in case of af-to (address family
	 * translation) states.
	 */
1873 	idx = idx == PF_SK_WIRE ? PF_SK_STACK : PF_SK_WIRE;
1874 	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
1875 		if (s->key[PF_SK_WIRE]->af == s->key[PF_SK_STACK]->af)
1876 			continue;
1877 		if (s->kif == V_pfi_all || s->kif == kif || s->orig_kif == kif) {
1878 			PF_STATE_LOCK(s);
1879 			PF_HASHROW_UNLOCK(kh);
1880 			if (__predict_false(s->timeout >= PFTM_MAX)) {
1881 				/*
1882 				 * State is either being processed by
1883 				 * pf_unlink_state() in another thread, or
1884 				 * is scheduled for immediate expiry.
1885 				 */
1886 				PF_STATE_UNLOCK(s);
1887 				return (NULL);
1888 			}
1889 			return (s);
1890 		}
1891 	}
1892 
1893 	PF_HASHROW_UNLOCK(kh);
1894 
1895 	return (NULL);
1896 }
1897 
1898 /*
1899  * Returns with ID hash slot locked on success.
1900  */
1901 struct pf_kstate *
1902 pf_find_state_all(const struct pf_state_key_cmp *key, u_int dir, int *more)
1903 {
1904 	struct pf_keyhash	*kh;
1905 	struct pf_state_key	*sk;
1906 	struct pf_kstate	*s, *ret = NULL;
1907 	int			 idx, inout = 0;
1908 
1909 	if (more != NULL)
1910 		*more = 0;
1911 
1912 	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
1913 
1914 	kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)];
1915 
1916 	PF_HASHROW_LOCK(kh);
1917 	LIST_FOREACH(sk, &kh->keys, entry)
1918 		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
1919 			break;
1920 	if (sk == NULL) {
1921 		PF_HASHROW_UNLOCK(kh);
1922 		return (NULL);
1923 	}
1924 	switch (dir) {
1925 	case PF_IN:
1926 		idx = PF_SK_WIRE;
1927 		break;
1928 	case PF_OUT:
1929 		idx = PF_SK_STACK;
1930 		break;
1931 	case PF_INOUT:
1932 		idx = PF_SK_WIRE;
1933 		inout = 1;
1934 		break;
1935 	default:
1936 		panic("%s: dir %u", __func__, dir);
1937 	}
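	/*
	 * Editor's note: for PF_INOUT the wire-side list is scanned first;
	 * the second_run label then repeats the loop over the stack-side
	 * list.
	 */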
1938 second_run:
1939 	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
1940 		if (more == NULL) {
1941 			PF_STATE_LOCK(s);
1942 			PF_HASHROW_UNLOCK(kh);
1943 			return (s);
1944 		}
1945 
1946 		if (ret)
1947 			(*more)++;
1948 		else {
1949 			ret = s;
1950 			PF_STATE_LOCK(s);
1951 		}
1952 	}
1953 	if (inout == 1) {
1954 		inout = 0;
1955 		idx = PF_SK_STACK;
1956 		goto second_run;
1957 	}
1958 	PF_HASHROW_UNLOCK(kh);
1959 
1960 	return (ret);
1961 }
1962 
1963 /*
1964  * FIXME
1965  * This routine is inefficient -- locks the state only to unlock immediately on
1966  * return.
1967  * It is racy -- after the state is unlocked nothing stops other threads from
1968  * removing it.
1969  */
1970 bool
1971 pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir)
1972 {
1973 	struct pf_kstate *s;
1974 
1975 	s = pf_find_state_all(key, dir, NULL);
1976 	if (s != NULL) {
1977 		PF_STATE_UNLOCK(s);
1978 		return (true);
1979 	}
1980 	return (false);
1981 }
1982 
1983 struct pf_udp_mapping *
1984 pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port,
1985     struct pf_addr *nat_addr, uint16_t nat_port)
1986 {
1987 	struct pf_udp_mapping *mapping;
1988 
1989 	mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
1990 	if (mapping == NULL)
1991 		return (NULL);
1992 	PF_ACPY(&mapping->endpoints[0].addr, src_addr, af);
1993 	mapping->endpoints[0].port = src_port;
1994 	mapping->endpoints[0].af = af;
1995 	mapping->endpoints[0].mapping = mapping;
1996 	PF_ACPY(&mapping->endpoints[1].addr, nat_addr, af);
1997 	mapping->endpoints[1].port = nat_port;
1998 	mapping->endpoints[1].af = af;
1999 	mapping->endpoints[1].mapping = mapping;
2000 	refcount_init(&mapping->refs, 1);
2001 	return (mapping);
2002 }
2003 
2004 int
2005 pf_udp_mapping_insert(struct pf_udp_mapping *mapping)
2006 {
2007 	struct pf_udpendpointhash *h0, *h1;
2008 	struct pf_udp_endpoint *endpoint;
2009 	int ret = EEXIST;
2010 
2011 	h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
2012 	h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
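	/*
	 * Editor's note: lock the two hash rows in ascending address order,
	 * so that concurrent inserts touching the same pair of rows cannot
	 * deadlock against each other.
	 */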
2013 	if (h0 == h1) {
2014 		PF_HASHROW_LOCK(h0);
2015 	} else if (h0 < h1) {
2016 		PF_HASHROW_LOCK(h0);
2017 		PF_HASHROW_LOCK(h1);
2018 	} else {
2019 		PF_HASHROW_LOCK(h1);
2020 		PF_HASHROW_LOCK(h0);
2021 	}
2022 
2023 	LIST_FOREACH(endpoint, &h0->endpoints, entry) {
2024 		if (bcmp(endpoint, &mapping->endpoints[0],
2025 		    sizeof(struct pf_udp_endpoint_cmp)) == 0)
2026 			break;
2027 	}
2028 	if (endpoint != NULL)
2029 		goto cleanup;
2030 	LIST_FOREACH(endpoint, &h1->endpoints, entry) {
2031 		if (bcmp(endpoint, &mapping->endpoints[1],
2032 		    sizeof(struct pf_udp_endpoint_cmp)) == 0)
2033 			break;
2034 	}
2035 	if (endpoint != NULL)
2036 		goto cleanup;
2037 	LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry);
2038 	LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry);
2039 	ret = 0;
2040 
2041 cleanup:
2042 	if (h0 != h1) {
2043 		PF_HASHROW_UNLOCK(h0);
2044 		PF_HASHROW_UNLOCK(h1);
2045 	} else {
2046 		PF_HASHROW_UNLOCK(h0);
2047 	}
2048 	return (ret);
2049 }
2050 
2051 void
2052 pf_udp_mapping_release(struct pf_udp_mapping *mapping)
2053 {
2054 	/* refcount is synchronized on the source endpoint's row lock */
2055 	struct pf_udpendpointhash *h0, *h1;
2056 
2057 	if (mapping == NULL)
2058 		return;
2059 
2060 	h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
2061 	PF_HASHROW_LOCK(h0);
2062 	if (refcount_release(&mapping->refs)) {
2063 		LIST_REMOVE(&mapping->endpoints[0], entry);
2064 		PF_HASHROW_UNLOCK(h0);
2065 		h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
2066 		PF_HASHROW_LOCK(h1);
2067 		LIST_REMOVE(&mapping->endpoints[1], entry);
2068 		PF_HASHROW_UNLOCK(h1);
2069 
2070 		uma_zfree(V_pf_udp_mapping_z, mapping);
2071 	} else {
2072 		PF_HASHROW_UNLOCK(h0);
2073 	}
2074 }
2075 
2076 
2077 struct pf_udp_mapping *
2078 pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
2079 {
2080 	struct pf_udpendpointhash *uh;
2081 	struct pf_udp_endpoint *endpoint;
2082 
2083 	uh = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint *)key)];
2084 
2085 	PF_HASHROW_LOCK(uh);
2086 	LIST_FOREACH(endpoint, &uh->endpoints, entry) {
2087 		if (bcmp(endpoint, key, sizeof(struct pf_udp_endpoint_cmp)) == 0 &&
2088 			bcmp(endpoint, &endpoint->mapping->endpoints[0],
2089 			    sizeof(struct pf_udp_endpoint_cmp)) == 0)
2090 			break;
2091 	}
2092 	if (endpoint == NULL) {
2093 		PF_HASHROW_UNLOCK(uh);
2094 		return (NULL);
2095 	}
2096 	refcount_acquire(&endpoint->mapping->refs);
2097 	PF_HASHROW_UNLOCK(uh);
2098 	return (endpoint->mapping);
2099 }
2100 /* END state table stuff */
2101 
2102 static void
2103 pf_send(struct pf_send_entry *pfse)
2104 {
2105 
2106 	PF_SENDQ_LOCK();
2107 	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
2108 	PF_SENDQ_UNLOCK();
2109 	swi_sched(V_pf_swi_cookie, 0);
2110 }
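
/*
 * Editor's note: pf_send() only enqueues the packet and schedules a
 * software interrupt; the queue is drained and the packets are actually
 * transmitted from pf_intr() below.
 */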
2111 
2112 static bool
2113 pf_isforlocal(struct mbuf *m, int af)
2114 {
2115 	switch (af) {
2116 #ifdef INET
2117 	case AF_INET: {
2118 		struct ip *ip = mtod(m, struct ip *);
2119 
2120 		return (in_localip(ip->ip_dst));
2121 	}
2122 #endif
2123 #ifdef INET6
2124 	case AF_INET6: {
2125 		struct ip6_hdr *ip6;
2126 		struct in6_ifaddr *ia;
2127 		ip6 = mtod(m, struct ip6_hdr *);
2128 		ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
2129 		if (ia == NULL)
2130 			return (false);
2131 		return (! (ia->ia6_flags & IN6_IFF_NOTREADY));
2132 	}
2133 #endif
2134 	}
2135 
2136 	return (false);
2137 }
2138 
2139 int
2140 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type,
2141     int *icmp_dir, int *multi, u_int16_t *virtual_id, u_int16_t *virtual_type)
2142 {
2143 	/*
2144 	 * ICMP types marked with PF_OUT are typically responses to
2145 	 * PF_IN types, and will match states in the opposite direction.
2146 	 * PF_IN ICMP types need to match a state with that type.
2147 	 */
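	/*
	 * Editor's note: the paired request/reply cases below fall through
	 * intentionally; the request case only flips *icmp_dir to PF_IN and
	 * then shares the virtual type/id assignment with its reply twin.
	 */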
2148 	*icmp_dir = PF_OUT;
2149 	*multi = PF_ICMP_MULTI_LINK;
2150 	/* Queries (and responses) */
2151 	switch (pd->af) {
2152 #ifdef INET
2153 	case AF_INET:
2154 		switch (type) {
2155 		case ICMP_ECHO:
2156 			*icmp_dir = PF_IN;
2157 		case ICMP_ECHOREPLY:
2158 			*virtual_type = ICMP_ECHO;
2159 			*virtual_id = pd->hdr.icmp.icmp_id;
2160 			break;
2161 
2162 		case ICMP_TSTAMP:
2163 			*icmp_dir = PF_IN;
2164 		case ICMP_TSTAMPREPLY:
2165 			*virtual_type = ICMP_TSTAMP;
2166 			*virtual_id = pd->hdr.icmp.icmp_id;
2167 			break;
2168 
2169 		case ICMP_IREQ:
2170 			*icmp_dir = PF_IN;
2171 		case ICMP_IREQREPLY:
2172 			*virtual_type = ICMP_IREQ;
2173 			*virtual_id = pd->hdr.icmp.icmp_id;
2174 			break;
2175 
2176 		case ICMP_MASKREQ:
2177 			*icmp_dir = PF_IN;
2178 		case ICMP_MASKREPLY:
2179 			*virtual_type = ICMP_MASKREQ;
2180 			*virtual_id = pd->hdr.icmp.icmp_id;
2181 			break;
2182 
2183 		case ICMP_IPV6_WHEREAREYOU:
2184 			*icmp_dir = PF_IN;
2185 		case ICMP_IPV6_IAMHERE:
2186 			*virtual_type = ICMP_IPV6_WHEREAREYOU;
2187 			*virtual_id = 0; /* Nothing sane to match on! */
2188 			break;
2189 
2190 		case ICMP_MOBILE_REGREQUEST:
2191 			*icmp_dir = PF_IN;
2192 		case ICMP_MOBILE_REGREPLY:
2193 			*virtual_type = ICMP_MOBILE_REGREQUEST;
2194 			*virtual_id = 0; /* Nothing sane to match on! */
2195 			break;
2196 
2197 		case ICMP_ROUTERSOLICIT:
2198 			*icmp_dir = PF_IN;
2199 		case ICMP_ROUTERADVERT:
2200 			*virtual_type = ICMP_ROUTERSOLICIT;
2201 			*virtual_id = 0; /* Nothing sane to match on! */
2202 			break;
2203 
2204 		/* These ICMP types map to other connections */
2205 		case ICMP_UNREACH:
2206 		case ICMP_SOURCEQUENCH:
2207 		case ICMP_REDIRECT:
2208 		case ICMP_TIMXCEED:
2209 		case ICMP_PARAMPROB:
2210 			/* These will not be used, but set them anyway */
2211 			*icmp_dir = PF_IN;
2212 			*virtual_type = type;
2213 			*virtual_id = 0;
2214 			HTONS(*virtual_type);
2215 			return (1);  /* These types match to another state */
2216 
2217 		/*
2218 		 * All remaining ICMP types get their own states,
2219 		 * and will only match in one direction.
2220 		 */
2221 		default:
2222 			*icmp_dir = PF_IN;
2223 			*virtual_type = type;
2224 			*virtual_id = 0;
2225 			break;
2226 		}
2227 		break;
2228 #endif /* INET */
2229 #ifdef INET6
2230 	case AF_INET6:
2231 		switch (type) {
2232 		case ICMP6_ECHO_REQUEST:
2233 			*icmp_dir = PF_IN;
2234 		case ICMP6_ECHO_REPLY:
2235 			*virtual_type = ICMP6_ECHO_REQUEST;
2236 			*virtual_id = pd->hdr.icmp6.icmp6_id;
2237 			break;
2238 
2239 		case MLD_LISTENER_QUERY:
2240 		case MLD_LISTENER_REPORT: {
2241 			/*
2242 			 * Listener Report can be sent by clients
2243 			 * without an associated Listener Query.
2244 			 * In addition, when a Report is sent as a
2245 			 * reply to a Query, its source and destination
2246 			 * addresses differ.
2247 			 */
2248 			*icmp_dir = PF_IN;
2249 			*virtual_type = MLD_LISTENER_QUERY;
2250 			*virtual_id = 0;
2251 			break;
2252 		}
2253 		case MLD_MTRACE:
2254 			*icmp_dir = PF_IN;
2255 		case MLD_MTRACE_RESP:
2256 			*virtual_type = MLD_MTRACE;
2257 			*virtual_id = 0; /* Nothing sane to match on! */
2258 			break;
2259 
2260 		case ND_NEIGHBOR_SOLICIT:
2261 			*icmp_dir = PF_IN;
2262 		case ND_NEIGHBOR_ADVERT: {
2263 			*virtual_type = ND_NEIGHBOR_SOLICIT;
2264 			*virtual_id = 0;
2265 			break;
2266 		}
2267 
2268 		/*
2269 		 * These ICMP types map to other connections.
2270 		 * ND_REDIRECT can't be in this list because the triggering
2271 		 * packet header is optional.
2272 		 */
2273 		case ICMP6_DST_UNREACH:
2274 		case ICMP6_PACKET_TOO_BIG:
2275 		case ICMP6_TIME_EXCEEDED:
2276 		case ICMP6_PARAM_PROB:
2277 			/* These will not be used, but set them anyway */
2278 			*icmp_dir = PF_IN;
2279 			*virtual_type = type;
2280 			*virtual_id = 0;
2281 			HTONS(*virtual_type);
2282 			return (1);  /* These types match to another state */
2283 		/*
2284 		 * All remaining ICMP6 types get their own states,
2285 		 * and will only match in one direction.
2286 		 */
2287 		default:
2288 			*icmp_dir = PF_IN;
2289 			*virtual_type = type;
2290 			*virtual_id = 0;
2291 			break;
2292 		}
2293 		break;
2294 #endif /* INET6 */
2295 	}
2296 	HTONS(*virtual_type);
2297 	return (0);  /* These types match to their own state */
2298 }
2299 
2300 void
2301 pf_intr(void *v)
2302 {
2303 	struct epoch_tracker et;
2304 	struct pf_send_head queue;
2305 	struct pf_send_entry *pfse, *next;
2306 
2307 	CURVNET_SET((struct vnet *)v);
2308 
2309 	PF_SENDQ_LOCK();
2310 	queue = V_pf_sendqueue;
2311 	STAILQ_INIT(&V_pf_sendqueue);
2312 	PF_SENDQ_UNLOCK();
2313 
2314 	NET_EPOCH_ENTER(et);
2315 
2316 	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
2317 		switch (pfse->pfse_type) {
2318 #ifdef INET
2319 		case PFSE_IP: {
2320 			if (pf_isforlocal(pfse->pfse_m, AF_INET)) {
2321 				KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
2322 				    ("%s: rcvif != loif", __func__));
2323 
2324 				pfse->pfse_m->m_flags |= M_SKIP_FIREWALL;
2325 				pfse->pfse_m->m_pkthdr.csum_flags |=
2326 				    CSUM_IP_VALID | CSUM_IP_CHECKED;
2327 				ip_input(pfse->pfse_m);
2328 			} else {
2329 				ip_output(pfse->pfse_m, NULL, NULL, 0, NULL,
2330 				    NULL);
2331 			}
2332 			break;
2333 		}
2334 		case PFSE_ICMP:
2335 			icmp_error(pfse->pfse_m, pfse->icmpopts.type,
2336 			    pfse->icmpopts.code, 0, pfse->icmpopts.mtu);
2337 			break;
2338 #endif /* INET */
2339 #ifdef INET6
2340 		case PFSE_IP6:
2341 			if (pf_isforlocal(pfse->pfse_m, AF_INET6)) {
2342 				KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
2343 				    ("%s: rcvif != loif", __func__));
2344 
2345 				pfse->pfse_m->m_flags |= M_SKIP_FIREWALL |
2346 				    M_LOOP;
2347 				ip6_input(pfse->pfse_m);
2348 			} else {
2349 				ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL,
2350 				    NULL, NULL);
2351 			}
2352 			break;
2353 		case PFSE_ICMP6:
2354 			icmp6_error(pfse->pfse_m, pfse->icmpopts.type,
2355 			    pfse->icmpopts.code, pfse->icmpopts.mtu);
2356 			break;
2357 #endif /* INET6 */
2358 		default:
2359 			panic("%s: unknown type", __func__);
2360 		}
2361 		free(pfse, M_PFTEMP);
2362 	}
2363 	NET_EPOCH_EXIT(et);
2364 	CURVNET_RESTORE();
2365 }
2366 
2367 #define	pf_purge_thread_period	(hz / 10)
2368 
2369 #ifdef PF_WANT_32_TO_64_COUNTER
2370 static void
2371 pf_status_counter_u64_periodic(void)
2372 {
2373 
2374 	PF_RULES_RASSERT();
2375 
2376 	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 60)) != 0) {
2377 		return;
2378 	}
2379 
2380 	for (int i = 0; i < FCNT_MAX; i++) {
2381 		pf_counter_u64_periodic(&V_pf_status.fcounters[i]);
2382 	}
2383 }
2384 
2385 static void
2386 pf_kif_counter_u64_periodic(void)
2387 {
2388 	struct pfi_kkif *kif;
2389 	size_t r, run;
2390 
2391 	PF_RULES_RASSERT();
2392 
2393 	if (__predict_false(V_pf_allkifcount == 0)) {
2394 		return;
2395 	}
2396 
2397 	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
2398 		return;
2399 	}
2400 
2401 	run = V_pf_allkifcount / 10;
2402 	if (run < 5)
2403 		run = 5;
2404 
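	/*
	 * Editor's note: V_pf_kifmarker is a dummy list entry used as a
	 * cursor.  Each call flushes the counters of roughly a tenth of the
	 * kifs (at least five) and leaves the marker after the last one
	 * visited, amortizing the work across successive runs.
	 * pf_rule_counter_u64_periodic() below uses the same pattern.
	 */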
2405 	for (r = 0; r < run; r++) {
2406 		kif = LIST_NEXT(V_pf_kifmarker, pfik_allkiflist);
2407 		if (kif == NULL) {
2408 			LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
2409 			LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
2410 			break;
2411 		}
2412 
2413 		LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
2414 		LIST_INSERT_AFTER(kif, V_pf_kifmarker, pfik_allkiflist);
2415 
2416 		for (int i = 0; i < 2; i++) {
2417 			for (int j = 0; j < 2; j++) {
2418 				for (int k = 0; k < 2; k++) {
2419 					pf_counter_u64_periodic(&kif->pfik_packets[i][j][k]);
2420 					pf_counter_u64_periodic(&kif->pfik_bytes[i][j][k]);
2421 				}
2422 			}
2423 		}
2424 	}
2425 }
2426 
2427 static void
2428 pf_rule_counter_u64_periodic(void)
2429 {
2430 	struct pf_krule *rule;
2431 	size_t r, run;
2432 
2433 	PF_RULES_RASSERT();
2434 
2435 	if (__predict_false(V_pf_allrulecount == 0)) {
2436 		return;
2437 	}
2438 
2439 	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
2440 		return;
2441 	}
2442 
2443 	run = V_pf_allrulecount / 10;
2444 	if (run < 5)
2445 		run = 5;
2446 
2447 	for (r = 0; r < run; r++) {
2448 		rule = LIST_NEXT(V_pf_rulemarker, allrulelist);
2449 		if (rule == NULL) {
2450 			LIST_REMOVE(V_pf_rulemarker, allrulelist);
2451 			LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
2452 			break;
2453 		}
2454 
2455 		LIST_REMOVE(V_pf_rulemarker, allrulelist);
2456 		LIST_INSERT_AFTER(rule, V_pf_rulemarker, allrulelist);
2457 
2458 		pf_counter_u64_periodic(&rule->evaluations);
2459 		for (int i = 0; i < 2; i++) {
2460 			pf_counter_u64_periodic(&rule->packets[i]);
2461 			pf_counter_u64_periodic(&rule->bytes[i]);
2462 		}
2463 	}
2464 }
2465 
2466 static void
2467 pf_counter_u64_periodic_main(void)
2468 {
2469 	PF_RULES_RLOCK_TRACKER;
2470 
2471 	V_pf_counter_periodic_iter++;
2472 
2473 	PF_RULES_RLOCK();
2474 	pf_counter_u64_critical_enter();
2475 	pf_status_counter_u64_periodic();
2476 	pf_kif_counter_u64_periodic();
2477 	pf_rule_counter_u64_periodic();
2478 	pf_counter_u64_critical_exit();
2479 	PF_RULES_RUNLOCK();
2480 }
2481 #else
2482 #define	pf_counter_u64_periodic_main()	do { } while (0)
2483 #endif
2484 
2485 void
2486 pf_purge_thread(void *unused __unused)
2487 {
2488 	struct epoch_tracker	 et;
2489 
2490 	VNET_ITERATOR_DECL(vnet_iter);
2491 
2492 	sx_xlock(&pf_end_lock);
2493 	while (pf_end_threads == 0) {
2494 		sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", pf_purge_thread_period);
2495 
2496 		VNET_LIST_RLOCK();
2497 		NET_EPOCH_ENTER(et);
2498 		VNET_FOREACH(vnet_iter) {
2499 			CURVNET_SET(vnet_iter);
2500 
2501 			/* Wait until V_pf_default_rule is initialized. */
2502 			if (V_pf_vnet_active == 0) {
2503 				CURVNET_RESTORE();
2504 				continue;
2505 			}
2506 
2507 			pf_counter_u64_periodic_main();
2508 
2509 			/*
2510 			 * Process 1/interval fraction of the state
2511 			 * table every run.
2512 			 */
2513 			V_pf_purge_idx =
2514 			    pf_purge_expired_states(V_pf_purge_idx, V_pf_hashmask /
2515 			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
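			/*
			 * Editor's note: with pf_purge_thread_period = hz/10
			 * the thread wakes ten times per second, so scanning
			 * hashmask / (PFTM_INTERVAL * 10) rows per wakeup
			 * covers the whole table once per PFTM_INTERVAL
			 * seconds.
			 */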
2516 
2517 			/*
2518 			 * Purge other expired types every
2519 			 * PFTM_INTERVAL seconds.
2520 			 */
2521 			if (V_pf_purge_idx == 0) {
2522 				/*
2523 				 * Order is important:
2524 				 * - states and src nodes reference rules
2525 				 * - states and rules reference kifs
2526 				 */
2527 				pf_purge_expired_fragments();
2528 				pf_purge_expired_src_nodes();
2529 				pf_purge_unlinked_rules();
2530 				pfi_kkif_purge();
2531 			}
2532 			CURVNET_RESTORE();
2533 		}
2534 		NET_EPOCH_EXIT(et);
2535 		VNET_LIST_RUNLOCK();
2536 	}
2537 
2538 	pf_end_threads++;
2539 	sx_xunlock(&pf_end_lock);
2540 	kproc_exit(0);
2541 }
2542 
2543 void
2544 pf_unload_vnet_purge(void)
2545 {
2546 
2547 	/*
2548 	 * To clean up all kifs and rules we need
2549 	 * two runs: the first one clears the reference flags,
2550 	 * after which pf_purge_expired_states() no longer
2551 	 * raises them, and the second run frees.
2552 	 */
2553 	pf_purge_unlinked_rules();
2554 	pfi_kkif_purge();
2555 
2556 	/*
2557 	 * Now purge everything.
2558 	 */
2559 	pf_purge_expired_states(0, V_pf_hashmask);
2560 	pf_purge_fragments(UINT_MAX);
2561 	pf_purge_expired_src_nodes();
2562 
2563 	/*
2564 	 * Now all kifs & rules should be unreferenced,
2565 	 * thus should be successfully freed.
2566 	 */
2567 	pf_purge_unlinked_rules();
2568 	pfi_kkif_purge();
2569 }
2570 
2571 u_int32_t
2572 pf_state_expires(const struct pf_kstate *state)
2573 {
2574 	u_int32_t	timeout;
2575 	u_int32_t	start;
2576 	u_int32_t	end;
2577 	u_int32_t	states;
2578 
2579 	/* handle all PFTM_* > PFTM_MAX here */
2580 	if (state->timeout == PFTM_PURGE)
2581 		return (time_uptime);
2582 	KASSERT(state->timeout != PFTM_UNLINKED,
2583 	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
2584 	KASSERT((state->timeout < PFTM_MAX),
2585 	    ("pf_state_expires: timeout > PFTM_MAX"));
2586 	timeout = state->rule->timeout[state->timeout];
2587 	if (!timeout)
2588 		timeout = V_pf_default_rule.timeout[state->timeout];
2589 	start = state->rule->timeout[PFTM_ADAPTIVE_START];
2590 	if (start && state->rule != &V_pf_default_rule) {
2591 		end = state->rule->timeout[PFTM_ADAPTIVE_END];
2592 		states = counter_u64_fetch(state->rule->states_cur);
2593 	} else {
2594 		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
2595 		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
2596 		states = V_pf_status.states;
2597 	}
2598 	if (end && states > start && start < end) {
2599 		if (states < end) {
2600 			timeout = (u_int64_t)timeout * (end - states) /
2601 			    (end - start);
2602 			return ((state->expire / 1000) + timeout);
2603 		} else
2605 			return (time_uptime);
2606 	}
2607 	return ((state->expire / 1000) + timeout);
2608 }
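
/*
 * Worked example (editor's note): with adaptive.start 6000, adaptive.end
 * 12000 and 9000 states allocated, a nominal 60 second timeout scales to
 * 60 * (12000 - 9000) / (12000 - 6000) = 30 seconds; at or beyond
 * adaptive.end, states expire immediately.
 */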
2609 
2610 void
2611 pf_purge_expired_src_nodes(void)
2612 {
2613 	struct pf_ksrc_node_list	 freelist;
2614 	struct pf_srchash	*sh;
2615 	struct pf_ksrc_node	*cur, *next;
2616 	int i;
2617 
2618 	LIST_INIT(&freelist);
2619 	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
2620 	    PF_HASHROW_LOCK(sh);
2621 	    LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
2622 		if (cur->states == 0 && cur->expire <= time_uptime) {
2623 			pf_unlink_src_node(cur);
2624 			LIST_INSERT_HEAD(&freelist, cur, entry);
2625 		} else if (cur->rule != NULL)
2626 			cur->rule->rule_ref |= PFRULE_REFS;
2627 	    PF_HASHROW_UNLOCK(sh);
2628 	}
2629 
2630 	pf_free_src_nodes(&freelist);
2631 
2632 	V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
2633 }
2634 
2635 static void
2636 pf_src_tree_remove_state(struct pf_kstate *s)
2637 {
2638 	struct pf_ksrc_node *sn;
2639 	uint32_t timeout;
2640 
2641 	timeout = s->rule->timeout[PFTM_SRC_NODE] ?
2642 	    s->rule->timeout[PFTM_SRC_NODE] :
2643 	    V_pf_default_rule.timeout[PFTM_SRC_NODE];
2644 
2645 	if (s->src_node != NULL) {
2646 		sn = s->src_node;
2647 		PF_SRC_NODE_LOCK(sn);
2648 		if (s->src.tcp_est)
2649 			--sn->conn;
2650 		if (--sn->states == 0)
2651 			sn->expire = time_uptime + timeout;
2652 		PF_SRC_NODE_UNLOCK(sn);
2653 	}
2654 	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
2655 		sn = s->nat_src_node;
2656 		PF_SRC_NODE_LOCK(sn);
2657 		if (--sn->states == 0)
2658 			sn->expire = time_uptime + timeout;
2659 		PF_SRC_NODE_UNLOCK(sn);
2660 	}
2661 	s->src_node = s->nat_src_node = NULL;
2662 }
2663 
2664 /*
2665  * Unlink and potentially free a state. The function must be
2666  * called with the ID hash row locked, but always returns
2667  * unlocked, since it needs to go through key hash locking.
2668  */
2669 int
2670 pf_unlink_state(struct pf_kstate *s)
2671 {
2672 	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];
2673 
2674 	NET_EPOCH_ASSERT();
2675 	PF_HASHROW_ASSERT(ih);
2676 
2677 	if (s->timeout == PFTM_UNLINKED) {
2678 		/*
2679 		 * State is being processed
2680 		 * by pf_unlink_state() in
2681 		 * another thread.
2682 		 */
2683 		PF_HASHROW_UNLOCK(ih);
2684 		return (0);	/* XXXGL: undefined actually */
2685 	}
2686 
2687 	if (s->src.state == PF_TCPS_PROXY_DST) {
2688 		/* XXX wire key the right one? */
2689 		pf_send_tcp(s->rule, s->key[PF_SK_WIRE]->af,
2690 		    &s->key[PF_SK_WIRE]->addr[1],
2691 		    &s->key[PF_SK_WIRE]->addr[0],
2692 		    s->key[PF_SK_WIRE]->port[1],
2693 		    s->key[PF_SK_WIRE]->port[0],
2694 		    s->src.seqhi, s->src.seqlo + 1,
2695 		    TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0,
2696 		    s->act.rtableid);
2697 	}
2698 
2699 	LIST_REMOVE(s, entry);
2700 	pf_src_tree_remove_state(s);
2701 
2702 	if (V_pfsync_delete_state_ptr != NULL)
2703 		V_pfsync_delete_state_ptr(s);
2704 
2705 	STATE_DEC_COUNTERS(s);
2706 
2707 	s->timeout = PFTM_UNLINKED;
2708 
2709 	/* Ensure we remove it from the list of halfopen states, if needed. */
2710 	if (s->key[PF_SK_STACK] != NULL &&
2711 	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP)
2712 		pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);
2713 
2714 	PF_HASHROW_UNLOCK(ih);
2715 
2716 	pf_detach_state(s);
2717 
2718 	pf_udp_mapping_release(s->udp_mapping);
2719 
2720 	/* pf_state_insert() initialises refs to 2 */
2721 	return (pf_release_staten(s, 2));
2722 }
2723 
2724 struct pf_kstate *
2725 pf_alloc_state(int flags)
2726 {
2727 
2728 	return (uma_zalloc(V_pf_state_z, flags | M_ZERO));
2729 }
2730 
2731 void
2732 pf_free_state(struct pf_kstate *cur)
2733 {
2734 	struct pf_krule_item *ri;
2735 
2736 	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
2737 	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
2738 	    cur->timeout));
2739 
2740 	while ((ri = SLIST_FIRST(&cur->match_rules))) {
2741 		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
2742 		free(ri, M_PF_RULE_ITEM);
2743 	}
2744 
2745 	pf_normalize_tcp_cleanup(cur);
2746 	uma_zfree(V_pf_state_z, cur);
2747 	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
2748 }
2749 
2750 /*
2751  * Called only from pf_purge_thread(), thus serialized.
2752  */
2753 static u_int
2754 pf_purge_expired_states(u_int i, int maxcheck)
2755 {
2756 	struct pf_idhash *ih;
2757 	struct pf_kstate *s;
2758 	struct pf_krule_item *mrm;
2759 	size_t count __unused;
2760 
2761 	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
2762 
2763 	/*
2764 	 * Go through hash and unlink states that expire now.
2765 	 */
2766 	while (maxcheck > 0) {
2767 		count = 0;
2768 		ih = &V_pf_idhash[i];
2769 
2770 		/* only take the lock if we expect to do work */
2771 		if (!LIST_EMPTY(&ih->states)) {
2772 relock:
2773 			PF_HASHROW_LOCK(ih);
2774 			LIST_FOREACH(s, &ih->states, entry) {
2775 				if (pf_state_expires(s) <= time_uptime) {
2776 					V_pf_status.states -=
2777 					    pf_unlink_state(s);
2778 					goto relock;
2779 				}
2780 				s->rule->rule_ref |= PFRULE_REFS;
2781 				if (s->nat_rule != NULL)
2782 					s->nat_rule->rule_ref |= PFRULE_REFS;
2783 				if (s->anchor != NULL)
2784 					s->anchor->rule_ref |= PFRULE_REFS;
2785 				s->kif->pfik_flags |= PFI_IFLAG_REFS;
2786 				SLIST_FOREACH(mrm, &s->match_rules, entry)
2787 					mrm->r->rule_ref |= PFRULE_REFS;
2788 				if (s->act.rt_kif)
2789 					s->act.rt_kif->pfik_flags |= PFI_IFLAG_REFS;
2790 				count++;
2791 			}
2792 			PF_HASHROW_UNLOCK(ih);
2793 		}
2794 
2795 		SDT_PROBE2(pf, purge, state, rowcount, i, count);
2796 
2797 		/* Return when we hit end of hash. */
2798 		if (++i > V_pf_hashmask) {
2799 			V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
2800 			return (0);
2801 		}
2802 
2803 		maxcheck--;
2804 	}
2805 
2806 	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
2807 
2808 	return (i);
2809 }
2810 
2811 static void
2812 pf_purge_unlinked_rules(void)
2813 {
2814 	struct pf_krulequeue tmpq;
2815 	struct pf_krule *r, *r1;
2816 
2817 	/*
2818 	 * If we have an overloading task pending, then we'd
2819 	 * better skip purging this time. There is a tiny
2820 	 * probability that the overloading task references
2821 	 * an already unlinked rule.
2822 	 */
2823 	PF_OVERLOADQ_LOCK();
2824 	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
2825 		PF_OVERLOADQ_UNLOCK();
2826 		return;
2827 	}
2828 	PF_OVERLOADQ_UNLOCK();
2829 
2830 	/*
2831 	 * Do naive mark-and-sweep garbage collection of old rules.
2832 	 * Reference flag is raised by pf_purge_expired_states()
2833 	 * and pf_purge_expired_src_nodes().
2834 	 *
2835 	 * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
2836 	 * use a temporary queue.
2837 	 */
2838 	TAILQ_INIT(&tmpq);
2839 	PF_UNLNKDRULES_LOCK();
2840 	TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
2841 		if (!(r->rule_ref & PFRULE_REFS)) {
2842 			TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
2843 			TAILQ_INSERT_TAIL(&tmpq, r, entries);
2844 		} else
2845 			r->rule_ref &= ~PFRULE_REFS;
2846 	}
2847 	PF_UNLNKDRULES_UNLOCK();
2848 
2849 	if (!TAILQ_EMPTY(&tmpq)) {
2850 		PF_CONFIG_LOCK();
2851 		PF_RULES_WLOCK();
2852 		TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
2853 			TAILQ_REMOVE(&tmpq, r, entries);
2854 			pf_free_rule(r);
2855 		}
2856 		PF_RULES_WUNLOCK();
2857 		PF_CONFIG_UNLOCK();
2858 	}
2859 }
2860 
2861 void
2862 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
2863 {
2864 	switch (af) {
2865 #ifdef INET
2866 	case AF_INET: {
2867 		u_int32_t a = ntohl(addr->addr32[0]);
2868 		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
2869 		    (a>>8)&255, a&255);
2870 		if (p) {
2871 			p = ntohs(p);
2872 			printf(":%u", p);
2873 		}
2874 		break;
2875 	}
2876 #endif /* INET */
2877 #ifdef INET6
2878 	case AF_INET6: {
2879 		u_int16_t b;
2880 		u_int8_t i, curstart, curend, maxstart, maxend;
2881 		curstart = curend = maxstart = maxend = 255;
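		/*
		 * Editor's note: find the longest run of zero 16-bit groups
		 * ([maxstart, maxend]) so that it can be collapsed into "::"
		 * below; 255 means "no run found yet".
		 */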
2882 		for (i = 0; i < 8; i++) {
2883 			if (!addr->addr16[i]) {
2884 				if (curstart == 255)
2885 					curstart = i;
2886 				curend = i;
2887 			} else {
2888 				if ((curend - curstart) >
2889 				    (maxend - maxstart)) {
2890 					maxstart = curstart;
2891 					maxend = curend;
2892 				}
2893 				curstart = curend = 255;
2894 			}
2895 		}
2896 		if ((curend - curstart) >
2897 		    (maxend - maxstart)) {
2898 			maxstart = curstart;
2899 			maxend = curend;
2900 		}
2901 		for (i = 0; i < 8; i++) {
2902 			if (i >= maxstart && i <= maxend) {
2903 				if (i == 0)
2904 					printf(":");
2905 				if (i == maxend)
2906 					printf(":");
2907 			} else {
2908 				b = ntohs(addr->addr16[i]);
2909 				printf("%x", b);
2910 				if (i < 7)
2911 					printf(":");
2912 			}
2913 		}
2914 		if (p) {
2915 			p = ntohs(p);
2916 			printf("[%u]", p);
2917 		}
2918 		break;
2919 	}
2920 #endif /* INET6 */
2921 	}
2922 }
2923 
2924 void
2925 pf_print_state(struct pf_kstate *s)
2926 {
2927 	pf_print_state_parts(s, NULL, NULL);
2928 }
2929 
2930 static void
2931 pf_print_state_parts(struct pf_kstate *s,
2932     struct pf_state_key *skwp, struct pf_state_key *sksp)
2933 {
2934 	struct pf_state_key *skw, *sks;
2935 	u_int8_t proto, dir;
2936 
2937 	/* Do our best to fill these, but they're skipped if NULL */
2938 	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
2939 	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
2940 	proto = skw ? skw->proto : (sks ? sks->proto : 0);
2941 	dir = s ? s->direction : 0;
2942 
2943 	switch (proto) {
2944 	case IPPROTO_IPV4:
2945 		printf("IPv4");
2946 		break;
2947 	case IPPROTO_IPV6:
2948 		printf("IPv6");
2949 		break;
2950 	case IPPROTO_TCP:
2951 		printf("TCP");
2952 		break;
2953 	case IPPROTO_UDP:
2954 		printf("UDP");
2955 		break;
2956 	case IPPROTO_ICMP:
2957 		printf("ICMP");
2958 		break;
2959 	case IPPROTO_ICMPV6:
2960 		printf("ICMPv6");
2961 		break;
2962 	default:
2963 		printf("%u", proto);
2964 		break;
2965 	}
2966 	switch (dir) {
2967 	case PF_IN:
2968 		printf(" in");
2969 		break;
2970 	case PF_OUT:
2971 		printf(" out");
2972 		break;
2973 	}
2974 	if (skw) {
2975 		printf(" wire: ");
2976 		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
2977 		printf(" ");
2978 		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
2979 	}
2980 	if (sks) {
2981 		printf(" stack: ");
2982 		if (sks != skw) {
2983 			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
2984 			printf(" ");
2985 			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
2986 		} else
2987 			printf("-");
2988 	}
2989 	if (s) {
2990 		if (proto == IPPROTO_TCP) {
2991 			printf(" [lo=%u high=%u win=%u modulator=%u",
2992 			    s->src.seqlo, s->src.seqhi,
2993 			    s->src.max_win, s->src.seqdiff);
2994 			if (s->src.wscale && s->dst.wscale)
2995 				printf(" wscale=%u",
2996 				    s->src.wscale & PF_WSCALE_MASK);
2997 			printf("]");
2998 			printf(" [lo=%u high=%u win=%u modulator=%u",
2999 			    s->dst.seqlo, s->dst.seqhi,
3000 			    s->dst.max_win, s->dst.seqdiff);
3001 			if (s->src.wscale && s->dst.wscale)
3002 				printf(" wscale=%u",
3003 				s->dst.wscale & PF_WSCALE_MASK);
3004 			printf("]");
3005 		}
3006 		printf(" %u:%u", s->src.state, s->dst.state);
3007 		if (s->rule)
3008 			printf(" @%d", s->rule->nr);
3009 	}
3010 }
3011 
3012 void
3013 pf_print_flags(uint16_t f)
3014 {
3015 	if (f)
3016 		printf(" ");
3017 	if (f & TH_FIN)
3018 		printf("F");
3019 	if (f & TH_SYN)
3020 		printf("S");
3021 	if (f & TH_RST)
3022 		printf("R");
3023 	if (f & TH_PUSH)
3024 		printf("P");
3025 	if (f & TH_ACK)
3026 		printf("A");
3027 	if (f & TH_URG)
3028 		printf("U");
3029 	if (f & TH_ECE)
3030 		printf("E");
3031 	if (f & TH_CWR)
3032 		printf("W");
3033 	if (f & TH_AE)
3034 		printf("e");
3035 }
3036 
3037 #define	PF_SET_SKIP_STEPS(i)					\
3038 	do {							\
3039 		while (head[i] != cur) {			\
3040 			head[i]->skip[i] = cur;			\
3041 			head[i] = TAILQ_NEXT(head[i], entries);	\
3042 		}						\
3043 	} while (0)
3044 
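/*
 * Editor's note: skip steps are an evaluation shortcut.  For each field
 * (interface, direction, af, proto, addresses, ports) a rule's skip[i]
 * points at the first later rule whose value for that field differs, so
 * a mismatch skips the whole run of identical rules in one step.
 */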
3045 void
3046 pf_calc_skip_steps(struct pf_krulequeue *rules)
3047 {
3048 	struct pf_krule *cur, *prev, *head[PF_SKIP_COUNT];
3049 	int i;
3050 
3051 	cur = TAILQ_FIRST(rules);
3052 	prev = cur;
3053 	for (i = 0; i < PF_SKIP_COUNT; ++i)
3054 		head[i] = cur;
3055 	while (cur != NULL) {
3056 		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
3057 			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
3058 		if (cur->direction != prev->direction)
3059 			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
3060 		if (cur->af != prev->af)
3061 			PF_SET_SKIP_STEPS(PF_SKIP_AF);
3062 		if (cur->proto != prev->proto)
3063 			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
3064 		if (cur->src.neg != prev->src.neg ||
3065 		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
3066 			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
3067 		if (cur->dst.neg != prev->dst.neg ||
3068 		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
3069 			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
3070 		if (cur->src.port[0] != prev->src.port[0] ||
3071 		    cur->src.port[1] != prev->src.port[1] ||
3072 		    cur->src.port_op != prev->src.port_op)
3073 			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
3074 		if (cur->dst.port[0] != prev->dst.port[0] ||
3075 		    cur->dst.port[1] != prev->dst.port[1] ||
3076 		    cur->dst.port_op != prev->dst.port_op)
3077 			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
3078 
3079 		prev = cur;
3080 		cur = TAILQ_NEXT(cur, entries);
3081 	}
3082 	for (i = 0; i < PF_SKIP_COUNT; ++i)
3083 		PF_SET_SKIP_STEPS(i);
3084 }
3085 
3086 int
3087 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
3088 {
3089 	if (aw1->type != aw2->type)
3090 		return (1);
3091 	switch (aw1->type) {
3092 	case PF_ADDR_ADDRMASK:
3093 	case PF_ADDR_RANGE:
3094 		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
3095 			return (1);
3096 		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
3097 			return (1);
3098 		return (0);
3099 	case PF_ADDR_DYNIFTL:
3100 		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
3101 	case PF_ADDR_NONE:
3102 	case PF_ADDR_NOROUTE:
3103 	case PF_ADDR_URPFFAILED:
3104 		return (0);
3105 	case PF_ADDR_TABLE:
3106 		return (aw1->p.tbl != aw2->p.tbl);
3107 	default:
3108 		printf("invalid address type: %d\n", aw1->type);
3109 		return (1);
3110 	}
3111 }
3112 
3113 /**
3114  * Checksum updates are a little complicated because the checksum in the TCP/UDP
3115  * header isn't always a full checksum. In some cases (e.g. output) it's a
3116  * pseudo-header checksum, which is a partial checksum over src/dst IP
3117  * addresses, protocol number and length.
3118  *
3119  * That means we have the following cases:
3120  *  * Input or forwarding: we don't have TSO, the checksum fields are full
3121  *  	checksums, we need to update the checksum whenever we change anything.
3122  *  * Output (i.e. the checksum is a pseudo-header checksum):
3123  *  	x The field being updated is src/dst address or affects the length of
3124  *  	the packet. We need to update the pseudo-header checksum (note that this
3125  *  	checksum is not ones' complement).
3126  *  	x Some other field is being modified (e.g. src/dst port numbers): We
3127  *  	don't have to update anything.
3128  **/
3129 u_int16_t
3130 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
3131 {
3132 	u_int32_t x;
3133 
3134 	x = cksum + old - new;
3135 	x = (x + (x >> 16)) & 0xffff;
3136 
3137 	/* optimise: eliminate a branch when not udp */
3138 	if (udp && cksum == 0x0000)
3139 		return (cksum);
3140 	if (udp && x == 0x0000)
3141 		x = 0xffff;
3142 
3143 	return (u_int16_t)(x);
3144 }
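
/*
 * Usage sketch (editor's note, not part of the original source): rewrite
 * a TCP source port in place while keeping the checksum consistent.  Both
 * values stay in network byte order, since pf_cksum_fixup() operates on
 * raw 16-bit words; the udp argument is 0 because a TCP checksum of zero
 * needs no special-casing.
 */
static inline void
example_rewrite_sport(struct tcphdr *th, u_int16_t new_sport)
{
	/* hypothetical helper, for illustration only */
	th->th_sum = pf_cksum_fixup(th->th_sum, th->th_sport, new_sport, 0);
	th->th_sport = new_sport;
}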
3145 
3146 static void
3147 pf_patch_8(struct mbuf *m, u_int16_t *cksum, u_int8_t *f, u_int8_t v, bool hi,
3148     u_int8_t udp)
3149 {
3150 	u_int16_t old = htons(hi ? (*f << 8) : *f);
3151 	u_int16_t new = htons(hi ? ( v << 8) :  v);
3152 
3153 	if (*f == v)
3154 		return;
3155 
3156 	*f = v;
3157 
3158 	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
3159 		return;
3160 
3161 	*cksum = pf_cksum_fixup(*cksum, old, new, udp);
3162 }
3163 
3164 void
3165 pf_patch_16_unaligned(struct mbuf *m, u_int16_t *cksum, void *f, u_int16_t v,
3166     bool hi, u_int8_t udp)
3167 {
3168 	u_int8_t *fb = (u_int8_t *)f;
3169 	u_int8_t *vb = (u_int8_t *)&v;
3170 
3171 	pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
3172 	pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);
3173 }
3174 
3175 void
3176 pf_patch_32_unaligned(struct mbuf *m, u_int16_t *cksum, void *f, u_int32_t v,
3177     bool hi, u_int8_t udp)
3178 {
3179 	u_int8_t *fb = (u_int8_t *)f;
3180 	u_int8_t *vb = (u_int8_t *)&v;
3181 
3182 	pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
3183 	pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);
3184 	pf_patch_8(m, cksum, fb++, *vb++, hi, udp);
3185 	pf_patch_8(m, cksum, fb++, *vb++, !hi, udp);
3186 }
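
/*
 * Editor's note: the *_unaligned variants split the write into byte-wise
 * pf_patch_8() calls, so the target field need not be naturally aligned;
 * the alternating hi flag tracks which octet of a 16-bit checksum word
 * each byte occupies.
 */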
3187 
3188 u_int16_t
3189 pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
3190         u_int16_t new, u_int8_t udp)
3191 {
3192 	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
3193 		return (cksum);
3194 
3195 	return (pf_cksum_fixup(cksum, old, new, udp));
3196 }
3197 
3198 static void
3199 pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
3200         u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
3201         sa_family_t af, sa_family_t naf)
3202 {
3203 	struct pf_addr	ao;
3204 	u_int16_t	po = *p;
3205 
3206 	PF_ACPY(&ao, a, af);
3207 	if (af == naf)
3208 		PF_ACPY(a, an, af);
3209 
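	/*
	 * Editor's note: with checksum offload pending (CSUM_DELAY_DATA*),
	 * *pc holds an unfinished pseudo-header sum rather than a final
	 * ones'-complement checksum; invert it so the incremental fixups
	 * below stay consistent, and invert it back at the end.
	 */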
3210 	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
3211 		*pc = ~*pc;
3212 
3213 	*p = pn;
3214 
3215 	switch (af) {
3216 #ifdef INET
3217 	case AF_INET:
3218 		switch (naf) {
3219 		case AF_INET:
3220 			*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
3221 			    ao.addr16[0], an->addr16[0], 0),
3222 			    ao.addr16[1], an->addr16[1], 0);
3223 			*p = pn;
3224 
3225 			*pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
3226 			    ao.addr16[0], an->addr16[0], u),
3227 			    ao.addr16[1], an->addr16[1], u);
3228 
3229 			*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
3230 			break;
3231 #ifdef INET6
3232 		case AF_INET6:
3233 			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3234 			   pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3235 			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
3236 			    ao.addr16[0], an->addr16[0], u),
3237 			    ao.addr16[1], an->addr16[1], u),
3238 			    0,            an->addr16[2], u),
3239 			    0,            an->addr16[3], u),
3240 			    0,            an->addr16[4], u),
3241 			    0,            an->addr16[5], u),
3242 			    0,            an->addr16[6], u),
3243 			    0,            an->addr16[7], u),
3244 			    po, pn, u);
3245 
3246 			/* XXXKP TODO *ic checksum? */
3247 			break;
3248 #endif /* INET6 */
3249 		}
3250 		break;
3251 #endif /* INET */
3252 #ifdef INET6
3253 	case AF_INET6:
3254 		switch (naf) {
3255 #ifdef INET
3256 		case AF_INET:
3257 			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3258 			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3259 			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
3260 			    ao.addr16[0], an->addr16[0], u),
3261 			    ao.addr16[1], an->addr16[1], u),
3262 			    ao.addr16[2], 0,             u),
3263 			    ao.addr16[3], 0,             u),
3264 			    ao.addr16[4], 0,             u),
3265 			    ao.addr16[5], 0,             u),
3266 			    ao.addr16[6], 0,             u),
3267 			    ao.addr16[7], 0,             u),
3268 			    po, pn, u);
3269 
3270 			/* XXXKP TODO *ic checksum? */
3271 			break;
3272 #endif /* INET */
3273 		case AF_INET6:
3274 			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3275 			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3276 			    pf_cksum_fixup(pf_cksum_fixup(*pc,
3277 			    ao.addr16[0], an->addr16[0], u),
3278 			    ao.addr16[1], an->addr16[1], u),
3279 			    ao.addr16[2], an->addr16[2], u),
3280 			    ao.addr16[3], an->addr16[3], u),
3281 			    ao.addr16[4], an->addr16[4], u),
3282 			    ao.addr16[5], an->addr16[5], u),
3283 			    ao.addr16[6], an->addr16[6], u),
3284 			    ao.addr16[7], an->addr16[7], u);
3285 
3286 			*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
3287 			break;
3288 		}
3289 		break;
3290 #endif /* INET6 */
3291 	}
3292 
3293 	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
3294 	    CSUM_DELAY_DATA_IPV6)) {
3295 		*pc = ~*pc;
3296 		if (! *pc)
3297 			*pc = 0xffff;
3298 	}
3299 }
3300 
3301 /* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
3302 void
3303 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
3304 {
3305 	u_int32_t	ao;
3306 
3307 	memcpy(&ao, a, sizeof(ao));
3308 	memcpy(a, &an, sizeof(u_int32_t));
3309 	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
3310 	    ao % 65536, an % 65536, u);
3311 }
3312 
3313 void
3314 pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
3315 {
3316 	u_int32_t	ao;
3317 
3318 	memcpy(&ao, a, sizeof(ao));
3319 	memcpy(a, &an, sizeof(u_int32_t));
3320 
3321 	*c = pf_proto_cksum_fixup(m,
3322 	    pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
3323 	    ao % 65536, an % 65536, udp);
3324 }
3325 
3326 #ifdef INET6
3327 static void
3328 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
3329 {
3330 	struct pf_addr	ao;
3331 
3332 	PF_ACPY(&ao, a, AF_INET6);
3333 	PF_ACPY(a, an, AF_INET6);
3334 
3335 	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3336 	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3337 	    pf_cksum_fixup(pf_cksum_fixup(*c,
3338 	    ao.addr16[0], an->addr16[0], u),
3339 	    ao.addr16[1], an->addr16[1], u),
3340 	    ao.addr16[2], an->addr16[2], u),
3341 	    ao.addr16[3], an->addr16[3], u),
3342 	    ao.addr16[4], an->addr16[4], u),
3343 	    ao.addr16[5], an->addr16[5], u),
3344 	    ao.addr16[6], an->addr16[6], u),
3345 	    ao.addr16[7], an->addr16[7], u);
3346 }
3347 #endif /* INET6 */
3348 
3349 static void
3350 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
3351     struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
3352     u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
3353 {
3354 	struct pf_addr	oia, ooa;
3355 
3356 	PF_ACPY(&oia, ia, af);
3357 	if (oa)
3358 		PF_ACPY(&ooa, oa, af);
3359 
3360 	/* Change inner protocol port, fix inner protocol checksum. */
3361 	if (ip != NULL) {
3362 		u_int16_t	oip = *ip;
3363 		u_int32_t	opc;
3364 
3365 		if (pc != NULL)
3366 			opc = *pc;
3367 		*ip = np;
3368 		if (pc != NULL)
3369 			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
3370 		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
3371 		if (pc != NULL)
3372 			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
3373 	}
3374 	/* Change inner ip address, fix inner ip and icmp checksums. */
3375 	PF_ACPY(ia, na, af);
3376 	switch (af) {
3377 #ifdef INET
3378 	case AF_INET: {
3379 		u_int32_t	 oh2c = *h2c;
3380 
3381 		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
3382 		    oia.addr16[0], ia->addr16[0], 0),
3383 		    oia.addr16[1], ia->addr16[1], 0);
3384 		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
3385 		    oia.addr16[0], ia->addr16[0], 0),
3386 		    oia.addr16[1], ia->addr16[1], 0);
3387 		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
3388 		break;
3389 	}
3390 #endif /* INET */
3391 #ifdef INET6
3392 	case AF_INET6:
3393 		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3394 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3395 		    pf_cksum_fixup(pf_cksum_fixup(*ic,
3396 		    oia.addr16[0], ia->addr16[0], u),
3397 		    oia.addr16[1], ia->addr16[1], u),
3398 		    oia.addr16[2], ia->addr16[2], u),
3399 		    oia.addr16[3], ia->addr16[3], u),
3400 		    oia.addr16[4], ia->addr16[4], u),
3401 		    oia.addr16[5], ia->addr16[5], u),
3402 		    oia.addr16[6], ia->addr16[6], u),
3403 		    oia.addr16[7], ia->addr16[7], u);
3404 		break;
3405 #endif /* INET6 */
3406 	}
3407 	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
3408 	if (oa) {
3409 		PF_ACPY(oa, na, af);
3410 		switch (af) {
3411 #ifdef INET
3412 		case AF_INET:
3413 			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
3414 			    ooa.addr16[0], oa->addr16[0], 0),
3415 			    ooa.addr16[1], oa->addr16[1], 0);
3416 			break;
3417 #endif /* INET */
3418 #ifdef INET6
3419 		case AF_INET6:
3420 			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3421 			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
3422 			    pf_cksum_fixup(pf_cksum_fixup(*ic,
3423 			    ooa.addr16[0], oa->addr16[0], u),
3424 			    ooa.addr16[1], oa->addr16[1], u),
3425 			    ooa.addr16[2], oa->addr16[2], u),
3426 			    ooa.addr16[3], oa->addr16[3], u),
3427 			    ooa.addr16[4], oa->addr16[4], u),
3428 			    ooa.addr16[5], oa->addr16[5], u),
3429 			    ooa.addr16[6], oa->addr16[6], u),
3430 			    ooa.addr16[7], oa->addr16[7], u);
3431 			break;
3432 #endif /* INET6 */
3433 		}
3434 	}
3435 }
3436 
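/*
 * Editor's note: rewrite the outer IPv4/IPv6 header in place for af-to
 * translation: trim the old network header, prepend one for pd->naf, and
 * recompute ICMP/ICMPv6 checksums in full, since ICMPv6 (unlike ICMPv4)
 * covers a pseudo-header.
 */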
3437 int
3438 pf_translate_af(struct pf_pdesc *pd)
3439 {
3440 #if defined(INET) && defined(INET6)
3441 	struct mbuf		*mp;
3442 	struct ip		*ip4;
3443 	struct ip6_hdr		*ip6;
3444 	struct icmp6_hdr	*icmp;
3445 	struct m_tag		*mtag;
3446 	struct pf_fragment_tag	*ftag;
3447 	int			 hlen;
3448 
3449 	hlen = pd->naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
3450 
3451 	/* trim the old header */
3452 	m_adj(pd->m, pd->off);
3453 
3454 	/* prepend a new one */
3455 	M_PREPEND(pd->m, hlen, M_NOWAIT);
3456 	if (pd->m == NULL)
3457 		return (-1);
3458 
3459 	switch (pd->naf) {
3460 	case AF_INET:
3461 		ip4 = mtod(pd->m, struct ip *);
3462 		bzero(ip4, hlen);
3463 		ip4->ip_v = IPVERSION;
3464 		ip4->ip_hl = hlen >> 2;
3465 		ip4->ip_tos = pd->tos;
3466 		ip4->ip_len = htons(hlen + (pd->tot_len - pd->off));
3467 		ip_fillid(ip4);
3468 		ip4->ip_ttl = pd->ttl;
3469 		ip4->ip_p = pd->proto;
3470 		ip4->ip_src = pd->nsaddr.v4;
3471 		ip4->ip_dst = pd->ndaddr.v4;
3472 		pd->src = (struct pf_addr *)&ip4->ip_src;
3473 		pd->dst = (struct pf_addr *)&ip4->ip_dst;
3474 		pd->off = sizeof(struct ip);
3475 		break;
3476 	case AF_INET6:
3477 		ip6 = mtod(pd->m, struct ip6_hdr *);
3478 		bzero(ip6, hlen);
3479 		ip6->ip6_vfc = IPV6_VERSION;
3480 		ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
3481 		ip6->ip6_plen = htons(pd->tot_len - pd->off);
3482 		ip6->ip6_nxt = pd->proto;
3483 		if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
3484 			ip6->ip6_hlim = IPV6_DEFHLIM;
3485 		else
3486 			ip6->ip6_hlim = pd->ttl;
3487 		ip6->ip6_src = pd->nsaddr.v6;
3488 		ip6->ip6_dst = pd->ndaddr.v6;
3489 		pd->src = (struct pf_addr *)&ip6->ip6_src;
3490 		pd->dst = (struct pf_addr *)&ip6->ip6_dst;
3491 		pd->off = sizeof(struct ip6_hdr);
3492 
3493 		/*
3494 		 * If we're dealing with a reassembled packet we need to adjust
3495 		 * the header length from the IPv4 header size to IPv6 header
3496 		 * size.
3497 		 */
3498 		mtag = m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL);
3499 		if (mtag) {
3500 			ftag = (struct pf_fragment_tag *)(mtag + 1);
3501 			ftag->ft_hdrlen = sizeof(*ip6);
3502 			ftag->ft_maxlen -= sizeof(struct ip6_hdr) -
3503 			    sizeof(struct ip) + sizeof(struct ip6_frag);
3504 		}
3505 		break;
3506 	default:
3507 		return (-1);
3508 	}
3509 
3510 	/* recalculate icmp/icmp6 checksums */
3511 	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
3512 		int off;
3513 		if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) ==
3514 		    NULL) {
3515 			pd->m = NULL;
3516 			return (-1);
3517 		}
3518 		icmp = (struct icmp6_hdr *)(mp->m_data + off);
3519 		icmp->icmp6_cksum = 0;
3520 		icmp->icmp6_cksum = pd->naf == AF_INET ?
3521 		    in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) :
3522 		    in6_cksum(pd->m, IPPROTO_ICMPV6, hlen,
3523 			ntohs(ip6->ip6_plen));
3524 	}
3525 #endif /* INET && INET6 */
3526 
3527 	return (0);
3528 }
3529 
3530 int
3531 pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd,
3532     struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
3533     sa_family_t af, sa_family_t naf)
3534 {
3535 #if defined(INET) && defined(INET6)
3536 	struct mbuf	*n = NULL;
3537 	struct ip	*ip4;
3538 	struct ip6_hdr	*ip6;
3539 	int		 hlen, olen, mlen;
3540 
3541 	if (af == naf || (af != AF_INET && af != AF_INET6) ||
3542 	    (naf != AF_INET && naf != AF_INET6))
3543 		return (-1);
3544 
3545 	/* split the mbuf chain on the inner ip/ip6 header boundary */
3546 	if ((n = m_split(m, off, M_NOWAIT)) == NULL)
3547 		return (-1);
3548 
3549 	/* old header */
3550 	olen = pd2->off - off;
3551 	/* new header */
3552 	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
3553 	/* data length */
3554 	mlen = m->m_pkthdr.len - pd2->off;
3555 
3556 	/* trim old header */
3557 	m_adj(n, olen);
3558 
3559 	/* prepend a new one */
3560 	M_PREPEND(n, hlen, M_NOWAIT);
3561 	if (n == NULL)
3562 		return (-1);
3563 
3564 	/* translate inner ip/ip6 header */
3565 	switch (naf) {
3566 	case AF_INET:
3567 		ip4 = mtod(n, struct ip *);
3568 		bzero(ip4, sizeof(*ip4));
3569 		ip4->ip_v = IPVERSION;
3570 		ip4->ip_hl = sizeof(*ip4) >> 2;
3571 		ip4->ip_len = htons(sizeof(*ip4) + mlen);
3572 		ip_fillid(ip4);
3573 		ip4->ip_off = htons(IP_DF);
3574 		ip4->ip_ttl = pd2->ttl;
3575 		if (pd2->proto == IPPROTO_ICMPV6)
3576 			ip4->ip_p = IPPROTO_ICMP;
3577 		else
3578 			ip4->ip_p = pd2->proto;
3579 		ip4->ip_src = src->v4;
3580 		ip4->ip_dst = dst->v4;
3581 		ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
3582 		break;
3583 	case AF_INET6:
3584 		ip6 = mtod(n, struct ip6_hdr *);
3585 		bzero(ip6, sizeof(*ip6));
3586 		ip6->ip6_vfc = IPV6_VERSION;
3587 		ip6->ip6_plen = htons(mlen);
3588 		if (pd2->proto == IPPROTO_ICMP)
3589 			ip6->ip6_nxt = IPPROTO_ICMPV6;
3590 		else
3591 			ip6->ip6_nxt = pd2->proto;
3592 		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
3593 			ip6->ip6_hlim = IPV6_DEFHLIM;
3594 		else
3595 			ip6->ip6_hlim = pd2->ttl;
3596 		ip6->ip6_src = src->v6;
3597 		ip6->ip6_dst = dst->v6;
3598 		break;
3599 	}
3600 
3601 	/* adjust payload offset and total packet length */
3602 	pd2->off += hlen - olen;
3603 	pd->tot_len += hlen - olen;
3604 
3605 	/* merge modified inner packet with the original header */
3606 	mlen = n->m_pkthdr.len;
3607 	m_cat(m, n);
3608 	m->m_pkthdr.len += mlen;
3609 #endif /* INET && INET6 */
3610 
3611 	return (0);
3612 }
3613 
3614 #define PTR_IP(field)	(offsetof(struct ip, field))
3615 #define PTR_IP6(field)	(offsetof(struct ip6_hdr, field))
3616 
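/*
 * Editor's note: translate an embedded ICMP <-> ICMPv6 header between
 * address families, mapping type/code pairs and adjusting the mtu and
 * pointer fields, broadly along the lines of the RFC 7915 (SIIT) tables.
 */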
3617 int
3618 pf_translate_icmp_af(int af, void *arg)
3619 {
3620 #if defined(INET) && defined(INET6)
3621 	struct icmp		*icmp4;
3622 	struct icmp6_hdr	*icmp6;
3623 	u_int32_t		 mtu;
3624 	int32_t			 ptr = -1;
3625 	u_int8_t		 type;
3626 	u_int8_t		 code;
3627 
3628 	switch (af) {
3629 	case AF_INET:
3630 		icmp6 = arg;
3631 		type = icmp6->icmp6_type;
3632 		code = icmp6->icmp6_code;
3633 		mtu = ntohl(icmp6->icmp6_mtu);
3634 
3635 		switch (type) {
3636 		case ICMP6_ECHO_REQUEST:
3637 			type = ICMP_ECHO;
3638 			break;
3639 		case ICMP6_ECHO_REPLY:
3640 			type = ICMP_ECHOREPLY;
3641 			break;
3642 		case ICMP6_DST_UNREACH:
3643 			type = ICMP_UNREACH;
3644 			switch (code) {
3645 			case ICMP6_DST_UNREACH_NOROUTE:
3646 			case ICMP6_DST_UNREACH_BEYONDSCOPE:
3647 			case ICMP6_DST_UNREACH_ADDR:
3648 				code = ICMP_UNREACH_HOST;
3649 				break;
3650 			case ICMP6_DST_UNREACH_ADMIN:
3651 				code = ICMP_UNREACH_HOST_PROHIB;
3652 				break;
3653 			case ICMP6_DST_UNREACH_NOPORT:
3654 				code = ICMP_UNREACH_PORT;
3655 				break;
3656 			default:
3657 				return (-1);
3658 			}
3659 			break;
3660 		case ICMP6_PACKET_TOO_BIG:
3661 			type = ICMP_UNREACH;
3662 			code = ICMP_UNREACH_NEEDFRAG;
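			/* editor's note: the IPv6 header is 20 bytes larger than the IPv4 header */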
3663 			mtu -= 20;
3664 			break;
3665 		case ICMP6_TIME_EXCEEDED:
3666 			type = ICMP_TIMXCEED;
3667 			break;
3668 		case ICMP6_PARAM_PROB:
3669 			switch (code) {
3670 			case ICMP6_PARAMPROB_HEADER:
3671 				type = ICMP_PARAMPROB;
3672 				code = ICMP_PARAMPROB_ERRATPTR;
3673 				ptr = ntohl(icmp6->icmp6_pptr);
3674 
3675 				if (ptr == PTR_IP6(ip6_vfc))
3676 					; /* preserve */
3677 				else if (ptr == PTR_IP6(ip6_vfc) + 1)
3678 					ptr = PTR_IP(ip_tos);
3679 				else if (ptr == PTR_IP6(ip6_plen) ||
3680 				    ptr == PTR_IP6(ip6_plen) + 1)
3681 					ptr = PTR_IP(ip_len);
3682 				else if (ptr == PTR_IP6(ip6_nxt))
3683 					ptr = PTR_IP(ip_p);
3684 				else if (ptr == PTR_IP6(ip6_hlim))
3685 					ptr = PTR_IP(ip_ttl);
3686 				else if (ptr >= PTR_IP6(ip6_src) &&
3687 				    ptr < PTR_IP6(ip6_dst))
3688 					ptr = PTR_IP(ip_src);
3689 				else if (ptr >= PTR_IP6(ip6_dst) &&
3690 				    ptr < sizeof(struct ip6_hdr))
3691 					ptr = PTR_IP(ip_dst);
3692 				else {
3693 					return (-1);
3694 				}
3695 				break;
3696 			case ICMP6_PARAMPROB_NEXTHEADER:
3697 				type = ICMP_UNREACH;
3698 				code = ICMP_UNREACH_PROTOCOL;
3699 				break;
3700 			default:
3701 				return (-1);
3702 			}
3703 			break;
3704 		default:
3705 			return (-1);
3706 		}
3707 		if (icmp6->icmp6_type != type) {
3708 			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
3709 			    icmp6->icmp6_type, type, 0);
3710 			icmp6->icmp6_type = type;
3711 		}
3712 		if (icmp6->icmp6_code != code) {
3713 			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
3714 			    icmp6->icmp6_code, code, 0);
3715 			icmp6->icmp6_code = code;
3716 		}
3717 		if (icmp6->icmp6_mtu != htonl(mtu)) {
3718 			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
3719 			    htons(ntohl(icmp6->icmp6_mtu)), htons(mtu), 0);
3720 			/* aligns well with an icmpv4 nextmtu */
3721 			icmp6->icmp6_mtu = htonl(mtu);
3722 		}
3723 		if (ptr >= 0 && icmp6->icmp6_pptr != htonl(ptr)) {
3724 			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
3725 			    htons(ntohl(icmp6->icmp6_pptr)), htons(ptr), 0);
3726 			/* icmpv4 pptr occupies only the most significant byte */
3727 			icmp6->icmp6_pptr = htonl(ptr << 24);
3728 		}
3729 		break;
3730 	case AF_INET6:
3731 		icmp4 = arg;
3732 		type = icmp4->icmp_type;
3733 		code = icmp4->icmp_code;
3734 		mtu = ntohs(icmp4->icmp_nextmtu);
3735 
3736 		switch (type) {
3737 		case ICMP_ECHO:
3738 			type = ICMP6_ECHO_REQUEST;
3739 			break;
3740 		case ICMP_ECHOREPLY:
3741 			type = ICMP6_ECHO_REPLY;
3742 			break;
3743 		case ICMP_UNREACH:
3744 			type = ICMP6_DST_UNREACH;
3745 			switch (code) {
3746 			case ICMP_UNREACH_NET:
3747 			case ICMP_UNREACH_HOST:
3748 			case ICMP_UNREACH_NET_UNKNOWN:
3749 			case ICMP_UNREACH_HOST_UNKNOWN:
3750 			case ICMP_UNREACH_ISOLATED:
3751 			case ICMP_UNREACH_TOSNET:
3752 			case ICMP_UNREACH_TOSHOST:
3753 				code = ICMP6_DST_UNREACH_NOROUTE;
3754 				break;
3755 			case ICMP_UNREACH_PORT:
3756 				code = ICMP6_DST_UNREACH_NOPORT;
3757 				break;
3758 			case ICMP_UNREACH_NET_PROHIB:
3759 			case ICMP_UNREACH_HOST_PROHIB:
3760 			case ICMP_UNREACH_FILTER_PROHIB:
3761 			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
3762 				code = ICMP6_DST_UNREACH_ADMIN;
3763 				break;
3764 			case ICMP_UNREACH_PROTOCOL:
3765 				type = ICMP6_PARAM_PROB;
3766 				code = ICMP6_PARAMPROB_NEXTHEADER;
3767 				ptr = offsetof(struct ip6_hdr, ip6_nxt);
3768 				break;
3769 			case ICMP_UNREACH_NEEDFRAG:
3770 				type = ICMP6_PACKET_TOO_BIG;
3771 				code = 0;
3772 				mtu += 20;	/* IPv6 header is 20 bytes larger than IPv4's */
3773 				break;
3774 			default:
3775 				return (-1);
3776 			}
3777 			break;
3778 		case ICMP_TIMXCEED:
3779 			type = ICMP6_TIME_EXCEEDED;
3780 			break;
3781 		case ICMP_PARAMPROB:
3782 			type = ICMP6_PARAM_PROB;
3783 			switch (code) {
3784 			case ICMP_PARAMPROB_ERRATPTR:
3785 				code = ICMP6_PARAMPROB_HEADER;
3786 				break;
3787 			case ICMP_PARAMPROB_LENGTH:
3788 				code = ICMP6_PARAMPROB_HEADER;
3789 				break;
3790 			default:
3791 				return (-1);
3792 			}
3793 
3794 			ptr = icmp4->icmp_pptr;
3795 			if (ptr == 0 || ptr == PTR_IP(ip_tos))
3796 				; /* preserve */
3797 			else if (ptr == PTR_IP(ip_len) ||
3798 			    ptr == PTR_IP(ip_len) + 1)
3799 				ptr = PTR_IP6(ip6_plen);
3800 			else if (ptr == PTR_IP(ip_ttl))
3801 				ptr = PTR_IP6(ip6_hlim);
3802 			else if (ptr == PTR_IP(ip_p))
3803 				ptr = PTR_IP6(ip6_nxt);
3804 			else if (ptr >= PTR_IP(ip_src) && ptr < PTR_IP(ip_dst))
3805 				ptr = PTR_IP6(ip6_src);
3806 			else if (ptr >= PTR_IP(ip_dst) &&
3807 			    ptr < sizeof(struct ip))
3808 				ptr = PTR_IP6(ip6_dst);
3809 			else {
3810 				return (-1);
3811 			}
3812 			break;
3813 		default:
3814 			return (-1);
3815 		}
3816 		if (icmp4->icmp_type != type) {
3817 			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
3818 			    icmp4->icmp_type, type, 0);
3819 			icmp4->icmp_type = type;
3820 		}
3821 		if (icmp4->icmp_code != code) {
3822 			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
3823 			    icmp4->icmp_code, code, 0);
3824 			icmp4->icmp_code = code;
3825 		}
3826 		if (icmp4->icmp_nextmtu != htons(mtu)) {
3827 			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
3828 			    icmp4->icmp_nextmtu, htons(mtu), 0);
3829 			icmp4->icmp_nextmtu = htons(mtu);
3830 		}
3831 		if (ptr >= 0 && icmp4->icmp_void != ptr) {
3832 			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
3833 			    htons(icmp4->icmp_pptr), htons(ptr), 0);
3834 			icmp4->icmp_void = htonl(ptr);
3835 		}
3836 		break;
3837 	}
3838 #endif /* INET && INET6 */
3839 
3840 	return (0);
3841 }
3842 
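/*
 * Illustrative sketch (not compiled in): translating an ICMPv6 "packet
 * too big" with pf_translate_icmp_af().  The af argument names the target
 * family; the reported MTU shrinks by 20 bytes, the size difference
 * between the 40-byte IPv6 and the 20-byte IPv4 header:
 *
 *	struct icmp6_hdr hdr;
 *
 *	bzero(&hdr, sizeof(hdr));
 *	hdr.icmp6_type = ICMP6_PACKET_TOO_BIG;
 *	hdr.icmp6_mtu = htonl(1500);
 *	(void)pf_translate_icmp_af(AF_INET, &hdr);
 *
 * Afterwards the same buffer reads as an ICMP_UNREACH /
 * ICMP_UNREACH_NEEDFRAG header with a next-hop MTU of 1480.
 */
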
3843 /*
3844  * Need to modulate the sequence numbers in the TCP SACK option
3845  * (credits to Krzysztof Pfaff for report and patch)
3846  */
3847 static int
3848 pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th,
3849     struct pf_state_peer *dst)
3850 {
3851 	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
3852 	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
3853 	int copyback = 0, i, olen;
3854 	struct sackblk sack;
3855 
3856 #define	TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
3857 	if (hlen < TCPOLEN_SACKLEN ||
3858 	    !pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
3859 		return (0);
3860 
3861 	while (hlen >= TCPOLEN_SACKLEN) {
3862 		size_t startoff = opt - opts;
3863 		olen = opt[1];
3864 		switch (*opt) {
3865 		case TCPOPT_EOL:	/* FALLTHROUGH */
3866 		case TCPOPT_NOP:
3867 			opt++;
3868 			hlen--;
3869 			break;
3870 		case TCPOPT_SACK:
3871 			if (olen > hlen)
3872 				olen = hlen;
3873 			if (olen >= TCPOLEN_SACKLEN) {
3874 				for (i = 2; i + TCPOLEN_SACK <= olen;
3875 				    i += TCPOLEN_SACK) {
3876 					memcpy(&sack, &opt[i], sizeof(sack));
3877 					pf_patch_32_unaligned(pd->m,
3878 					    &th->th_sum, &sack.start,
3879 					    htonl(ntohl(sack.start) - dst->seqdiff),
3880 					    PF_ALGNMNT(startoff),
3881 					    0);
3882 					pf_patch_32_unaligned(pd->m, &th->th_sum,
3883 					    &sack.end,
3884 					    htonl(ntohl(sack.end) - dst->seqdiff),
3885 					    PF_ALGNMNT(startoff),
3886 					    0);
3887 					memcpy(&opt[i], &sack, sizeof(sack));
3888 				}
3889 				copyback = 1;
3890 			}
3891 			/* FALLTHROUGH */
3892 		default:
3893 			if (olen < 2)
3894 				olen = 2;
3895 			hlen -= olen;
3896 			opt += olen;
3897 		}
3898 	}
3899 
3900 	if (copyback)
3901 		m_copyback(pd->m, pd->off + sizeof(*th), thoptlen, (caddr_t)opts);
3902 	return (copyback);
3903 }
3904 
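/*
 * Sketch of the adjustment above (hypothetical seqdiff): with
 * dst->seqdiff == 1000, a SACK block advertising [5000, 6000] on the wire
 * leaves as [4000, 5000], mirroring the th_seq/th_ack modulation applied
 * to the rest of the segment:
 *
 *	sack.start = htonl(ntohl(sack.start) - dst->seqdiff);
 *	sack.end = htonl(ntohl(sack.end) - dst->seqdiff);
 */
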
3905 struct mbuf *
3906 pf_build_tcp(const struct pf_krule *r, sa_family_t af,
3907     const struct pf_addr *saddr, const struct pf_addr *daddr,
3908     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
3909     u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
3910     int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid)
3911 {
3912 	struct mbuf	*m;
3913 	int		 len, tlen;
3914 #ifdef INET
3915 	struct ip	*h = NULL;
3916 #endif /* INET */
3917 #ifdef INET6
3918 	struct ip6_hdr	*h6 = NULL;
3919 #endif /* INET6 */
3920 	struct tcphdr	*th;
3921 	char		*opt;
3922 	struct pf_mtag  *pf_mtag;
3923 
3924 	len = 0;
3925 	th = NULL;
3926 
3927 	/* maximum segment size tcp option */
3928 	tlen = sizeof(struct tcphdr);
3929 	if (mss)
3930 		tlen += 4;
3931 
3932 	switch (af) {
3933 #ifdef INET
3934 	case AF_INET:
3935 		len = sizeof(struct ip) + tlen;
3936 		break;
3937 #endif /* INET */
3938 #ifdef INET6
3939 	case AF_INET6:
3940 		len = sizeof(struct ip6_hdr) + tlen;
3941 		break;
3942 #endif /* INET6 */
3943 	}
3944 
3945 	m = m_gethdr(M_NOWAIT, MT_DATA);
3946 	if (m == NULL)
3947 		return (NULL);
3948 
3949 #ifdef MAC
3950 	mac_netinet_firewall_send(m);
3951 #endif
3952 	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
3953 		m_freem(m);
3954 		return (NULL);
3955 	}
3956 	m->m_flags |= mbuf_flags;
3957 	pf_mtag->tag = mtag_tag;
3958 	pf_mtag->flags = mtag_flags;
3959 
3960 	if (rtableid >= 0)
3961 		M_SETFIB(m, rtableid);
3962 
3963 #ifdef ALTQ
3964 	if (r != NULL && r->qid) {
3965 		pf_mtag->qid = r->qid;
3966 
3967 		/* add hints for ecn */
3968 		pf_mtag->hdr = mtod(m, struct ip *);
3969 	}
3970 #endif /* ALTQ */
3971 	m->m_data += max_linkhdr;
3972 	m->m_pkthdr.len = m->m_len = len;
3973 	/* The rest of the stack assumes a rcvif, so provide one.
3974 	 * This is a locally generated packet, so loopback is close enough. */
3975 	m->m_pkthdr.rcvif = V_loif;
3976 	bzero(m->m_data, len);
3977 	switch (af) {
3978 #ifdef INET
3979 	case AF_INET:
3980 		h = mtod(m, struct ip *);
3981 
3982 		/* IP header fields included in the TCP checksum */
3983 		h->ip_p = IPPROTO_TCP;
3984 		h->ip_len = htons(tlen);
3985 		h->ip_src.s_addr = saddr->v4.s_addr;
3986 		h->ip_dst.s_addr = daddr->v4.s_addr;
3987 
3988 		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
3989 		break;
3990 #endif /* INET */
3991 #ifdef INET6
3992 	case AF_INET6:
3993 		h6 = mtod(m, struct ip6_hdr *);
3994 
3995 		/* IP header fields included in the TCP checksum */
3996 		h6->ip6_nxt = IPPROTO_TCP;
3997 		h6->ip6_plen = htons(tlen);
3998 		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
3999 		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
4000 
4001 		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
4002 		break;
4003 #endif /* INET6 */
4004 	}
4005 
4006 	/* TCP header */
4007 	th->th_sport = sport;
4008 	th->th_dport = dport;
4009 	th->th_seq = htonl(seq);
4010 	th->th_ack = htonl(ack);
4011 	th->th_off = tlen >> 2;
4012 	tcp_set_flags(th, tcp_flags);
4013 	th->th_win = htons(win);
4014 
4015 	if (mss) {
4016 		opt = (char *)(th + 1);
4017 		opt[0] = TCPOPT_MAXSEG;
4018 		opt[1] = 4;
4019 		HTONS(mss);
4020 		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
4021 	}
4022 
4023 	switch (af) {
4024 #ifdef INET
4025 	case AF_INET:
4026 		/* TCP checksum */
4027 		th->th_sum = in_cksum(m, len);
4028 
4029 		/* Finish the IP header */
4030 		h->ip_v = 4;
4031 		h->ip_hl = sizeof(*h) >> 2;
4032 		h->ip_tos = IPTOS_LOWDELAY;
4033 		h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
4034 		h->ip_len = htons(len);
4035 		h->ip_ttl = ttl ? ttl : V_ip_defttl;
4036 		h->ip_sum = 0;
4037 		break;
4038 #endif /* INET */
4039 #ifdef INET6
4040 	case AF_INET6:
4041 		/* TCP checksum */
4042 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
4043 		    sizeof(struct ip6_hdr), tlen);
4044 
4045 		h6->ip6_vfc |= IPV6_VERSION;
4046 		h6->ip6_hlim = IPV6_DEFHLIM;
4047 		break;
4048 #endif /* INET6 */
4049 	}
4050 
4051 	return (m);
4052 }
4053 
4054 static void
4055 pf_send_sctp_abort(sa_family_t af, struct pf_pdesc *pd,
4056     uint8_t ttl, int rtableid)
4057 {
4058 	struct mbuf		*m;
4059 #ifdef INET
4060 	struct ip		*h = NULL;
4061 #endif /* INET */
4062 #ifdef INET6
4063 	struct ip6_hdr		*h6 = NULL;
4064 #endif /* INET6 */
4065 	struct sctphdr		*hdr;
4066 	struct sctp_chunkhdr	*chunk;
4067 	struct pf_send_entry	*pfse;
4068 	int			 off = 0;
4069 
4070 	MPASS(af == pd->af);
4071 
4072 	m = m_gethdr(M_NOWAIT, MT_DATA);
4073 	if (m == NULL)
4074 		return;
4075 
4076 	m->m_data += max_linkhdr;
4077 	m->m_flags |= M_SKIP_FIREWALL;
4078 	/* The rest of the stack assumes a rcvif, so provide one.
4079 	 * This is a locally generated packet, so loopback is close enough. */
4080 	m->m_pkthdr.rcvif = V_loif;
4081 
4082 	/* IPv4|6 header */
4083 	switch (af) {
4084 #ifdef INET
4085 	case AF_INET:
4086 		bzero(m->m_data, sizeof(struct ip) + sizeof(*hdr) + sizeof(*chunk));
4087 
4088 		h = mtod(m, struct ip *);
4089 
4090 		/* IPv4 header; unlike TCP, SCTP's CRC32c covers no pseudo-header. */
4091 
4092 		h->ip_p = IPPROTO_SCTP;
4093 		h->ip_len = htons(sizeof(*h) + sizeof(*hdr) + sizeof(*chunk));
4094 		h->ip_ttl = ttl ? ttl : V_ip_defttl;
4095 		h->ip_src = pd->dst->v4;
4096 		h->ip_dst = pd->src->v4;
4097 
4098 		off += sizeof(struct ip);
4099 		break;
4100 #endif /* INET */
4101 #ifdef INET6
4102 	case AF_INET6:
4103 		bzero(m->m_data, sizeof(struct ip6_hdr) + sizeof(*hdr) + sizeof(*chunk));
4104 
4105 		h6 = mtod(m, struct ip6_hdr *);
4106 
4107 		/* IPv6 header; unlike TCP, SCTP's CRC32c covers no pseudo-header. */
4108 		h6->ip6_vfc |= IPV6_VERSION;
4109 		h6->ip6_nxt = IPPROTO_SCTP;
4110 		h6->ip6_plen = htons(sizeof(*h6) + sizeof(*hdr) + sizeof(*chunk));
4111 		h6->ip6_hlim = ttl ? ttl : V_ip6_defhlim;
4112 		memcpy(&h6->ip6_src, &pd->dst->v6, sizeof(struct in6_addr));
4113 		memcpy(&h6->ip6_dst, &pd->src->v6, sizeof(struct in6_addr));
4114 
4115 		off += sizeof(struct ip6_hdr);
4116 		break;
4117 #endif /* INET6 */
4118 	}
4119 
4120 	/* SCTP header */
4121 	hdr = mtodo(m, off);
4122 
4123 	hdr->src_port = pd->hdr.sctp.dest_port;
4124 	hdr->dest_port = pd->hdr.sctp.src_port;
4125 	hdr->v_tag = pd->sctp_initiate_tag;
4126 	hdr->checksum = 0;
4127 
4128 	/* Abort chunk. */
4129 	off += sizeof(struct sctphdr);
4130 	chunk = mtodo(m, off);
4131 
4132 	chunk->chunk_type = SCTP_ABORT_ASSOCIATION;
4133 	chunk->chunk_length = htons(sizeof(*chunk));
4134 
4135 	/* SCTP checksum */
4136 	off += sizeof(*chunk);
4137 	m->m_pkthdr.len = m->m_len = off;
4138 
4139 	pf_sctp_checksum(m, off - sizeof(*hdr) - sizeof(*chunk));
4140 
4141 	if (rtableid >= 0)
4142 		M_SETFIB(m, rtableid);
4143 
4144 	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
4145 	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
4146 	if (pfse == NULL) {
4147 		m_freem(m);
4148 		return;
4149 	}
4150 
4151 	switch (af) {
4152 #ifdef INET
4153 	case AF_INET:
4154 		pfse->pfse_type = PFSE_IP;
4155 		break;
4156 #endif /* INET */
4157 #ifdef INET6
4158 	case AF_INET6:
4159 		pfse->pfse_type = PFSE_IP6;
4160 		break;
4161 #endif /* INET6 */
4162 	}
4163 
4164 	pfse->pfse_m = m;
4165 	pf_send(pfse);
4166 }
4167 
4168 void
4169 pf_send_tcp(const struct pf_krule *r, sa_family_t af,
4170     const struct pf_addr *saddr, const struct pf_addr *daddr,
4171     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
4172     u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl,
4173     int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid)
4174 {
4175 	struct pf_send_entry *pfse;
4176 	struct mbuf	*m;
4177 
4178 	m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags,
4179 	    win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, rtableid);
4180 	if (m == NULL)
4181 		return;
4182 
4183 	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
4184 	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
4185 	if (pfse == NULL) {
4186 		m_freem(m);
4187 		return;
4188 	}
4189 
4190 	switch (af) {
4191 #ifdef INET
4192 	case AF_INET:
4193 		pfse->pfse_type = PFSE_IP;
4194 		break;
4195 #endif /* INET */
4196 #ifdef INET6
4197 	case AF_INET6:
4198 		pfse->pfse_type = PFSE_IP6;
4199 		break;
4200 #endif /* INET6 */
4201 	}
4202 
4203 	pfse->pfse_m = m;
4204 	pf_send(pfse);
4205 }
4206 
4207 static void
4208 pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd,
4209     struct pf_state_key *sk, struct tcphdr *th,
4210     u_int16_t bproto_sum, u_int16_t bip_sum,
4211     u_short *reason, int rtableid)
4212 {
4213 	struct pf_addr	* const saddr = pd->src;
4214 	struct pf_addr	* const daddr = pd->dst;
4215 
4216 	/* undo NAT changes, if they have taken place */
4217 	if (nr != NULL) {
4218 		PF_ACPY(saddr, &sk->addr[pd->sidx], pd->af);
4219 		PF_ACPY(daddr, &sk->addr[pd->didx], pd->af);
4220 		if (pd->sport)
4221 			*pd->sport = sk->port[pd->sidx];
4222 		if (pd->dport)
4223 			*pd->dport = sk->port[pd->didx];
4224 		if (pd->ip_sum)
4225 			*pd->ip_sum = bip_sum;
4226 		m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
4227 	}
4228 	if (pd->proto == IPPROTO_TCP &&
4229 	    ((r->rule_flag & PFRULE_RETURNRST) ||
4230 	    (r->rule_flag & PFRULE_RETURN)) &&
4231 	    !(tcp_get_flags(th) & TH_RST)) {
4232 		u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
4233 
4234 		if (pf_check_proto_cksum(pd->m, pd->off, pd->tot_len - pd->off,
4235 		    IPPROTO_TCP, pd->af))
4236 			REASON_SET(reason, PFRES_PROTCKSUM);
4237 		else {
4238 			if (tcp_get_flags(th) & TH_SYN)
4239 				ack++;
4240 			if (tcp_get_flags(th) & TH_FIN)
4241 				ack++;
4242 			pf_send_tcp(r, pd->af, pd->dst,
4243 				pd->src, th->th_dport, th->th_sport,
4244 				ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
4245 				r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid);
4246 		}
4247 	} else if (pd->proto == IPPROTO_SCTP &&
4248 	    (r->rule_flag & PFRULE_RETURN)) {
4249 		pf_send_sctp_abort(pd->af, pd, r->return_ttl, rtableid);
4250 	} else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
4251 		r->return_icmp)
4252 		pf_send_icmp(pd->m, r->return_icmp >> 8,
4253 			r->return_icmp & 255, pd->af, r, rtableid);
4254 	else if (pd->proto != IPPROTO_ICMPV6 && pd->af == AF_INET6 &&
4255 		r->return_icmp6)
4256 		pf_send_icmp(pd->m, r->return_icmp6 >> 8,
4257 			r->return_icmp6 & 255, pd->af, r, rtableid);
4258 }
4259 
4260 static int
4261 pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m)
4262 {
4263 	struct m_tag *mtag;
4264 	u_int8_t mpcp;
4265 
4266 	mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL);
4267 	if (mtag == NULL)
4268 		return (0);
4269 
4270 	if (prio == PF_PRIO_ZERO)
4271 		prio = 0;
4272 
4273 	mpcp = *(uint8_t *)(mtag + 1);
4274 
4275 	return (mpcp == prio);
4276 }
4277 
4278 static int
4279 pf_icmp_to_bandlim(uint8_t type)
4280 {
4281 	switch (type) {
4282 		case ICMP_ECHO:
4283 		case ICMP_ECHOREPLY:
4284 			return (BANDLIM_ICMP_ECHO);
4285 		case ICMP_TSTAMP:
4286 		case ICMP_TSTAMPREPLY:
4287 			return (BANDLIM_ICMP_TSTAMP);
4288 		case ICMP_UNREACH:
4289 		default:
4290 			return (BANDLIM_ICMP_UNREACH);
4291 	}
4292 }
4293 
4294 static void
4295 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
4296     struct pf_krule *r, int rtableid)
4297 {
4298 	struct pf_send_entry *pfse;
4299 	struct mbuf *m0;
4300 	struct pf_mtag *pf_mtag;
4301 
4302 	/* ICMP packet rate limitation. */
4303 	switch (af) {
4304 #ifdef INET6
4305 	case AF_INET6:
4306 		if (icmp6_ratelimit(NULL, type, code))
4307 			return;
4308 		break;
4309 #endif
4310 #ifdef INET
4311 	case AF_INET:
4312 		if (badport_bandlim(pf_icmp_to_bandlim(type)) != 0)
4313 			return;
4314 		break;
4315 #endif
4316 	}
4317 
4318 	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
4319 	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
4320 	if (pfse == NULL)
4321 		return;
4322 
4323 	if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
4324 		free(pfse, M_PFTEMP);
4325 		return;
4326 	}
4327 
4328 	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
4329 		free(pfse, M_PFTEMP);
4330 		return;
4331 	}
4332 	/* XXX: revisit */
4333 	m0->m_flags |= M_SKIP_FIREWALL;
4334 
4335 	if (rtableid >= 0)
4336 		M_SETFIB(m0, rtableid);
4337 
4338 #ifdef ALTQ
4339 	if (r->qid) {
4340 		pf_mtag->qid = r->qid;
4341 		/* add hints for ecn */
4342 		pf_mtag->hdr = mtod(m0, struct ip *);
4343 	}
4344 #endif /* ALTQ */
4345 
4346 	switch (af) {
4347 #ifdef INET
4348 	case AF_INET:
4349 		pfse->pfse_type = PFSE_ICMP;
4350 		break;
4351 #endif /* INET */
4352 #ifdef INET6
4353 	case AF_INET6:
4354 		pfse->pfse_type = PFSE_ICMP6;
4355 		break;
4356 #endif /* INET6 */
4357 	}
4358 	pfse->pfse_m = m0;
4359 	pfse->icmpopts.type = type;
4360 	pfse->icmpopts.code = code;
4361 	pf_send(pfse);
4362 }
4363 
4364 /*
4365  * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
4366  * If n is 0, they match when they are equal under the mask; if n is
4367  * non-zero, the sense of the comparison is inverted.
4368  */
4369 int
4370 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
4371     struct pf_addr *b, sa_family_t af)
4372 {
4373 	int	match = 0;
4374 
4375 	switch (af) {
4376 #ifdef INET
4377 	case AF_INET:
4378 		if (IN_ARE_MASKED_ADDR_EQUAL(a->v4, b->v4, m->v4))
4379 			match++;
4380 		break;
4381 #endif /* INET */
4382 #ifdef INET6
4383 	case AF_INET6:
4384 		if (IN6_ARE_MASKED_ADDR_EQUAL(&a->v6, &b->v6, &m->v6))
4385 			match++;
4386 		break;
4387 #endif /* INET6 */
4388 	}
4389 	if (match) {
4390 		if (n)
4391 			return (0);
4392 		else
4393 			return (1);
4394 	} else {
4395 		if (n)
4396 			return (1);
4397 		else
4398 			return (0);
4399 	}
4400 }
4401 
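/*
 * Usage sketch (hypothetical addresses): n == 0 gives a masked equality
 * test, n != 0 inverts it.  With a = 192.168.1.5, b = 192.168.1.0 and
 * m = 255.255.255.0:
 *
 *	pf_match_addr(0, &a, &m, &b, AF_INET)	returns 1
 *	pf_match_addr(1, &a, &m, &b, AF_INET)	returns 0
 */
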
4402 /*
4403  * Return 1 if b <= a <= e, otherwise return 0.
4404  */
4405 int
4406 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
4407     struct pf_addr *a, sa_family_t af)
4408 {
4409 	switch (af) {
4410 #ifdef INET
4411 	case AF_INET:
4412 		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
4413 		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
4414 			return (0);
4415 		break;
4416 #endif /* INET */
4417 #ifdef INET6
4418 	case AF_INET6: {
4419 		int	i;
4420 
4421 		/* check a >= b */
4422 		for (i = 0; i < 4; ++i)
4423 			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
4424 				break;
4425 			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
4426 				return (0);
4427 		/* check a <= e */
4428 		for (i = 0; i < 4; ++i)
4429 			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
4430 				break;
4431 			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
4432 				return (0);
4433 		break;
4434 	}
4435 #endif /* INET6 */
4436 	}
4437 	return (1);
4438 }
4439 
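/*
 * Example: with b = 10.0.0.10, e = 10.0.0.20 and a = 10.0.0.15,
 * pf_match_addr_range(&b, &e, &a, AF_INET) returns 1.  The IPv6 branch
 * performs the same comparison lexicographically over the four 32-bit
 * words, most significant first.
 */
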
4440 static int
4441 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
4442 {
4443 	switch (op) {
4444 	case PF_OP_IRG:
4445 		return ((p > a1) && (p < a2));
4446 	case PF_OP_XRG:
4447 		return ((p < a1) || (p > a2));
4448 	case PF_OP_RRG:
4449 		return ((p >= a1) && (p <= a2));
4450 	case PF_OP_EQ:
4451 		return (p == a1);
4452 	case PF_OP_NE:
4453 		return (p != a1);
4454 	case PF_OP_LT:
4455 		return (p < a1);
4456 	case PF_OP_LE:
4457 		return (p <= a1);
4458 	case PF_OP_GT:
4459 		return (p > a1);
4460 	case PF_OP_GE:
4461 		return (p >= a1);
4462 	}
4463 	return (0); /* never reached */
4464 }
4465 
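/*
 * Sketch: PF_OP_RRG is the inclusive range test behind "port 1000:2000"
 * style rules and PF_OP_XRG its complement:
 *
 *	pf_match(PF_OP_RRG, 1000, 2000, 1500)	returns 1
 *	pf_match(PF_OP_XRG, 1000, 2000, 1500)	returns 0
 *	pf_match(PF_OP_NE, 22, 0, 80)		returns 1
 */
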
4466 int
4467 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
4468 {
4469 	NTOHS(a1);
4470 	NTOHS(a2);
4471 	NTOHS(p);
4472 	return (pf_match(op, a1, a2, p));
4473 }
4474 
4475 static int
4476 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
4477 {
4478 	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
4479 		return (0);
4480 	return (pf_match(op, a1, a2, u));
4481 }
4482 
4483 static int
4484 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
4485 {
4486 	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
4487 		return (0);
4488 	return (pf_match(op, a1, a2, g));
4489 }
4490 
4491 int
4492 pf_match_tag(struct mbuf *m, struct pf_krule *r, int *tag, int mtag)
4493 {
4494 	if (*tag == -1)
4495 		*tag = mtag;
4496 
4497 	return ((!r->match_tag_not && r->match_tag == *tag) ||
4498 	    (r->match_tag_not && r->match_tag != *tag));
4499 }
4500 
4501 static int
4502 pf_match_rcvif(struct mbuf *m, struct pf_krule *r)
4503 {
4504 	struct ifnet *ifp = m->m_pkthdr.rcvif;
4505 	struct pfi_kkif *kif;
4506 
4507 	if (ifp == NULL)
4508 		return (0);
4509 
4510 	kif = (struct pfi_kkif *)ifp->if_pf_kif;
4511 
4512 	if (kif == NULL) {
4513 		DPFPRINTF(PF_DEBUG_URGENT,
4514 		    ("pf_test_via: kif == NULL, @%d via %s\n", r->nr,
4515 			r->rcv_ifname));
4516 		return (0);
4517 	}
4518 
4519 	return (pfi_kkif_match(r->rcv_kif, kif));
4520 }
4521 
4522 int
4523 pf_tag_packet(struct pf_pdesc *pd, int tag)
4524 {
4525 
4526 	KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
4527 
4528 	if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL))
4529 		return (ENOMEM);
4530 
4531 	pd->pf_mtag->tag = tag;
4532 
4533 	return (0);
4534 }
4535 
4536 #define	PF_ANCHOR_STACKSIZE	32
4537 struct pf_kanchor_stackframe {
4538 	struct pf_kruleset	*rs;
4539 	struct pf_krule		*r;	/* XXX: + match bit */
4540 	struct pf_kanchor	*child;
4541 };
4542 
4543 /*
4544  * XXX: We rely on malloc(9) returning pointer-aligned addresses.
4545  */
4546 #define	PF_ANCHORSTACK_MATCH	0x00000001
4547 #define	PF_ANCHORSTACK_MASK	(PF_ANCHORSTACK_MATCH)
4548 
4549 #define	PF_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
4550 #define	PF_ANCHOR_RULE(f)	(struct pf_krule *)			\
4551 				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
4552 #define	PF_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *) 			\
4553 				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
4554 } while (0)
4555 
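/*
 * Sketch of the tagging scheme above: because malloc(9) returns
 * pointer-aligned memory, bit 0 of the rule pointer is always clear and
 * can record "a rule inside this anchor matched" without widening the
 * stack frame:
 *
 *	f->r = r;			untagged; PF_ANCHOR_MATCH(f) == 0
 *	PF_ANCHOR_SET_MATCH(f);		sets bit 0
 *	r = PF_ANCHOR_RULE(f);		masks bit 0 off again
 */
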
4556 void
4557 pf_step_into_anchor(struct pf_kanchor_stackframe *stack, int *depth,
4558     struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a,
4559     int *match)
4560 {
4561 	struct pf_kanchor_stackframe	*f;
4562 
4563 	PF_RULES_RASSERT();
4564 
4565 	if (match)
4566 		*match = 0;
4567 	if (*depth >= PF_ANCHOR_STACKSIZE) {
4568 		printf("%s: anchor stack overflow on %s\n",
4569 		    __func__, (*r)->anchor->name);
4570 		*r = TAILQ_NEXT(*r, entries);
4571 		return;
4572 	} else if (*depth == 0 && a != NULL)
4573 		*a = *r;
4574 	f = stack + (*depth)++;
4575 	f->rs = *rs;
4576 	f->r = *r;
4577 	if ((*r)->anchor_wildcard) {
4578 		struct pf_kanchor_node *parent = &(*r)->anchor->children;
4579 
4580 		if ((f->child = RB_MIN(pf_kanchor_node, parent)) == NULL) {
4581 			*r = NULL;
4582 			return;
4583 		}
4584 		*rs = &f->child->ruleset;
4585 	} else {
4586 		f->child = NULL;
4587 		*rs = &(*r)->anchor->ruleset;
4588 	}
4589 	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
4590 }
4591 
4592 int
4593 pf_step_out_of_anchor(struct pf_kanchor_stackframe *stack, int *depth,
4594     struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a,
4595     int *match)
4596 {
4597 	struct pf_kanchor_stackframe	*f;
4598 	struct pf_krule *fr;
4599 	int quick = 0;
4600 
4601 	PF_RULES_RASSERT();
4602 
4603 	do {
4604 		if (*depth <= 0)
4605 			break;
4606 		f = stack + *depth - 1;
4607 		fr = PF_ANCHOR_RULE(f);
4608 		if (f->child != NULL) {
4609 			/*
4610 			 * This block traverses a
4611 			 * wildcard anchor.
4612 			 */
4613 			if (match != NULL && *match) {
4614 				/*
4615 				 * If any of "*" matched, then
4616 				 * "foo/ *" matched, mark frame
4617 				 * appropriately.
4618 				 */
4619 				PF_ANCHOR_SET_MATCH(f);
4620 				*match = 0;
4621 			}
4622 			f->child = RB_NEXT(pf_kanchor_node,
4623 			    &fr->anchor->children, f->child);
4624 			if (f->child != NULL) {
4625 				*rs = &f->child->ruleset;
4626 				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
4627 				if (*r == NULL)
4628 					continue;
4629 				else
4630 					break;
4631 			}
4632 		}
4633 		(*depth)--;
4634 		if (*depth == 0 && a != NULL)
4635 			*a = NULL;
4636 		*rs = f->rs;
4637 		if (PF_ANCHOR_MATCH(f) || (match != NULL && *match))
4638 			quick = fr->quick;
4639 		*r = TAILQ_NEXT(fr, entries);
4640 	} while (*r == NULL);
4641 
4642 	return (quick);
4643 }
4644 
4645 struct pf_keth_anchor_stackframe {
4646 	struct pf_keth_ruleset	*rs;
4647 	struct pf_keth_rule	*r;	/* XXX: + match bit */
4648 	struct pf_keth_anchor	*child;
4649 };
4650 
4651 #define	PF_ETH_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
4652 #define	PF_ETH_ANCHOR_RULE(f)	(struct pf_keth_rule *)			\
4653 				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
4654 #define	PF_ETH_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *) 		\
4655 				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
4656 } while (0)
4657 
4658 void
4659 pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
4660     struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
4661     struct pf_keth_rule **a, int *match)
4662 {
4663 	struct pf_keth_anchor_stackframe	*f;
4664 
4665 	NET_EPOCH_ASSERT();
4666 
4667 	if (match)
4668 		*match = 0;
4669 	if (*depth >= PF_ANCHOR_STACKSIZE) {
4670 		printf("%s: anchor stack overflow on %s\n",
4671 		    __func__, (*r)->anchor->name);
4672 		*r = TAILQ_NEXT(*r, entries);
4673 		return;
4674 	} else if (*depth == 0 && a != NULL)
4675 		*a = *r;
4676 	f = stack + (*depth)++;
4677 	f->rs = *rs;
4678 	f->r = *r;
4679 	if ((*r)->anchor_wildcard) {
4680 		struct pf_keth_anchor_node *parent = &(*r)->anchor->children;
4681 
4682 		if ((f->child = RB_MIN(pf_keth_anchor_node, parent)) == NULL) {
4683 			*r = NULL;
4684 			return;
4685 		}
4686 		*rs = &f->child->ruleset;
4687 	} else {
4688 		f->child = NULL;
4689 		*rs = &(*r)->anchor->ruleset;
4690 	}
4691 	*r = TAILQ_FIRST((*rs)->active.rules);
4692 }
4693 
4694 int
4695 pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
4696     struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
4697     struct pf_keth_rule **a, int *match)
4698 {
4699 	struct pf_keth_anchor_stackframe	*f;
4700 	struct pf_keth_rule *fr;
4701 	int quick = 0;
4702 
4703 	NET_EPOCH_ASSERT();
4704 
4705 	do {
4706 		if (*depth <= 0)
4707 			break;
4708 		f = stack + *depth - 1;
4709 		fr = PF_ETH_ANCHOR_RULE(f);
4710 		if (f->child != NULL) {
4711 			/*
4712 			 * This block traverses a
4713 			 * wildcard anchor.
4714 			 */
4715 			if (match != NULL && *match) {
4716 				/*
4717 				 * If any of "*" matched, then
4718 				 * "foo/ *" matched, mark frame
4719 				 * appropriately.
4720 				 */
4721 				PF_ETH_ANCHOR_SET_MATCH(f);
4722 				*match = 0;
4723 			}
4724 			f->child = RB_NEXT(pf_keth_anchor_node,
4725 			    &fr->anchor->children, f->child);
4726 			if (f->child != NULL) {
4727 				*rs = &f->child->ruleset;
4728 				*r = TAILQ_FIRST((*rs)->active.rules);
4729 				if (*r == NULL)
4730 					continue;
4731 				else
4732 					break;
4733 			}
4734 		}
4735 		(*depth)--;
4736 		if (*depth == 0 && a != NULL)
4737 			*a = NULL;
4738 		*rs = f->rs;
4739 		if (PF_ETH_ANCHOR_MATCH(f) || (match != NULL && *match))
4740 			quick = fr->quick;
4741 		*r = TAILQ_NEXT(fr, entries);
4742 	} while (*r == NULL);
4743 
4744 	return (quick);
4745 }
4746 
4747 #ifdef INET6
4748 void
4749 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
4750     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
4751 {
4752 	switch (af) {
4753 #ifdef INET
4754 	case AF_INET:
4755 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
4756 		((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
4757 		break;
4758 #endif /* INET */
4759 	case AF_INET6:
4760 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
4761 		((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
4762 		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
4763 		((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
4764 		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
4765 		((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
4766 		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
4767 		((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
4768 		break;
4769 	}
4770 }
4771 
4772 void
4773 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
4774 {
4775 	switch (af) {
4776 #ifdef INET
4777 	case AF_INET:
4778 		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
4779 		break;
4780 #endif /* INET */
4781 	case AF_INET6:
4782 		if (addr->addr32[3] == 0xffffffff) {
4783 			addr->addr32[3] = 0;
4784 			if (addr->addr32[2] == 0xffffffff) {
4785 				addr->addr32[2] = 0;
4786 				if (addr->addr32[1] == 0xffffffff) {
4787 					addr->addr32[1] = 0;
4788 					addr->addr32[0] =
4789 					    htonl(ntohl(addr->addr32[0]) + 1);
4790 				} else
4791 					addr->addr32[1] =
4792 					    htonl(ntohl(addr->addr32[1]) + 1);
4793 			} else
4794 				addr->addr32[2] =
4795 				    htonl(ntohl(addr->addr32[2]) + 1);
4796 		} else
4797 			addr->addr32[3] =
4798 			    htonl(ntohl(addr->addr32[3]) + 1);
4799 		break;
4800 	}
4801 }
4802 #endif /* INET6 */
4803 
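/*
 * Carry example for pf_addr_inc() (hypothetical address): incrementing
 * 2001:db8::ffff:ffff:ffff:ffff rolls addr32[3] and addr32[2] over to
 * zero and carries into addr32[1], yielding 2001:db8:0:1::.
 */
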
4804 void
4805 pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a)
4806 {
4807 	/*
4808 	 * Modern rules use the same flags in rules as they do in states.
4809 	 */
4810 	a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
4811 	    PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));
4812 
4813 	/*
4814 	 * Old-style scrub rules have different flags which need to be translated.
4815 	 */
4816 	if (r->rule_flag & PFRULE_RANDOMID)
4817 		a->flags |= PFSTATE_RANDOMID;
4818 	if (r->scrub_flags & PFSTATE_SETTOS || r->rule_flag & PFRULE_SET_TOS) {
4819 		a->flags |= PFSTATE_SETTOS;
4820 		a->set_tos = r->set_tos;
4821 	}
4822 
4823 	if (r->qid)
4824 		a->qid = r->qid;
4825 	if (r->pqid)
4826 		a->pqid = r->pqid;
4827 	if (r->rtableid >= 0)
4828 		a->rtableid = r->rtableid;
4829 	a->log |= r->log;
4830 	if (r->min_ttl)
4831 		a->min_ttl = r->min_ttl;
4832 	if (r->max_mss)
4833 		a->max_mss = r->max_mss;
4834 	if (r->dnpipe)
4835 		a->dnpipe = r->dnpipe;
4836 	if (r->dnrpipe)
4837 		a->dnrpipe = r->dnrpipe;
4838 	if (r->dnpipe || r->dnrpipe) {
4839 		if (r->free_flags & PFRULE_DN_IS_PIPE)
4840 			a->flags |= PFSTATE_DN_IS_PIPE;
4841 		else
4842 			a->flags &= ~PFSTATE_DN_IS_PIPE;
4843 	}
4844 	if (r->scrub_flags & PFSTATE_SETPRIO) {
4845 		a->set_prio[0] = r->set_prio[0];
4846 		a->set_prio[1] = r->set_prio[1];
4847 	}
4848 }
4849 
4850 int
4851 pf_socket_lookup(struct pf_pdesc *pd)
4852 {
4853 	struct pf_addr		*saddr, *daddr;
4854 	u_int16_t		 sport, dport;
4855 	struct inpcbinfo	*pi;
4856 	struct inpcb		*inp;
4857 
4858 	pd->lookup.uid = UID_MAX;
4859 	pd->lookup.gid = GID_MAX;
4860 
4861 	switch (pd->proto) {
4862 	case IPPROTO_TCP:
4863 		sport = pd->hdr.tcp.th_sport;
4864 		dport = pd->hdr.tcp.th_dport;
4865 		pi = &V_tcbinfo;
4866 		break;
4867 	case IPPROTO_UDP:
4868 		sport = pd->hdr.udp.uh_sport;
4869 		dport = pd->hdr.udp.uh_dport;
4870 		pi = &V_udbinfo;
4871 		break;
4872 	default:
4873 		return (-1);
4874 	}
4875 	if (pd->dir == PF_IN) {
4876 		saddr = pd->src;
4877 		daddr = pd->dst;
4878 	} else {
4879 		u_int16_t	p;
4880 
4881 		p = sport;
4882 		sport = dport;
4883 		dport = p;
4884 		saddr = pd->dst;
4885 		daddr = pd->src;
4886 	}
4887 	switch (pd->af) {
4888 #ifdef INET
4889 	case AF_INET:
4890 		inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
4891 		    dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
4892 		if (inp == NULL) {
4893 			inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
4894 			   daddr->v4, dport, INPLOOKUP_WILDCARD |
4895 			   INPLOOKUP_RLOCKPCB, NULL, pd->m);
4896 			if (inp == NULL)
4897 				return (-1);
4898 		}
4899 		break;
4900 #endif /* INET */
4901 #ifdef INET6
4902 	case AF_INET6:
4903 		inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
4904 		    dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
4905 		if (inp == NULL) {
4906 			inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
4907 			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
4908 			    INPLOOKUP_RLOCKPCB, NULL, pd->m);
4909 			if (inp == NULL)
4910 				return (-1);
4911 		}
4912 		break;
4913 #endif /* INET6 */
4914 	}
4915 	INP_RLOCK_ASSERT(inp);
4916 	pd->lookup.uid = inp->inp_cred->cr_uid;
4917 	pd->lookup.gid = inp->inp_cred->cr_groups[0];
4918 	INP_RUNLOCK(inp);
4919 
4920 	return (1);
4921 }
4922 
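/*
 * Note on the swap above: the PCB lookup takes (foreign, local)
 * endpoints.  For PF_IN the packet's source already is the foreign side;
 * for PF_OUT the packet originates from the local socket, so addresses
 * and ports are exchanged before calling in_pcblookup_mbuf() /
 * in6_pcblookup_mbuf().
 */
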
4923 u_int8_t
4924 pf_get_wscale(struct pf_pdesc *pd)
4925 {
4926 	struct tcphdr	*th = &pd->hdr.tcp;
4927 	int		 hlen;
4928 	u_int8_t	 hdr[60];
4929 	u_int8_t	*opt, optlen;
4930 	u_int8_t	 wscale = 0;
4931 
4932 	hlen = th->th_off << 2;		/* hlen <= sizeof(hdr) */
4933 	if (hlen <= sizeof(struct tcphdr))
4934 		return (0);
4935 	if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af))
4936 		return (0);
4937 	opt = hdr + sizeof(struct tcphdr);
4938 	hlen -= sizeof(struct tcphdr);
4939 	while (hlen >= 3) {
4940 		switch (*opt) {
4941 		case TCPOPT_EOL:
4942 		case TCPOPT_NOP:
4943 			++opt;
4944 			--hlen;
4945 			break;
4946 		case TCPOPT_WINDOW:
4947 			wscale = opt[2];
4948 			if (wscale > TCP_MAX_WINSHIFT)
4949 				wscale = TCP_MAX_WINSHIFT;
4950 			wscale |= PF_WSCALE_FLAG;
4951 			/* FALLTHROUGH */
4952 		default:
4953 			optlen = opt[1];
4954 			if (optlen < 2)
4955 				optlen = 2;
4956 			hlen -= optlen;
4957 			opt += optlen;
4958 			break;
4959 		}
4960 	}
4961 	return (wscale);
4962 }
4963 
4964 u_int16_t
4965 pf_get_mss(struct pf_pdesc *pd)
4966 {
4967 	struct tcphdr	*th = &pd->hdr.tcp;
4968 	int		 hlen;
4969 	u_int8_t	 hdr[60];
4970 	u_int8_t	*opt, optlen;
4971 	u_int16_t	 mss = V_tcp_mssdflt;
4972 
4973 	hlen = th->th_off << 2;	/* hlen <= sizeof(hdr) */
4974 	if (hlen <= sizeof(struct tcphdr))
4975 		return (0);
4976 	if (!pf_pull_hdr(pd->m, pd->off, hdr, hlen, NULL, NULL, pd->af))
4977 		return (0);
4978 	opt = hdr + sizeof(struct tcphdr);
4979 	hlen -= sizeof(struct tcphdr);
4980 	while (hlen >= TCPOLEN_MAXSEG) {
4981 		switch (*opt) {
4982 		case TCPOPT_EOL:
4983 		case TCPOPT_NOP:
4984 			++opt;
4985 			--hlen;
4986 			break;
4987 		case TCPOPT_MAXSEG:
4988 			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
4989 			NTOHS(mss);
4990 			/* FALLTHROUGH */
4991 		default:
4992 			optlen = opt[1];
4993 			if (optlen < 2)
4994 				optlen = 2;
4995 			hlen -= optlen;
4996 			opt += optlen;
4997 			break;
4998 		}
4999 	}
5000 	return (mss);
5001 }
5002 
5003 static u_int16_t
5004 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
5005 {
5006 	struct nhop_object *nh;
5007 #ifdef INET6
5008 	struct in6_addr		dst6;
5009 	uint32_t		scopeid;
5010 #endif /* INET6 */
5011 	int			 hlen = 0;
5012 	uint16_t		 mss = 0;
5013 
5014 	NET_EPOCH_ASSERT();
5015 
5016 	switch (af) {
5017 #ifdef INET
5018 	case AF_INET:
5019 		hlen = sizeof(struct ip);
5020 		nh = fib4_lookup(rtableid, addr->v4, 0, 0, 0);
5021 		if (nh != NULL)
5022 			mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
5023 		break;
5024 #endif /* INET */
5025 #ifdef INET6
5026 	case AF_INET6:
5027 		hlen = sizeof(struct ip6_hdr);
5028 		in6_splitscope(&addr->v6, &dst6, &scopeid);
5029 		nh = fib6_lookup(rtableid, &dst6, scopeid, 0, 0);
5030 		if (nh != NULL)
5031 			mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
5032 		break;
5033 #endif /* INET6 */
5034 	}
5035 
5036 	mss = max(V_tcp_mssdflt, mss);
5037 	mss = min(mss, offer);
5038 	mss = max(mss, 64);		/* sanity - at least max opt space */
5039 	return (mss);
5040 }
5041 
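/*
 * Worked example (assuming a 1500-byte route MTU): AF_INET yields
 * 1500 - 20 - 20 = 1460 and AF_INET6 1500 - 40 - 20 = 1440.  The result
 * is floored at V_tcp_mssdflt, capped by the peer's offer and never
 * allowed below 64.
 */
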
5042 static u_int32_t
5043 pf_tcp_iss(struct pf_pdesc *pd)
5044 {
5045 	MD5_CTX ctx;
5046 	u_int32_t digest[4];
5047 
5048 	if (V_pf_tcp_secret_init == 0) {
5049 		arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
5050 		MD5Init(&V_pf_tcp_secret_ctx);
5051 		MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
5052 		    sizeof(V_pf_tcp_secret));
5053 		V_pf_tcp_secret_init = 1;
5054 	}
5055 
5056 	ctx = V_pf_tcp_secret_ctx;
5057 
5058 	MD5Update(&ctx, (char *)&pd->hdr.tcp.th_sport, sizeof(u_short));
5059 	MD5Update(&ctx, (char *)&pd->hdr.tcp.th_dport, sizeof(u_short));
5060 	switch (pd->af) {
5061 	case AF_INET6:
5062 		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
5063 		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
5064 		break;
5065 	case AF_INET:
5066 		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
5067 		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
5068 		break;
5069 	}
5070 	MD5Final((u_char *)digest, &ctx);
5071 	V_pf_tcp_iss_off += 4096;
5072 #define	ISN_RANDOM_INCREMENT (4096 - 1)
5073 	return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
5074 	    V_pf_tcp_iss_off);
5075 #undef	ISN_RANDOM_INCREMENT
5076 }
5077 
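/*
 * Sketch: the ISN computed above follows the familiar RFC 6528 shape,
 *
 *	ISN = MD5(secret || sport || dport || saddr || daddr)
 *	      + random fuzz + a 4096 step per call,
 *
 * so each 4-tuple gets an unpredictable yet monotonically advancing
 * sequence space.
 */
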
5078 static bool
5079 pf_match_eth_addr(const uint8_t *a, const struct pf_keth_rule_addr *r)
5080 {
5081 	bool match = true;
5082 
5083 	/* Always matches if not set */
5084 	if (! r->isset)
5085 		return (!r->neg);
5086 
5087 	for (int i = 0; i < ETHER_ADDR_LEN; i++) {
5088 		if ((a[i] & r->mask[i]) != (r->addr[i] & r->mask[i])) {
5089 			match = false;
5090 			break;
5091 		}
5092 	}
5093 
5094 	return (match ^ r->neg);
5095 }
5096 
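/*
 * Sketch (hypothetical rule): matching the OUI 00:11:22 would use
 * r->addr = 00:11:22:00:00:00 with r->mask = ff:ff:ff:00:00:00; any
 * source address starting with those three bytes then satisfies
 * (a[i] & mask[i]) == (addr[i] & mask[i]) for all six bytes, and r->neg
 * flips the final verdict.
 */
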
5097 static int
5098 pf_match_eth_tag(struct mbuf *m, struct pf_keth_rule *r, int *tag, int mtag)
5099 {
5100 	if (*tag == -1)
5101 		*tag = mtag;
5102 
5103 	return ((!r->match_tag_not && r->match_tag == *tag) ||
5104 	    (r->match_tag_not && r->match_tag != *tag));
5105 }
5106 
5107 static void
5108 pf_bridge_to(struct ifnet *ifp, struct mbuf *m)
5109 {
5110 	/* If we don't have the interface, drop the packet. */
5111 	if (ifp == NULL) {
5112 		m_freem(m);
5113 		return;
5114 	}
5115 
5116 	switch (ifp->if_type) {
5117 	case IFT_ETHER:
5118 	case IFT_XETHER:
5119 	case IFT_L2VLAN:
5120 	case IFT_BRIDGE:
5121 	case IFT_IEEE8023ADLAG:
5122 		break;
5123 	default:
5124 		m_freem(m);
5125 		return;
5126 	}
5127 
5128 	ifp->if_transmit(ifp, m);
5129 }
5130 
5131 static int
5132 pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
5133 {
5134 #ifdef INET
5135 	struct ip ip;
5136 #endif
5137 #ifdef INET6
5138 	struct ip6_hdr ip6;
5139 #endif
5140 	struct mbuf *m = *m0;
5141 	struct ether_header *e;
5142 	struct pf_keth_rule *r, *rm, *a = NULL;
5143 	struct pf_keth_ruleset *ruleset = NULL;
5144 	struct pf_mtag *mtag;
5145 	struct pf_keth_ruleq *rules;
5146 	struct pf_addr *src = NULL, *dst = NULL;
5147 	struct pfi_kkif *bridge_to;
5148 	sa_family_t af = 0;
5149 	uint16_t proto;
5150 	int asd = 0, match = 0;
5151 	int tag = -1;
5152 	uint8_t action;
5153 	struct pf_keth_anchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
5154 
5155 	MPASS(kif->pfik_ifp->if_vnet == curvnet);
5156 	NET_EPOCH_ASSERT();
5157 
5158 	PF_RULES_RLOCK_TRACKER;
5159 
5160 	SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m);
5161 
5162 	mtag = pf_find_mtag(m);
5163 	if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
5164 		/* Dummynet re-injects packets after they've
5165 		 * completed their delay. We've already
5166 		 * processed them, so pass unconditionally. */
5167 
5168 		/* But only once. We may see the packet multiple times (e.g.
5169 		 * PFIL_IN/PFIL_OUT). */
5170 		pf_dummynet_flag_remove(m, mtag);
5171 
5172 		return (PF_PASS);
5173 	}
5174 
5175 	ruleset = V_pf_keth;
5176 	rules = ck_pr_load_ptr(&ruleset->active.rules);
5177 	r = TAILQ_FIRST(rules);
5178 	rm = NULL;
5179 
5180 	if (__predict_false(m->m_len < sizeof(struct ether_header)) &&
5181 	    (m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) {
5182 		DPFPRINTF(PF_DEBUG_URGENT,
5183 		    ("pf_test_eth_rule: m_len < sizeof(struct ether_header)"
5184 		     ", pullup failed\n"));
5185 		return (PF_DROP);
5186 	}
5187 	e = mtod(m, struct ether_header *);
5188 	proto = ntohs(e->ether_type);
5189 
5190 	switch (proto) {
5191 #ifdef INET
5192 	case ETHERTYPE_IP: {
5193 		if (m_length(m, NULL) < (sizeof(struct ether_header) +
5194 		    sizeof(ip)))
5195 			return (PF_DROP);
5196 
5197 		af = AF_INET;
5198 		m_copydata(m, sizeof(struct ether_header), sizeof(ip),
5199 		    (caddr_t)&ip);
5200 		src = (struct pf_addr *)&ip.ip_src;
5201 		dst = (struct pf_addr *)&ip.ip_dst;
5202 		break;
5203 	}
5204 #endif /* INET */
5205 #ifdef INET6
5206 	case ETHERTYPE_IPV6: {
5207 		if (m_length(m, NULL) < (sizeof(struct ether_header) +
5208 		    sizeof(ip6)))
5209 			return (PF_DROP);
5210 
5211 		af = AF_INET6;
5212 		m_copydata(m, sizeof(struct ether_header), sizeof(ip6),
5213 		    (caddr_t)&ip6);
5214 		src = (struct pf_addr *)&ip6.ip6_src;
5215 		dst = (struct pf_addr *)&ip6.ip6_dst;
5216 		break;
5217 	}
5218 #endif /* INET6 */
5219 	}
5220 
5221 	PF_RULES_RLOCK();
5222 
5223 	while (r != NULL) {
5224 		counter_u64_add(r->evaluations, 1);
5225 		SDT_PROBE2(pf, eth, test_rule, test, r->nr, r);
5226 
5227 		if (pfi_kkif_match(r->kif, kif) == r->ifnot) {
5228 			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5229 			    "kif");
5230 			r = r->skip[PFE_SKIP_IFP].ptr;
5231 		}
5232 		else if (r->direction && r->direction != dir) {
5233 			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5234 			    "dir");
5235 			r = r->skip[PFE_SKIP_DIR].ptr;
5236 		}
5237 		else if (r->proto && r->proto != proto) {
5238 			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5239 			    "proto");
5240 			r = r->skip[PFE_SKIP_PROTO].ptr;
5241 		}
5242 		else if (! pf_match_eth_addr(e->ether_shost, &r->src)) {
5243 			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5244 			    "src");
5245 			r = r->skip[PFE_SKIP_SRC_ADDR].ptr;
5246 		}
5247 		else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) {
5248 			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5249 			    "dst");
5250 			r = r->skip[PFE_SKIP_DST_ADDR].ptr;
5251 		}
5252 		else if (src != NULL && PF_MISMATCHAW(&r->ipsrc.addr, src, af,
5253 		    r->ipsrc.neg, kif, M_GETFIB(m))) {
5254 			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5255 			    "ip_src");
5256 			r = r->skip[PFE_SKIP_SRC_IP_ADDR].ptr;
5257 		}
5258 		else if (dst != NULL && PF_MISMATCHAW(&r->ipdst.addr, dst, af,
5259 		    r->ipdst.neg, kif, M_GETFIB(m))) {
5260 			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5261 			    "ip_dst");
5262 			r = r->skip[PFE_SKIP_DST_IP_ADDR].ptr;
5263 		}
5264 		else if (r->match_tag && !pf_match_eth_tag(m, r, &tag,
5265 		    mtag ? mtag->tag : 0)) {
5266 			SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r,
5267 			    "match_tag");
5268 			r = TAILQ_NEXT(r, entries);
5269 		}
5270 		else {
5271 			if (r->tag)
5272 				tag = r->tag;
5273 			if (r->anchor == NULL) {
5274 				/* Rule matches */
5275 				rm = r;
5276 
5277 				SDT_PROBE2(pf, eth, test_rule, match, r->nr, r);
5278 
5279 				if (r->quick)
5280 					break;
5281 
5282 				r = TAILQ_NEXT(r, entries);
5283 			} else {
5284 				pf_step_into_keth_anchor(anchor_stack, &asd,
5285 				    &ruleset, &r, &a, &match);
5286 			}
5287 		}
5288 		if (r == NULL && pf_step_out_of_keth_anchor(anchor_stack, &asd,
5289 		    &ruleset, &r, &a, &match))
5290 			break;
5291 	}
5292 
5293 	r = rm;
5294 
5295 	SDT_PROBE2(pf, eth, test_rule, final_match, (r != NULL ? r->nr : -1), r);
5296 
5297 	/* Default to pass. */
5298 	if (r == NULL) {
5299 		PF_RULES_RUNLOCK();
5300 		return (PF_PASS);
5301 	}
5302 
5303 	/* Execute action. */
5304 	counter_u64_add(r->packets[dir == PF_OUT], 1);
5305 	counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL));
5306 	pf_update_timestamp(r);
5307 
5308 	/* Shortcut. Don't tag if we're just going to drop anyway. */
5309 	if (r->action == PF_DROP) {
5310 		PF_RULES_RUNLOCK();
5311 		return (PF_DROP);
5312 	}
5313 
5314 	if (tag > 0) {
5315 		if (mtag == NULL)
5316 			mtag = pf_get_mtag(m);
5317 		if (mtag == NULL) {
5318 			PF_RULES_RUNLOCK();
5319 			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
5320 			return (PF_DROP);
5321 		}
5322 		mtag->tag = tag;
5323 	}
5324 
5325 	if (r->qid != 0) {
5326 		if (mtag == NULL)
5327 			mtag = pf_get_mtag(m);
5328 		if (mtag == NULL) {
5329 			PF_RULES_RUNLOCK();
5330 			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
5331 			return (PF_DROP);
5332 		}
5333 		mtag->qid = r->qid;
5334 	}
5335 
5336 	action = r->action;
5337 	bridge_to = r->bridge_to;
5338 
5339 	/* Dummynet */
5340 	if (r->dnpipe) {
5341 		struct ip_fw_args dnflow;
5342 
5343 		/* Drop packet if dummynet is not loaded. */
5344 		if (ip_dn_io_ptr == NULL) {
5345 			PF_RULES_RUNLOCK();
5346 			m_freem(m);
5347 			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
5348 			return (PF_DROP);
5349 		}
5350 		if (mtag == NULL)
5351 			mtag = pf_get_mtag(m);
5352 		if (mtag == NULL) {
5353 			PF_RULES_RUNLOCK();
5354 			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
5355 			return (PF_DROP);
5356 		}
5357 
5358 		bzero(&dnflow, sizeof(dnflow));
5359 
5360 		/* We don't have port numbers here, so we set them to 0.  That means
5361 		 * that we'll be somewhat limited in distinguishing flows (i.e.
5362 		 * only based on IP addresses, not based on port numbers), but
5363 		 * it's better than nothing. */
5364 		dnflow.f_id.dst_port = 0;
5365 		dnflow.f_id.src_port = 0;
5366 		dnflow.f_id.proto = 0;
5367 
5368 		dnflow.rule.info = r->dnpipe;
5369 		dnflow.rule.info |= IPFW_IS_DUMMYNET;
5370 		if (r->dnflags & PFRULE_DN_IS_PIPE)
5371 			dnflow.rule.info |= IPFW_IS_PIPE;
5372 
5373 		dnflow.f_id.extra = dnflow.rule.info;
5374 
5375 		dnflow.flags = dir == PF_IN ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
5376 		dnflow.flags |= IPFW_ARGS_ETHER;
5377 		dnflow.ifp = kif->pfik_ifp;
5378 
5379 		switch (af) {
5380 		case AF_INET:
5381 			dnflow.f_id.addr_type = 4;
5382 			dnflow.f_id.src_ip = src->v4.s_addr;
5383 			dnflow.f_id.dst_ip = dst->v4.s_addr;
5384 			break;
5385 		case AF_INET6:
5386 			dnflow.flags |= IPFW_ARGS_IP6;
5387 			dnflow.f_id.addr_type = 6;
5388 			dnflow.f_id.src_ip6 = src->v6;
5389 			dnflow.f_id.dst_ip6 = dst->v6;
5390 			break;
5391 		}
5392 
5393 		PF_RULES_RUNLOCK();
5394 
5395 		mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
5396 		ip_dn_io_ptr(m0, &dnflow);
5397 		if (*m0 != NULL)
5398 			pf_dummynet_flag_remove(m, mtag);
5399 	} else {
5400 		PF_RULES_RUNLOCK();
5401 	}
5402 
5403 	if (action == PF_PASS && bridge_to) {
5404 		pf_bridge_to(bridge_to->pfik_ifp, *m0);
5405 		*m0 = NULL; /* We've eaten the packet. */
5406 	}
5407 
5408 	return (action);
5409 }
5410 
5411 #define PF_TEST_ATTRIB(t, a)\
5412 	do {				\
5413 		if (t) {		\
5414 			r = a;		\
5415 			goto nextrule;	\
5416 		}			\
5417 	} while (0)
5418 
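/*
 * Sketch of how PF_TEST_ATTRIB() drives the skip steps below: a
 * mismatching attribute jumps directly to the next rule that differs in
 * that attribute instead of to TAILQ_NEXT().  For example,
 *
 *	PF_TEST_ATTRIB(r->af && r->af != pd->af,
 *		r->skip[PF_SKIP_AF]);
 *
 * steps over every consecutive rule sharing the same non-matching
 * address family in one jump.
 */
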
5419 static int
5420 pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
5421     struct pf_pdesc *pd, struct pf_krule **am,
5422     struct pf_kruleset **rsm, struct inpcb *inp)
5423 {
5424 	struct pf_krule		*nr = NULL;
5425 	struct pf_krule		*r, *a = NULL;
5426 	struct pf_kruleset	*ruleset = NULL;
5427 	struct pf_krule_slist	 match_rules;
5428 	struct pf_krule_item	*ri;
5429 	struct tcphdr		*th = &pd->hdr.tcp;
5430 	struct pf_state_key	*sk = NULL, *nk = NULL;
5431 	u_short			 reason, transerror;
5432 	int			 rewrite = 0;
5433 	int			 tag = -1;
5434 	int			 asd = 0;
5435 	int			 match = 0;
5436 	int			 state_icmp = 0, icmp_dir, multi;
5437 	u_int16_t		 virtual_type, virtual_id;
5438 	u_int16_t		 bproto_sum = 0, bip_sum = 0;
5439 	u_int8_t		 icmptype = 0, icmpcode = 0;
5440 	struct pf_kanchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
5441 	struct pf_udp_mapping	*udp_mapping = NULL;
5442 
5443 	PF_RULES_RASSERT();
5444 
5445 	PF_ACPY(&pd->nsaddr, pd->src, pd->af);
5446 	PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
5447 
5448 	SLIST_INIT(&match_rules);
5449 
5450 	if (inp != NULL) {
5451 		INP_LOCK_ASSERT(inp);
5452 		pd->lookup.uid = inp->inp_cred->cr_uid;
5453 		pd->lookup.gid = inp->inp_cred->cr_groups[0];
5454 		pd->lookup.done = 1;
5455 	}
5456 
5457 	switch (pd->virtual_proto) {
5458 	case IPPROTO_TCP:
5459 		pd->nsport = th->th_sport;
5460 		pd->ndport = th->th_dport;
5461 		break;
5462 	case IPPROTO_UDP:
5463 		pd->nsport = pd->hdr.udp.uh_sport;
5464 		pd->ndport = pd->hdr.udp.uh_dport;
5465 		break;
5466 	case IPPROTO_SCTP:
5467 		pd->nsport = pd->hdr.sctp.src_port;
5468 		pd->ndport = pd->hdr.sctp.dest_port;
5469 		break;
5470 #ifdef INET
5471 	case IPPROTO_ICMP:
5472 		MPASS(pd->af == AF_INET);
5473 		icmptype = pd->hdr.icmp.icmp_type;
5474 		icmpcode = pd->hdr.icmp.icmp_code;
5475 		state_icmp = pf_icmp_mapping(pd, icmptype,
5476 		    &icmp_dir, &multi, &virtual_id, &virtual_type);
5477 		if (icmp_dir == PF_IN) {
5478 			pd->nsport = virtual_id;
5479 			pd->ndport = virtual_type;
5480 		} else {
5481 			pd->nsport = virtual_type;
5482 			pd->ndport = virtual_id;
5483 		}
5484 		break;
5485 #endif /* INET */
5486 #ifdef INET6
5487 	case IPPROTO_ICMPV6:
5488 		MPASS(pd->af == AF_INET6);
5489 		icmptype = pd->hdr.icmp6.icmp6_type;
5490 		icmpcode = pd->hdr.icmp6.icmp6_code;
5491 		state_icmp = pf_icmp_mapping(pd, icmptype,
5492 		    &icmp_dir, &multi, &virtual_id, &virtual_type);
5493 		if (icmp_dir == PF_IN) {
5494 			pd->nsport = virtual_id;
5495 			pd->ndport = virtual_type;
5496 		} else {
5497 			pd->nsport = virtual_type;
5498 			pd->ndport = virtual_id;
5499 		}
5500 
5501 		break;
5502 #endif /* INET6 */
5503 	default:
5504 		pd->nsport = pd->ndport = 0;
5505 		break;
5506 	}
5507 	pd->osport = pd->nsport;
5508 	pd->odport = pd->ndport;
5509 
5510 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
5511 
5512 	/* check packet for BINAT/NAT/RDR */
5513 	transerror = pf_get_translation(pd, pd->off, &sk, &nk, anchor_stack,
5514 	    &nr, &udp_mapping);
5515 	switch (transerror) {
5516 	default:
5517 		/* A translation error occurred. */
5518 		REASON_SET(&reason, transerror);
5519 		goto cleanup;
5520 	case PFRES_MAX:
5521 		/* No match. */
5522 		break;
5523 	case PFRES_MATCH:
5524 		KASSERT(sk != NULL, ("%s: null sk", __func__));
5525 		KASSERT(nk != NULL, ("%s: null nk", __func__));
5526 
5527 		if (nr->log) {
5528 			PFLOG_PACKET(PF_PASS, PFRES_MATCH, nr, a,
5529 			    ruleset, pd, 1);
5530 		}
5531 
5532 		if (pd->ip_sum)
5533 			bip_sum = *pd->ip_sum;
5534 
5535 		switch (pd->proto) {
5536 		case IPPROTO_TCP:
5537 			bproto_sum = th->th_sum;
5538 
5539 			if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
5540 			    nk->port[pd->sidx] != pd->nsport) {
5541 				pf_change_ap(pd->m, pd->src, &th->th_sport,
5542 				    pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx],
5543 				    nk->port[pd->sidx], 0, pd->af, pd->naf);
5544 				pd->sport = &th->th_sport;
5545 				pd->nsport = th->th_sport;
5546 				PF_ACPY(&pd->nsaddr, pd->src, pd->af);
5547 			}
5548 
5549 			if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
5550 			    nk->port[pd->didx] != pd->ndport) {
5551 				pf_change_ap(pd->m, pd->dst, &th->th_dport,
5552 				    pd->ip_sum, &th->th_sum, &nk->addr[pd->didx],
5553 				    nk->port[pd->didx], 0, pd->af, pd->naf);
5554 				pd->dport = &th->th_dport;
5555 				pd->ndport = th->th_dport;
5556 				PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
5557 			}
5558 			rewrite++;
5559 			break;
5560 		case IPPROTO_UDP:
5561 			bproto_sum = pd->hdr.udp.uh_sum;
5562 
5563 			if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
5564 			    nk->port[pd->sidx] != pd->nsport) {
5565 				pf_change_ap(pd->m, pd->src,
5566 				    &pd->hdr.udp.uh_sport,
5567 				    pd->ip_sum, &pd->hdr.udp.uh_sum,
5568 				    &nk->addr[pd->sidx],
5569 				    nk->port[pd->sidx], 1, pd->af, pd->naf);
5570 				pd->sport = &pd->hdr.udp.uh_sport;
5571 				pd->nsport = pd->hdr.udp.uh_sport;
5572 				PF_ACPY(&pd->nsaddr, pd->src, pd->af);
5573 			}
5574 
5575 			if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
5576 			    nk->port[pd->didx] != pd->ndport) {
5577 				pf_change_ap(pd->m, pd->dst,
5578 				    &pd->hdr.udp.uh_dport,
5579 				    pd->ip_sum, &pd->hdr.udp.uh_sum,
5580 				    &nk->addr[pd->didx],
5581 				    nk->port[pd->didx], 1, pd->af, pd->naf);
5582 				pd->dport = &pd->hdr.udp.uh_dport;
5583 				pd->ndport = pd->hdr.udp.uh_dport;
5584 				PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
5585 			}
5586 			rewrite++;
5587 			break;
5588 		case IPPROTO_SCTP: {
5589 			uint16_t checksum = 0;
5590 
5591 			if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) ||
5592 			    nk->port[pd->sidx] != pd->nsport) {
5593 				pf_change_ap(pd->m, pd->src,
5594 				    &pd->hdr.sctp.src_port, pd->ip_sum, &checksum,
5595 				    &nk->addr[pd->sidx],
5596 				    nk->port[pd->sidx], 1, pd->af, pd->naf);
5597 				pd->sport = &pd->hdr.sctp.src_port;
5598 				pd->nsport = pd->hdr.sctp.src_port;
5599 				PF_ACPY(&pd->nsaddr, pd->src, pd->af);
5600 			}
5601 			if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
5602 			    nk->port[pd->didx] != pd->ndport) {
5603 				pf_change_ap(pd->m, pd->dst,
5604 				    &pd->hdr.sctp.dest_port, pd->ip_sum, &checksum,
5605 				    &nk->addr[pd->didx],
5606 				    nk->port[pd->didx], 1, pd->af, pd->naf);
5607 				pd->dport = &pd->hdr.sctp.dest_port;
5608 				pd->ndport = pd->hdr.sctp.dest_port;
5609 				PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
5610 			}
5611 			break;
5612 		}
5613 #ifdef INET
5614 		case IPPROTO_ICMP:
5615 			if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) {
5616 				pf_change_a(&pd->src->v4.s_addr, pd->ip_sum,
5617 				    nk->addr[pd->sidx].v4.s_addr, 0);
5618 				PF_ACPY(&pd->nsaddr, pd->src, pd->af);
5619 			}
5620 
5621 			if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) {
5622 				pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum,
5623 				    nk->addr[pd->didx].v4.s_addr, 0);
5624 				PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
5625 			}
5626 
5627 			if (virtual_type == htons(ICMP_ECHO) &&
5628 			     nk->port[pd->sidx] != pd->hdr.icmp.icmp_id) {
5629 				pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
5630 				    pd->hdr.icmp.icmp_cksum, pd->nsport,
5631 				    nk->port[pd->sidx], 0);
5632 				pd->hdr.icmp.icmp_id = nk->port[pd->sidx];
5633 				pd->sport = &pd->hdr.icmp.icmp_id;
5634 			}
5635 			m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
5636 			break;
5637 #endif /* INET */
5638 #ifdef INET6
5639 		case IPPROTO_ICMPV6:
5640 			if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) {
5641 				pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum,
5642 				    &nk->addr[pd->sidx], 0);
5643 				PF_ACPY(&pd->nsaddr, pd->src, pd->af);
5644 			}
5645 
5646 			if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) {
5647 				pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum,
5648 				    &nk->addr[pd->didx], 0);
5649 				PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
5650 			}
5651 			rewrite++;
5652 			break;
5653 #endif /* INET6 */
5654 		default:
5655 			switch (pd->af) {
5656 #ifdef INET
5657 			case AF_INET:
5658 				if (PF_ANEQ(&pd->nsaddr,
5659 				    &nk->addr[pd->sidx], AF_INET)) {
5660 					pf_change_a(&pd->src->v4.s_addr,
5661 					    pd->ip_sum,
5662 					    nk->addr[pd->sidx].v4.s_addr, 0);
5663 					PF_ACPY(&pd->nsaddr, pd->src, pd->af);
5664 				}
5665 
5666 				if (PF_ANEQ(&pd->ndaddr,
5667 				    &nk->addr[pd->didx], AF_INET)) {
5668 					pf_change_a(&pd->dst->v4.s_addr,
5669 					    pd->ip_sum,
5670 					    nk->addr[pd->didx].v4.s_addr, 0);
5671 					PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
5672 				}
5673 				break;
5674 #endif /* INET */
5675 #ifdef INET6
5676 			case AF_INET6:
5677 				if (PF_ANEQ(&pd->nsaddr,
5678 				    &nk->addr[pd->sidx], AF_INET6)) {
5679 					PF_ACPY(&pd->nsaddr, &nk->addr[pd->sidx], pd->af);
5680 					PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
5681 				}
5682 
5683 				if (PF_ANEQ(&pd->ndaddr,
5684 				    &nk->addr[pd->didx], AF_INET6)) {
5685 					PF_ACPY(&pd->ndaddr, &nk->addr[pd->didx], pd->af);
5686 					PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
5687 				}
5688 				break;
5689 #endif /* INET6 */
5690 			}
5691 			break;
5692 		}
5693 		if (nr->natpass)
5694 			r = NULL;
5695 	}
5696 
5697 	while (r != NULL) {
5698 		pf_counter_u64_add(&r->evaluations, 1);
5699 		PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot,
5700 			r->skip[PF_SKIP_IFP]);
5701 		PF_TEST_ATTRIB(r->direction && r->direction != pd->dir,
5702 			r->skip[PF_SKIP_DIR]);
5703 		PF_TEST_ATTRIB(r->af && r->af != pd->af,
5704 			r->skip[PF_SKIP_AF]);
5705 		PF_TEST_ATTRIB(r->proto && r->proto != pd->proto,
5706 			r->skip[PF_SKIP_PROTO]);
5707 		PF_TEST_ATTRIB(PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, pd->naf,
5708 		    r->src.neg, pd->kif, M_GETFIB(pd->m)),
5709 			r->skip[PF_SKIP_SRC_ADDR]);
5710 		PF_TEST_ATTRIB(PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af,
5711 		    r->dst.neg, NULL, M_GETFIB(pd->m)),
5712 			r->skip[PF_SKIP_DST_ADDR]);
5713 		switch (pd->virtual_proto) {
5714 		case PF_VPROTO_FRAGMENT:
5715 			/* tcp/udp only. port_op always 0 in other cases */
5716 			PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
5717 				TAILQ_NEXT(r, entries));
5718 			PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset),
5719 				TAILQ_NEXT(r, entries));
5720 			/* icmp only. type/code always 0 in other cases */
5721 			PF_TEST_ATTRIB((r->type || r->code),
5722 				TAILQ_NEXT(r, entries));
5723 			/* tcp/udp only. {uid|gid}.op always 0 in other cases */
5724 			PF_TEST_ATTRIB((r->gid.op || r->uid.op),
5725 				TAILQ_NEXT(r, entries));
5726 			break;
5727 
5728 		case IPPROTO_TCP:
5729 			PF_TEST_ATTRIB((r->flagset & tcp_get_flags(th)) != r->flags,
5730 				TAILQ_NEXT(r, entries));
5731 			/* FALLTHROUGH */
5732 		case IPPROTO_SCTP:
5733 		case IPPROTO_UDP:
5734 			/* tcp/udp only. port_op always 0 in other cases */
5735 			PF_TEST_ATTRIB(r->src.port_op && !pf_match_port(r->src.port_op,
5736 			    r->src.port[0], r->src.port[1], pd->nsport),
5737 				r->skip[PF_SKIP_SRC_PORT]);
5738 			/* tcp/udp only. port_op always 0 in other cases */
5739 			PF_TEST_ATTRIB(r->dst.port_op && !pf_match_port(r->dst.port_op,
5740 			    r->dst.port[0], r->dst.port[1], pd->ndport),
5741 				r->skip[PF_SKIP_DST_PORT]);
5742 			/* tcp/udp only. uid.op always 0 in other cases */
5743 			PF_TEST_ATTRIB(r->uid.op && (pd->lookup.done || (pd->lookup.done =
5744 			    pf_socket_lookup(pd), 1)) &&
5745 			    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
5746 			    pd->lookup.uid),
5747 				TAILQ_NEXT(r, entries));
5748 			/* tcp/udp only. gid.op always 0 in other cases */
5749 			PF_TEST_ATTRIB(r->gid.op && (pd->lookup.done || (pd->lookup.done =
5750 			    pf_socket_lookup(pd), 1)) &&
5751 			    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
5752 			    pd->lookup.gid),
5753 				TAILQ_NEXT(r, entries));
5754 			break;
5755 
5756 		case IPPROTO_ICMP:
5757 		case IPPROTO_ICMPV6:
5758 			/* icmp only. type always 0 in other cases */
5759 			PF_TEST_ATTRIB(r->type && r->type != icmptype + 1,
5760 				TAILQ_NEXT(r, entries));
5761 			/* icmp only. code always 0 in other cases */
5762 			PF_TEST_ATTRIB(r->code && r->code != icmpcode + 1,
5763 				TAILQ_NEXT(r, entries));
5764 			break;
5765 
5766 		default:
5767 			break;
5768 		}
5769 		PF_TEST_ATTRIB(r->tos && !(r->tos == pd->tos),
5770 			TAILQ_NEXT(r, entries));
5771 		PF_TEST_ATTRIB(r->prio &&
5772 		    !pf_match_ieee8021q_pcp(r->prio, pd->m),
5773 			TAILQ_NEXT(r, entries));
5774 		PF_TEST_ATTRIB(r->prob &&
5775 		    r->prob <= arc4random(),
5776 			TAILQ_NEXT(r, entries));
5777 		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &tag,
5778 		    pd->pf_mtag ? pd->pf_mtag->tag : 0),
5779 			TAILQ_NEXT(r, entries));
5780 		PF_TEST_ATTRIB(r->rcv_kif && !pf_match_rcvif(pd->m, r),
5781 			TAILQ_NEXT(r, entries));
5782 		PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
5783 		    pd->virtual_proto != PF_VPROTO_FRAGMENT),
5784 			TAILQ_NEXT(r, entries));
5785 		PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY &&
5786 		    (pd->virtual_proto != IPPROTO_TCP || !pf_osfp_match(
5787 		    pf_osfp_fingerprint(pd, th),
5788 		    r->os_fingerprint)),
5789 			TAILQ_NEXT(r, entries));
5790 
5791 		if (r->tag)
5792 			tag = r->tag;
5793 		if (r->anchor == NULL) {
5794 			if (r->action == PF_MATCH) {
5795 				ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO);
5796 				if (ri == NULL) {
5797 					REASON_SET(&reason, PFRES_MEMORY);
5798 					goto cleanup;
5799 				}
5800 				ri->r = r;
5801 				SLIST_INSERT_HEAD(&match_rules, ri, entry);
5802 				pf_counter_u64_critical_enter();
5803 				pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1);
5804 				pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len);
5805 				pf_counter_u64_critical_exit();
5806 				pf_rule_to_actions(r, &pd->act);
5807 				if (r->naf)
5808 					pd->naf = r->naf;
5809 				if (pd->af != pd->naf) {
5810 					if (pf_get_transaddr_af(r, pd) == -1) {
5811 						REASON_SET(&reason, PFRES_MEMORY);
5812 						goto cleanup;
5813 					}
5814 				}
5815 				if (r->log || pd->act.log & PF_LOG_MATCHES)
5816 					PFLOG_PACKET(r->action, PFRES_MATCH, r,
5817 					    a, ruleset, pd, 1);
5818 			} else {
5819 				match = 1;
5820 				*rm = r;
5821 				*am = a;
5822 				*rsm = ruleset;
5823 				if (pd->act.log & PF_LOG_MATCHES)
5824 					PFLOG_PACKET(r->action, PFRES_MATCH, r,
5825 					    a, ruleset, pd, 1);
5826 			}
5827 			if ((*rm)->quick)
5828 				break;
5829 			r = TAILQ_NEXT(r, entries);
5830 		} else
5831 			pf_step_into_anchor(anchor_stack, &asd,
5832 			    &ruleset, PF_RULESET_FILTER, &r, &a,
5833 			    &match);
5834 nextrule:
5835 		if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
5836 		    &ruleset, PF_RULESET_FILTER, &r, &a, &match))
5837 			break;
5838 	}
5839 	r = *rm;
5840 	a = *am;
5841 	ruleset = *rsm;
5842 
5843 	REASON_SET(&reason, PFRES_MATCH);
5844 
5845 	/* apply actions for last matching pass/block rule */
5846 	pf_rule_to_actions(r, &pd->act);
5847 	if (r->naf)
5848 		pd->naf = r->naf;
5849 	if (pd->af != pd->naf) {
5850 		if (pf_get_transaddr_af(r, pd) == -1) {
5851 			REASON_SET(&reason, PFRES_MEMORY);
5852 			goto cleanup;
5853 		}
5854 	}
5855 
5856 	if (r->log || pd->act.log & PF_LOG_MATCHES) {
5857 		if (rewrite)
5858 			m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
5859 		PFLOG_PACKET(r->action, reason, r, a, ruleset, pd, 1);
5860 	}
5861 
5862 	if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
5863 	   (r->action == PF_DROP) &&
5864 	    ((r->rule_flag & PFRULE_RETURNRST) ||
5865 	    (r->rule_flag & PFRULE_RETURNICMP) ||
5866 	    (r->rule_flag & PFRULE_RETURN))) {
5867 		pf_return(r, nr, pd, sk, th, bproto_sum,
5868 		    bip_sum, &reason, r->rtableid);
5869 	}
5870 
5871 	if (r->action == PF_DROP)
5872 		goto cleanup;
5873 
5874 	if (tag > 0 && pf_tag_packet(pd, tag)) {
5875 		REASON_SET(&reason, PFRES_MEMORY);
5876 		goto cleanup;
5877 	}
5878 	if (pd->act.rtableid >= 0)
5879 		M_SETFIB(pd->m, pd->act.rtableid);
5880 
5881 	if (r->rt) {
5882 		struct pf_ksrc_node	*sn = NULL;
5883 		struct pf_srchash	*snh = NULL;
5884 		/*
5885 		 * Set act.rt here instead of in pf_rule_to_actions() because
5886 		 * it is applied only from the last pass rule.
5887 		 */
5888 		pd->act.rt = r->rt;
5889 		/* No REASON_SET here; pf_map_addr_sn() bumps the reason counters */
5890 		reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr,
5891 		    &pd->act.rt_kif, NULL, &sn, &snh, &r->rdr);
5892 		if (reason != 0)
5893 			goto cleanup;
5894 	}
5895 
5896 	if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
5897 	   (!state_icmp && (r->keep_state || nr != NULL ||
5898 	    (pd->flags & PFDESC_TCP_NORM)))) {
5899 		int action;
5900 		bool nat64;
5901 
5902 		action = pf_create_state(r, nr, a, pd, nk, sk,
5903 		    &rewrite, sm, tag, bproto_sum, bip_sum,
5904 		    &match_rules, udp_mapping);
5905 		if (action != PF_PASS) {
5906 			pf_udp_mapping_release(udp_mapping);
5907 			if (action == PF_DROP &&
5908 			    (r->rule_flag & PFRULE_RETURN))
5909 				pf_return(r, nr, pd, sk, th,
5910 				    bproto_sum, bip_sum, &reason,
5911 				    pd->act.rtableid);
5912 			return (action);
5913 		}
5914 
5915 		nat64 = pd->af != pd->naf;
5916 		if (nat64) {
5917 			struct pf_state_key	*_sk;
5918 			int			 ret;
5919 
5920 			if (sk == NULL)
5921 				sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE];
5922 			if (nk == NULL)
5923 				nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
5924 			if (pd->dir == PF_IN)
5925 				_sk = sk;
5926 			else
5927 				_sk = nk;
5928 
5929 			ret = pf_translate(pd,
5930 			    &_sk->addr[pd->didx],
5931 			    _sk->port[pd->didx],
5932 			    &_sk->addr[pd->sidx],
5933 			    _sk->port[pd->sidx],
5934 			    virtual_type, icmp_dir);
5935 			if (ret < 0)
5936 				goto cleanup;
5937 
5938 			rewrite += ret;
5939 		}
5940 	} else {
5941 		while ((ri = SLIST_FIRST(&match_rules))) {
5942 			SLIST_REMOVE_HEAD(&match_rules, entry);
5943 			free(ri, M_PF_RULE_ITEM);
5944 		}
5945 
5946 		uma_zfree(V_pf_state_key_z, sk);
5947 		uma_zfree(V_pf_state_key_z, nk);
5948 		pf_udp_mapping_release(udp_mapping);
5949 	}
5950 
5951 	/* copy back packet headers if we performed NAT operations */
5952 	if (rewrite)
5953 		m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
5954 
5955 	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
5956 	    pd->dir == PF_OUT &&
5957 	    V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m))
5958 		/*
5959 		 * We want the state created, but we don't
5960 		 * want to send this packet yet, in case a
5961 		 * partner firewall has to know about the
5962 		 * state to allow replies through it.
5963 		 */
5964 		return (PF_DEFER);
5965 
5966 	if (rewrite && sk != NULL && nk != NULL && sk->af != nk->af) {
5967 		return (PF_AFRT);
5968 	} else
5969 		return (PF_PASS);
5970 
5971 cleanup:
5972 	while ((ri = SLIST_FIRST(&match_rules))) {
5973 		SLIST_REMOVE_HEAD(&match_rules, entry);
5974 		free(ri, M_PF_RULE_ITEM);
5975 	}
5976 
5977 	uma_zfree(V_pf_state_key_z, sk);
5978 	uma_zfree(V_pf_state_key_z, nk);
5979 	pf_udp_mapping_release(udp_mapping);
5980 
5981 	return (PF_DROP);
5982 }
5983 
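/*
 * In short: pf_create_state() runs once the last matching pass rule asks
 * for state.  It returns PF_PASS with *sm pointing at the inserted state,
 * PF_SYNPROXY_DROP when the rule uses a SYN proxy and pf answers the
 * initial SYN itself, or PF_DROP on limit/allocation failure; the
 * csfailed path frees the state keys and unwinds any source nodes
 * created for this packet.
 */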
5984 static int
5985 pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a,
5986     struct pf_pdesc *pd, struct pf_state_key *nk, struct pf_state_key *sk,
5987     int *rewrite, struct pf_kstate **sm, int tag, u_int16_t bproto_sum,
5988     u_int16_t bip_sum, struct pf_krule_slist *match_rules,
5989     struct pf_udp_mapping *udp_mapping)
5990 {
5991 	struct pf_kstate	*s = NULL;
5992 	struct pf_ksrc_node	*sn = NULL;
5993 	struct pf_srchash	*snh = NULL;
5994 	struct pf_ksrc_node	*nsn = NULL;
5995 	struct pf_srchash	*nsnh = NULL;
5996 	struct tcphdr		*th = &pd->hdr.tcp;
5997 	u_int16_t		 mss = V_tcp_mssdflt;
5998 	u_short			 reason, sn_reason;
5999 	struct pf_krule_item	*ri;
6000 
6001 	/* check maximums */
6002 	if (r->max_states &&
6003 	    (counter_u64_fetch(r->states_cur) >= r->max_states)) {
6004 		counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1);
6005 		REASON_SET(&reason, PFRES_MAXSTATES);
6006 		goto csfailed;
6007 	}
6008 	/* src node for filter rule */
6009 	if ((r->rule_flag & PFRULE_SRCTRACK ||
6010 	    r->rdr.opts & PF_POOL_STICKYADDR) &&
6011 	    (sn_reason = pf_insert_src_node(&sn, &snh, r, pd->src, pd->af,
6012 	    &pd->act.rt_addr, pd->act.rt_kif)) != 0) {
6013 		REASON_SET(&reason, sn_reason);
6014 		goto csfailed;
6015 	}
6016 	/* src node for translation rule */
6017 	if (nr != NULL && (nr->rdr.opts & PF_POOL_STICKYADDR) &&
6018 	    (sn_reason = pf_insert_src_node(&nsn, &nsnh, nr, &sk->addr[pd->sidx],
6019 	    pd->af, &nk->addr[1], NULL)) != 0) {
6020 		REASON_SET(&reason, sn_reason);
6021 		goto csfailed;
6022 	}
6023 	s = pf_alloc_state(M_NOWAIT);
6024 	if (s == NULL) {
6025 		REASON_SET(&reason, PFRES_MEMORY);
6026 		goto csfailed;
6027 	}
6028 	s->rule = r;
6029 	s->nat_rule = nr;
6030 	s->anchor = a;
6031 	bcopy(match_rules, &s->match_rules, sizeof(s->match_rules));
6032 	memcpy(&s->act, &pd->act, sizeof(struct pf_rule_actions));
6033 
6034 	STATE_INC_COUNTERS(s);
6035 	if (r->allow_opts)
6036 		s->state_flags |= PFSTATE_ALLOWOPTS;
6037 	if (r->rule_flag & PFRULE_STATESLOPPY)
6038 		s->state_flags |= PFSTATE_SLOPPY;
6039 	if (pd->flags & PFDESC_TCP_NORM) /* Set by old-style scrub rules */
6040 		s->state_flags |= PFSTATE_SCRUB_TCP;
6041 	if ((r->rule_flag & PFRULE_PFLOW) ||
6042 	    (nr != NULL && nr->rule_flag & PFRULE_PFLOW))
6043 		s->state_flags |= PFSTATE_PFLOW;
6044 
6045 	s->act.log = pd->act.log & PF_LOG_ALL;
6046 	s->sync_state = PFSYNC_S_NONE;
6047 	s->state_flags |= pd->act.flags; /* Only needed for pfsync and state export */
6048 
6049 	if (nr != NULL)
6050 		s->act.log |= nr->log & PF_LOG_ALL;
6051 	switch (pd->proto) {
6052 	case IPPROTO_TCP:
6053 		s->src.seqlo = ntohl(th->th_seq);
6054 		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
6055 		if ((tcp_get_flags(th) & (TH_SYN|TH_ACK)) == TH_SYN &&
6056 		    r->keep_state == PF_STATE_MODULATE) {
6057 			/* Generate sequence number modulator */
6058 			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
6059 			    0)
6060 				s->src.seqdiff = 1;
6061 			pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum,
6062 			    htonl(s->src.seqlo + s->src.seqdiff), 0);
6063 			*rewrite = 1;
6064 		} else
6065 			s->src.seqdiff = 0;
6066 		if (tcp_get_flags(th) & TH_SYN) {
6067 			s->src.seqhi++;
6068 			s->src.wscale = pf_get_wscale(pd);
6069 		}
6070 		s->src.max_win = MAX(ntohs(th->th_win), 1);
6071 		if (s->src.wscale & PF_WSCALE_MASK) {
6072 			/* Remove scale factor from initial window */
6073 			int win = s->src.max_win;
6074 			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
6075 			s->src.max_win = (win - 1) >>
6076 			    (s->src.wscale & PF_WSCALE_MASK);
6077 		}
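		/*
		 * That is, max_win = ceil(win / 2^wscale); e.g. a raw
		 * window of 65535 with wscale 7 becomes
		 * (65535 + 128 - 1) >> 7 = 512.
		 */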
6078 		if (tcp_get_flags(th) & TH_FIN)
6079 			s->src.seqhi++;
6080 		s->dst.seqhi = 1;
6081 		s->dst.max_win = 1;
6082 		pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT);
6083 		pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED);
6084 		s->timeout = PFTM_TCP_FIRST_PACKET;
6085 		atomic_add_32(&V_pf_status.states_halfopen, 1);
6086 		break;
6087 	case IPPROTO_UDP:
6088 		pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE);
6089 		pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
6090 		s->timeout = PFTM_UDP_FIRST_PACKET;
6091 		break;
6092 	case IPPROTO_SCTP:
6093 		pf_set_protostate(s, PF_PEER_SRC, SCTP_COOKIE_WAIT);
6094 		pf_set_protostate(s, PF_PEER_DST, SCTP_CLOSED);
6095 		s->timeout = PFTM_SCTP_FIRST_PACKET;
6096 		break;
6097 	case IPPROTO_ICMP:
6098 #ifdef INET6
6099 	case IPPROTO_ICMPV6:
6100 #endif
6101 		s->timeout = PFTM_ICMP_FIRST_PACKET;
6102 		break;
6103 	default:
6104 		pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE);
6105 		pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
6106 		s->timeout = PFTM_OTHER_FIRST_PACKET;
6107 	}
6108 
6109 	s->creation = s->expire = pf_get_uptime();
6110 
6111 	if (pd->proto == IPPROTO_TCP) {
6112 		if (s->state_flags & PFSTATE_SCRUB_TCP &&
6113 		    pf_normalize_tcp_init(pd, th, &s->src, &s->dst)) {
6114 			REASON_SET(&reason, PFRES_MEMORY);
6115 			goto csfailed;
6116 		}
6117 		if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub &&
6118 		    pf_normalize_tcp_stateful(pd, &reason, th, s,
6119 		    &s->src, &s->dst, rewrite)) {
6120 			/* This really shouldn't happen!!! */
6121 			DPFPRINTF(PF_DEBUG_URGENT,
6122 			    ("pf_normalize_tcp_stateful failed on first "
6123 			     "pkt\n"));
6124 			goto csfailed;
6125 		}
6126 	} else if (pd->proto == IPPROTO_SCTP) {
6127 		if (pf_normalize_sctp_init(pd, &s->src, &s->dst))
6128 			goto csfailed;
6129 		if (!(pd->sctp_flags & (PFDESC_SCTP_INIT | PFDESC_SCTP_ADD_IP)))
6130 			goto csfailed;
6131 	}
6132 	s->direction = pd->dir;
6133 
6134 	/*
6135 	 * sk/nk may already have been set up by pf_get_translation().
6136 	 */
6137 	if (nr == NULL) {
6138 		KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
6139 		    __func__, nr, sk, nk));
6140 		MPASS(pd->sport == NULL || (pd->osport == *pd->sport));
6141 		MPASS(pd->dport == NULL || (pd->odport == *pd->dport));
6142 		if (pf_state_key_setup(pd, pd->nsport, pd->ndport, &sk, &nk)) {
6143 			goto csfailed;
6144 		}
6145 	} else
6146 		KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
6147 		    __func__, nr, sk, nk));
6148 
6149 	/* Swap sk/nk for PF_OUT. */
6150 	if (pf_state_insert(BOUND_IFACE(s, pd), pd->kif,
6151 	    (pd->dir == PF_IN) ? sk : nk,
6152 	    (pd->dir == PF_IN) ? nk : sk, s)) {
6153 		REASON_SET(&reason, PFRES_STATEINS);
6154 		goto drop;
6155 	} else
6156 		*sm = s;
6157 
6158 	/*
6159 	 * Lock order is important: first state, then source node.
6160 	 */
6161 	if (pf_src_node_exists(&sn, snh)) {
6162 		s->src_node = sn;
6163 		PF_HASHROW_UNLOCK(snh);
6164 	}
6165 	if (pf_src_node_exists(&nsn, nsnh)) {
6166 		s->nat_src_node = nsn;
6167 		PF_HASHROW_UNLOCK(nsnh);
6168 	}
6169 
6170 	if (tag > 0)
6171 		s->tag = tag;
6172 	if (pd->proto == IPPROTO_TCP && (tcp_get_flags(th) & (TH_SYN|TH_ACK)) ==
6173 	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
6174 		pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
6175 		/* undo NAT changes, if they have taken place */
6176 		if (nr != NULL) {
6177 			struct pf_state_key *skt = s->key[PF_SK_WIRE];
6178 			if (pd->dir == PF_OUT)
6179 				skt = s->key[PF_SK_STACK];
6180 			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
6181 			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
6182 			if (pd->sport)
6183 				*pd->sport = skt->port[pd->sidx];
6184 			if (pd->dport)
6185 				*pd->dport = skt->port[pd->didx];
6186 			if (pd->ip_sum)
6187 				*pd->ip_sum = bip_sum;
6188 			m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
6189 		}
6190 		s->src.seqhi = htonl(arc4random());
6191 		/* Find mss option */
6192 		int rtid = M_GETFIB(pd->m);
6193 		mss = pf_get_mss(pd);
6194 		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
6195 		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
6196 		s->src.mss = mss;
6197 		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
6198 		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
6199 		    TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0,
6200 		    pd->act.rtableid);
6201 		REASON_SET(&reason, PFRES_SYNPROXY);
6202 		return (PF_SYNPROXY_DROP);
6203 	}
6204 
6205 	s->udp_mapping = udp_mapping;
6206 
6207 	return (PF_PASS);
6208 
6209 csfailed:
6210 	while ((ri = SLIST_FIRST(match_rules))) {
6211 		SLIST_REMOVE_HEAD(match_rules, entry);
6212 		free(ri, M_PF_RULE_ITEM);
6213 	}
6214 
6215 	uma_zfree(V_pf_state_key_z, sk);
6216 	uma_zfree(V_pf_state_key_z, nk);
6217 
6218 	if (pf_src_node_exists(&sn, snh)) {
6219 		if (--sn->states == 0 && sn->expire == 0) {
6220 			pf_unlink_src_node(sn);
6221 			pf_free_src_node(sn);
6222 			counter_u64_add(
6223 			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
6224 		}
6225 		PF_HASHROW_UNLOCK(snh);
6226 	}
6227 
6228 	if (sn != nsn && pf_src_node_exists(&nsn, nsnh)) {
6229 		if (--nsn->states == 0 && nsn->expire == 0) {
6230 			pf_unlink_src_node(nsn);
6231 			pf_free_src_node(nsn);
6232 			counter_u64_add(
6233 			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
6234 		}
6235 		PF_HASHROW_UNLOCK(nsnh);
6236 	}
6237 
6238 drop:
6239 	if (s != NULL) {
6240 		pf_src_tree_remove_state(s);
6241 		s->timeout = PFTM_UNLINKED;
6242 		STATE_DEC_COUNTERS(s);
6243 		pf_free_state(s);
6244 	}
6245 
6246 	return (PF_DROP);
6247 }
6248 
6249 int
6250 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
6251     struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
6252     int icmp_dir)
6253 {
6254 	/*
6255 	 * pf_translate() implements OpenBSD's "new" NAT approach.
6256 	 * We don't follow it, because it involves a breaking syntax change
6257 	 * (removing nat/rdr rules and moving them into regular pf rules).
6258 	 * It also moves NAT processing to after normal rule evaluation,
6259 	 * whereas in FreeBSD it is done before rule processing.
6260 	 *
6261 	 * We adopt the function only for nat64, and keep other NAT processing
6262 	 * before rules processing.
6263 	 */
6264 	int	rewrite = 0;
6265 	int	afto = pd->af != pd->naf;
6266 
6267 	MPASS(afto);
6268 
6269 	switch (pd->proto) {
6270 	case IPPROTO_TCP:
6271 		if (afto || *pd->sport != sport) {
6272 			pf_change_ap(pd->m, pd->src, pd->sport, pd->ip_sum, &pd->hdr.tcp.th_sum,
6273 			    saddr, sport, 0, pd->af, pd->naf);
6274 			rewrite = 1;
6275 		}
6276 		if (afto || *pd->dport != dport) {
6277 			pf_change_ap(pd->m, pd->dst, pd->dport, pd->ip_sum, &pd->hdr.tcp.th_sum,
6278 			    daddr, dport, 0, pd->af, pd->naf);
6279 			rewrite = 1;
6280 		}
6281 		break;
6282 
6283 	case IPPROTO_UDP:
6284 		if (afto || *pd->sport != sport) {
6285 			pf_change_ap(pd->m, pd->src, pd->sport, pd->ip_sum, &pd->hdr.udp.uh_sum,
6286 			    saddr, sport, 1, pd->af, pd->naf);
6287 			rewrite = 1;
6288 		}
6289 		if (afto || *pd->dport != dport) {
6290 			pf_change_ap(pd->m, pd->dst, pd->dport, pd->ip_sum, &pd->hdr.udp.uh_sum,
6291 			    daddr, dport, 1, pd->af, pd->naf);
6292 			rewrite = 1;
6293 		}
6294 		break;
6295 
6296 	case IPPROTO_SCTP: {
6297 		uint16_t checksum = 0;
6298 		if (afto || *pd->sport != sport) {
6299 			pf_change_ap(pd->m, pd->src, pd->sport, pd->ip_sum, &checksum,
6300 			    saddr, sport, 1, pd->af, pd->naf);
6301 			rewrite = 1;
6302 		}
6303 		if (afto || *pd->dport != dport) {
6304 			pf_change_ap(pd->m, pd->dst, pd->dport, pd->ip_sum, &checksum,
6305 			    daddr, dport, 1, pd->af, pd->naf);
6306 			rewrite = 1;
6307 		}
6308 		break;
6309 	}
6310 
6311 #ifdef INET
6312 	case IPPROTO_ICMP:
6313 		/* pf_translate() is also used when logging invalid packets */
6314 		if (pd->af != AF_INET)
6315 			return (0);
6316 
6317 		if (afto) {
6318 			if (pf_translate_icmp_af(AF_INET6, &pd->hdr.icmp))
6319 				return (-1);
6320 			pd->proto = IPPROTO_ICMPV6;
6321 			rewrite = 1;
6322 		}
6323 		if (virtual_type == htons(ICMP_ECHO)) {
6324 			u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
6325 
6326 			if (icmpid != pd->hdr.icmp.icmp_id) {
6327 				pd->hdr.icmp.icmp_cksum = pf_cksum_fixup(
6328 				    pd->hdr.icmp.icmp_cksum,
6329 				    pd->hdr.icmp.icmp_id, icmpid, 0);
6330 				pd->hdr.icmp.icmp_id = icmpid;
6331 				/* XXX TODO copyback. */
6332 				rewrite = 1;
6333 			}
6334 		}
6335 		break;
6336 #endif /* INET */
6337 
6338 #ifdef INET6
6339 	case IPPROTO_ICMPV6:
6340 		/* pf_translate() is also used when logging invalid packets */
6341 		if (pd->af != AF_INET6)
6342 			return (0);
6343 
6344 		if (afto) {
6345 			/* ip_sum will be recalculated in pf_translate_af */
6346 			if (pf_translate_icmp_af(AF_INET, &pd->hdr.icmp6))
6347 				return (0);
6348 			pd->proto = IPPROTO_ICMP;
6349 			rewrite = 1;
6350 		}
6351 		break;
6352 #endif /* INET6 */
6353 
6354 	default:
6355 		break;
6356 	}
6357 
6358 	return (rewrite);
6359 }
6360 
6361 static int
6362 pf_tcp_track_full(struct pf_kstate **state, struct pf_pdesc *pd,
6363     u_short *reason, int *copyback)
6364 {
6365 	struct tcphdr		*th = &pd->hdr.tcp;
6366 	struct pf_state_peer	*src, *dst;
6367 	u_int16_t		 win = ntohs(th->th_win);
6368 	u_int32_t		 ack, end, data_end, seq, orig_seq;
6369 	u_int8_t		 sws, dws, psrc, pdst;
6370 	int			 ackskew;
6371 
6372 	if (pd->dir == (*state)->direction) {
6373 		if (PF_REVERSED_KEY((*state)->key, pd->af)) {
6374 			src = &(*state)->dst;
6375 			dst = &(*state)->src;
6376 		} else {
6377 			src = &(*state)->src;
6378 			dst = &(*state)->dst;
6379 		}
6380 		psrc = PF_PEER_SRC;
6381 		pdst = PF_PEER_DST;
6382 	} else {
6383 		if (PF_REVERSED_KEY((*state)->key, pd->af)) {
6384 			src = &(*state)->src;
6385 			dst = &(*state)->dst;
6386 		} else {
6387 			src = &(*state)->dst;
6388 			dst = &(*state)->src;
6389 		}
6390 		psrc = PF_PEER_DST;
6391 		pdst = PF_PEER_SRC;
6392 	}
6393 
6394 	if (src->wscale && dst->wscale && !(tcp_get_flags(th) & TH_SYN)) {
6395 		sws = src->wscale & PF_WSCALE_MASK;
6396 		dws = dst->wscale & PF_WSCALE_MASK;
6397 	} else
6398 		sws = dws = 0;
6399 
6400 	/*
6401 	 * Sequence tracking algorithm from Guido van Rooij's paper:
6402 	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
6403 	 *	tcp_filtering.ps
6404 	 */
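	/*
	 * In outline, the strict acceptance test applied below
	 * (the MAXACKWINDOW block) requires, roughly:
	 *
	 *	data_end <= src->seqhi			(inside upper edge)
	 *	seq >= src->seqlo - (dst->max_win << dws) (inside lower edge)
	 *	-MAXACKWINDOW <= dst->seqlo - ack
	 *		<= MAXACKWINDOW << sws		(bounded ack skew)
	 *
	 * with a looser window-only fallback for connections picked up
	 * mid-stream or already closing.
	 */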
6405 
6406 	orig_seq = seq = ntohl(th->th_seq);
6407 	if (src->seqlo == 0) {
6408 		/* First packet from this end. Set its state */
6409 
6410 		if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
6411 		    src->scrub == NULL) {
6412 			if (pf_normalize_tcp_init(pd, th, src, dst)) {
6413 				REASON_SET(reason, PFRES_MEMORY);
6414 				return (PF_DROP);
6415 			}
6416 		}
6417 
6418 		/* Deferred generation of sequence number modulator */
6419 		if (dst->seqdiff && !src->seqdiff) {
6420 			/* use random iss for the TCP server */
6421 			while ((src->seqdiff = arc4random() - seq) == 0)
6422 				;
6423 			ack = ntohl(th->th_ack) - dst->seqdiff;
6424 			pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq +
6425 			    src->seqdiff), 0);
6426 			pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0);
6427 			*copyback = 1;
6428 		} else {
6429 			ack = ntohl(th->th_ack);
6430 		}
6431 
6432 		end = seq + pd->p_len;
6433 		if (tcp_get_flags(th) & TH_SYN) {
6434 			end++;
6435 			if (dst->wscale & PF_WSCALE_FLAG) {
6436 				src->wscale = pf_get_wscale(pd);
6437 				if (src->wscale & PF_WSCALE_FLAG) {
6438 					/* Remove scale factor from initial
6439 					 * window */
6440 					sws = src->wscale & PF_WSCALE_MASK;
6441 					win = ((u_int32_t)win + (1 << sws) - 1)
6442 					    >> sws;
6443 					dws = dst->wscale & PF_WSCALE_MASK;
6444 				} else {
6445 					/* fixup other window */
6446 					dst->max_win = MIN(TCP_MAXWIN,
6447 					    (u_int32_t)dst->max_win <<
6448 					    (dst->wscale & PF_WSCALE_MASK));
6449 					/* in case of a retrans SYN|ACK */
6450 					dst->wscale = 0;
6451 				}
6452 			}
6453 		}
6454 		data_end = end;
6455 		if (tcp_get_flags(th) & TH_FIN)
6456 			end++;
6457 
6458 		src->seqlo = seq;
6459 		if (src->state < TCPS_SYN_SENT)
6460 			pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
6461 
6462 		/*
6463 		 * May need to slide the window (seqhi may have been set by
6464 		 * the crappy stack check or if we picked up the connection
6465 		 * after establishment)
6466 		 */
6467 		if (src->seqhi == 1 ||
6468 		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
6469 			src->seqhi = end + MAX(1, dst->max_win << dws);
6470 		if (win > src->max_win)
6471 			src->max_win = win;
6472 
6473 	} else {
6474 		ack = ntohl(th->th_ack) - dst->seqdiff;
6475 		if (src->seqdiff) {
6476 			/* Modulate sequence numbers */
6477 			pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq +
6478 			    src->seqdiff), 0);
6479 			pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0);
6480 			*copyback = 1;
6481 		}
6482 		end = seq + pd->p_len;
6483 		if (tcp_get_flags(th) & TH_SYN)
6484 			end++;
6485 		data_end = end;
6486 		if (tcp_get_flags(th) & TH_FIN)
6487 			end++;
6488 	}
6489 
6490 	if ((tcp_get_flags(th) & TH_ACK) == 0) {
6491 		/* Let it pass through the ack skew check */
6492 		ack = dst->seqlo;
6493 	} else if ((ack == 0 &&
6494 	    (tcp_get_flags(th) & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
6495 	    /* broken tcp stacks do not set ack */
6496 	    (dst->state < TCPS_SYN_SENT)) {
6497 		/*
6498 		 * Many stacks (ours included) will set the ACK number in a
6499 		 * FIN|ACK if the SYN times out -- no sequence to ACK.
6500 		 */
6501 		ack = dst->seqlo;
6502 	}
6503 
6504 	if (seq == end) {
6505 		/* Ease sequencing restrictions on no data packets */
6506 		seq = src->seqlo;
6507 		data_end = end = seq;
6508 	}
6509 
6510 	ackskew = dst->seqlo - ack;
6511 
6512 	/*
6513 	 * Need to demodulate the sequence numbers in any TCP SACK options
6514 	 * (Selective ACK). We could optionally validate the SACK values
6515 	 * against the current ACK window, either forwards or backwards, but
6516 	 * I'm not confident that SACK has been implemented properly
6517 	 * everywhere. It wouldn't surprise me if several stacks accidentally
6518 	 * SACK too far backwards of previously ACKed data. There really aren't
6519 	 * any security implications of bad SACKing unless the target stack
6520 	 * doesn't validate the option length correctly. Someone trying to
6521 	 * spoof into a TCP connection won't bother blindly sending SACK
6522 	 * options anyway.
6523 	 */
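	/*
	 * Conceptually, pf_modulate_sack() mirrors the th_ack adjustment
	 * above for every SACK edge, roughly:
	 *
	 *	sack.start -= dst->seqdiff;
	 *	sack.end   -= dst->seqdiff;
	 *
	 * so the SACK blocks refer to the peer's original sequence space.
	 */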
6524 	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
6525 		if (pf_modulate_sack(pd, th, dst))
6526 			*copyback = 1;
6527 	}
6528 
6529 #define	MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
6530 	if (SEQ_GEQ(src->seqhi, data_end) &&
6531 	    /* Last octet inside other's window space */
6532 	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
6533 	    /* Retrans: not more than one window back */
6534 	    (ackskew >= -MAXACKWINDOW) &&
6535 	    /* Acking not more than one reassembled fragment backwards */
6536 	    (ackskew <= (MAXACKWINDOW << sws)) &&
6537 	    /* Acking not more than one window forward */
6538 	    ((tcp_get_flags(th) & TH_RST) == 0 || orig_seq == src->seqlo ||
6539 	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
6540 	    /* Require an exact/+1 sequence match on resets when possible */
6541 
6542 		if (dst->scrub || src->scrub) {
6543 			if (pf_normalize_tcp_stateful(pd, reason, th,
6544 			    *state, src, dst, copyback))
6545 				return (PF_DROP);
6546 		}
6547 
6548 		/* update max window */
6549 		if (src->max_win < win)
6550 			src->max_win = win;
6551 		/* synchronize sequencing */
6552 		if (SEQ_GT(end, src->seqlo))
6553 			src->seqlo = end;
6554 		/* slide the window of what the other end can send */
6555 		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
6556 			dst->seqhi = ack + MAX((win << sws), 1);
6557 
6558 		/* update states */
6559 		if (tcp_get_flags(th) & TH_SYN)
6560 			if (src->state < TCPS_SYN_SENT)
6561 				pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
6562 		if (tcp_get_flags(th) & TH_FIN)
6563 			if (src->state < TCPS_CLOSING)
6564 				pf_set_protostate(*state, psrc, TCPS_CLOSING);
6565 		if (tcp_get_flags(th) & TH_ACK) {
6566 			if (dst->state == TCPS_SYN_SENT) {
6567 				pf_set_protostate(*state, pdst,
6568 				    TCPS_ESTABLISHED);
6569 				if (src->state == TCPS_ESTABLISHED &&
6570 				    (*state)->src_node != NULL &&
6571 				    pf_src_connlimit(*state)) {
6572 					REASON_SET(reason, PFRES_SRCLIMIT);
6573 					return (PF_DROP);
6574 				}
6575 			} else if (dst->state == TCPS_CLOSING)
6576 				pf_set_protostate(*state, pdst,
6577 				    TCPS_FIN_WAIT_2);
6578 		}
6579 		if (tcp_get_flags(th) & TH_RST)
6580 			pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);
6581 
6582 		/* update expire time */
6583 		(*state)->expire = pf_get_uptime();
6584 		if (src->state >= TCPS_FIN_WAIT_2 &&
6585 		    dst->state >= TCPS_FIN_WAIT_2)
6586 			(*state)->timeout = PFTM_TCP_CLOSED;
6587 		else if (src->state >= TCPS_CLOSING &&
6588 		    dst->state >= TCPS_CLOSING)
6589 			(*state)->timeout = PFTM_TCP_FIN_WAIT;
6590 		else if (src->state < TCPS_ESTABLISHED ||
6591 		    dst->state < TCPS_ESTABLISHED)
6592 			(*state)->timeout = PFTM_TCP_OPENING;
6593 		else if (src->state >= TCPS_CLOSING ||
6594 		    dst->state >= TCPS_CLOSING)
6595 			(*state)->timeout = PFTM_TCP_CLOSING;
6596 		else
6597 			(*state)->timeout = PFTM_TCP_ESTABLISHED;
6598 
6599 		/* Fall through to PASS packet */
6600 
6601 	} else if ((dst->state < TCPS_SYN_SENT ||
6602 		dst->state >= TCPS_FIN_WAIT_2 ||
6603 		src->state >= TCPS_FIN_WAIT_2) &&
6604 	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
6605 	    /* Within a window forward of the originating packet */
6606 	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
6607 	    /* Within a window backward of the originating packet */
6608 
6609 		/*
6610 		 * This currently handles three situations:
6611 		 *  1) Stupid stacks will shotgun SYNs before their peer
6612 		 *     replies.
6613 		 *  2) When PF catches an already established stream (the
6614 		 *     firewall rebooted, the state table was flushed, routes
6615 		 *     changed...)
6616 		 *  3) Packets get funky immediately after the connection
6617 		 *     closes (this should catch Solaris spurious ACK|FINs
6618 		 *     that web servers like to spew after a close)
6619 		 *
6620 		 * This must be a little more careful than the above code
6621 		 * since packet floods will also be caught here. We don't
6622 		 * update the TTL here to mitigate the damage of a packet
6623 		 * flood and so the same code can handle awkward establishment
6624 		 * and a loosened connection close.
6625 		 * In the establishment case, a correct peer response will
6626 		 * validate the connection, go through the normal state code
6627 		 * and keep updating the state TTL.
6628 		 */
6629 
6630 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
6631 			printf("pf: loose state match: ");
6632 			pf_print_state(*state);
6633 			pf_print_flags(tcp_get_flags(th));
6634 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
6635 			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
6636 			    pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
6637 			    (unsigned long long)(*state)->packets[1],
6638 			    pd->dir == PF_IN ? "in" : "out",
6639 			    pd->dir == (*state)->direction ? "fwd" : "rev");
6640 		}
6641 
6642 		if (dst->scrub || src->scrub) {
6643 			if (pf_normalize_tcp_stateful(pd, reason, th,
6644 			    *state, src, dst, copyback))
6645 				return (PF_DROP);
6646 		}
6647 
6648 		/* update max window */
6649 		if (src->max_win < win)
6650 			src->max_win = win;
6651 		/* synchronize sequencing */
6652 		if (SEQ_GT(end, src->seqlo))
6653 			src->seqlo = end;
6654 		/* slide the window of what the other end can send */
6655 		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
6656 			dst->seqhi = ack + MAX((win << sws), 1);
6657 
6658 		/*
6659 		 * Cannot set dst->seqhi here since this could be a shotgunned
6660 		 * SYN and not an already established connection.
6661 		 */
6662 
6663 		if (tcp_get_flags(th) & TH_FIN)
6664 			if (src->state < TCPS_CLOSING)
6665 				pf_set_protostate(*state, psrc, TCPS_CLOSING);
6666 		if (tcp_get_flags(th) & TH_RST)
6667 			pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);
6668 
6669 		/* Fall through to PASS packet */
6670 
6671 	} else {
6672 		if ((*state)->dst.state == TCPS_SYN_SENT &&
6673 		    (*state)->src.state == TCPS_SYN_SENT) {
6674 			/* Send RST for state mismatches during handshake */
6675 			if (!(tcp_get_flags(th) & TH_RST))
6676 				pf_send_tcp((*state)->rule, pd->af,
6677 				    pd->dst, pd->src, th->th_dport,
6678 				    th->th_sport, ntohl(th->th_ack), 0,
6679 				    TH_RST, 0, 0,
6680 				    (*state)->rule->return_ttl, M_SKIP_FIREWALL,
6681 				    0, 0, (*state)->act.rtableid);
6682 			src->seqlo = 0;
6683 			src->seqhi = 1;
6684 			src->max_win = 1;
6685 		} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
6686 			printf("pf: BAD state: ");
6687 			pf_print_state(*state);
6688 			pf_print_flags(tcp_get_flags(th));
6689 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
6690 			    "pkts=%llu:%llu dir=%s,%s\n",
6691 			    seq, orig_seq, ack, pd->p_len, ackskew,
6692 			    (unsigned long long)(*state)->packets[0],
6693 			    (unsigned long long)(*state)->packets[1],
6694 			    pd->dir == PF_IN ? "in" : "out",
6695 			    pd->dir == (*state)->direction ? "fwd" : "rev");
6696 			printf("pf: State failure on: %c %c %c %c | %c %c\n",
6697 			    SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
6698 			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
6699 			    ' ': '2',
6700 			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
6701 			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
6702 			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?' ' :'5',
6703 			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
6704 		}
6705 		REASON_SET(reason, PFRES_BADSTATE);
6706 		return (PF_DROP);
6707 	}
6708 
6709 	return (PF_PASS);
6710 }
6711 
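/*
 * Sloppy tracking, selected via PFSTATE_SLOPPY (see pf_test_state_tcp()),
 * drives the peer state machine and timeouts from the TCP flags alone and
 * performs none of the sequence window validation of pf_tcp_track_full();
 * it also copes with seeing only one half of a connection.
 */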
6712 static int
6713 pf_tcp_track_sloppy(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason)
6714 {
6715 	struct tcphdr		*th = &pd->hdr.tcp;
6716 	struct pf_state_peer	*src, *dst;
6717 	u_int8_t		 psrc, pdst;
6718 
6719 	if (pd->dir == (*state)->direction) {
6720 		src = &(*state)->src;
6721 		dst = &(*state)->dst;
6722 		psrc = PF_PEER_SRC;
6723 		pdst = PF_PEER_DST;
6724 	} else {
6725 		src = &(*state)->dst;
6726 		dst = &(*state)->src;
6727 		psrc = PF_PEER_DST;
6728 		pdst = PF_PEER_SRC;
6729 	}
6730 
6731 	if (tcp_get_flags(th) & TH_SYN)
6732 		if (src->state < TCPS_SYN_SENT)
6733 			pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
6734 	if (tcp_get_flags(th) & TH_FIN)
6735 		if (src->state < TCPS_CLOSING)
6736 			pf_set_protostate(*state, psrc, TCPS_CLOSING);
6737 	if (tcp_get_flags(th) & TH_ACK) {
6738 		if (dst->state == TCPS_SYN_SENT) {
6739 			pf_set_protostate(*state, pdst, TCPS_ESTABLISHED);
6740 			if (src->state == TCPS_ESTABLISHED &&
6741 			    (*state)->src_node != NULL &&
6742 			    pf_src_connlimit(*state)) {
6743 				REASON_SET(reason, PFRES_SRCLIMIT);
6744 				return (PF_DROP);
6745 			}
6746 		} else if (dst->state == TCPS_CLOSING) {
6747 			pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2);
6748 		} else if (src->state == TCPS_SYN_SENT &&
6749 		    dst->state < TCPS_SYN_SENT) {
6750 			/*
6751 			 * Handle a special sloppy case where we only see one
6752 			 * half of the connection. If there is an ACK after
6753 			 * the initial SYN without ever seeing a packet from
6754 			 * the destination, set the connection to established.
6755 			 */
6756 			pf_set_protostate(*state, PF_PEER_BOTH,
6757 			    TCPS_ESTABLISHED);
6758 			dst->state = src->state = TCPS_ESTABLISHED;
6759 			if ((*state)->src_node != NULL &&
6760 			    pf_src_connlimit(*state)) {
6761 				REASON_SET(reason, PFRES_SRCLIMIT);
6762 				return (PF_DROP);
6763 			}
6764 		} else if (src->state == TCPS_CLOSING &&
6765 		    dst->state == TCPS_ESTABLISHED &&
6766 		    dst->seqlo == 0) {
6767 			/*
6768 			 * Handle the closing of half connections where we
6769 			 * don't see the full bidirectional FIN/ACK+ACK
6770 			 * handshake.
6771 			 */
6772 			pf_set_protostate(*state, pdst, TCPS_CLOSING);
6773 		}
6774 	}
6775 	if (tcp_get_flags(th) & TH_RST)
6776 		pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);
6777 
6778 	/* update expire time */
6779 	(*state)->expire = pf_get_uptime();
6780 	if (src->state >= TCPS_FIN_WAIT_2 &&
6781 	    dst->state >= TCPS_FIN_WAIT_2)
6782 		(*state)->timeout = PFTM_TCP_CLOSED;
6783 	else if (src->state >= TCPS_CLOSING &&
6784 	    dst->state >= TCPS_CLOSING)
6785 		(*state)->timeout = PFTM_TCP_FIN_WAIT;
6786 	else if (src->state < TCPS_ESTABLISHED ||
6787 	    dst->state < TCPS_ESTABLISHED)
6788 		(*state)->timeout = PFTM_TCP_OPENING;
6789 	else if (src->state >= TCPS_CLOSING ||
6790 	    dst->state >= TCPS_CLOSING)
6791 		(*state)->timeout = PFTM_TCP_CLOSING;
6792 	else
6793 		(*state)->timeout = PFTM_TCP_ESTABLISHED;
6794 
6795 	return (PF_PASS);
6796 }
6797 
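/*
 * SYN proxy, in outline: while in PF_TCPS_PROXY_SRC, pf completes the
 * three-way handshake with the client itself using the ISN picked in
 * pf_create_state().  Once the client's final ACK checks out, the state
 * moves to PF_TCPS_PROXY_DST and pf opens the real connection to the
 * destination, finally stitching the two half-connections together via
 * the src/dst seqdiff offsets computed below.
 */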
6798 static int
6799 pf_synproxy(struct pf_pdesc *pd, struct pf_kstate **state, u_short *reason)
6800 {
6801 	struct pf_state_key	*sk = (*state)->key[pd->didx];
6802 	struct tcphdr		*th = &pd->hdr.tcp;
6803 
6804 	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
6805 		if (pd->dir != (*state)->direction) {
6806 			REASON_SET(reason, PFRES_SYNPROXY);
6807 			return (PF_SYNPROXY_DROP);
6808 		}
6809 		if (tcp_get_flags(th) & TH_SYN) {
6810 			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
6811 				REASON_SET(reason, PFRES_SYNPROXY);
6812 				return (PF_DROP);
6813 			}
6814 			pf_send_tcp((*state)->rule, pd->af, pd->dst,
6815 			    pd->src, th->th_dport, th->th_sport,
6816 			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
6817 			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0,
6818 			    M_SKIP_FIREWALL, 0, 0, (*state)->act.rtableid);
6819 			REASON_SET(reason, PFRES_SYNPROXY);
6820 			return (PF_SYNPROXY_DROP);
6821 		} else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
6822 		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
6823 		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
6824 			REASON_SET(reason, PFRES_SYNPROXY);
6825 			return (PF_DROP);
6826 		} else if ((*state)->src_node != NULL &&
6827 		    pf_src_connlimit(*state)) {
6828 			REASON_SET(reason, PFRES_SRCLIMIT);
6829 			return (PF_DROP);
6830 		} else
6831 			pf_set_protostate(*state, PF_PEER_SRC,
6832 			    PF_TCPS_PROXY_DST);
6833 	}
6834 	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
6835 		if (pd->dir == (*state)->direction) {
6836 			if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != TH_ACK) ||
6837 			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
6838 			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
6839 				REASON_SET(reason, PFRES_SYNPROXY);
6840 				return (PF_DROP);
6841 			}
6842 			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
6843 			if ((*state)->dst.seqhi == 1)
6844 				(*state)->dst.seqhi = htonl(arc4random());
6845 			pf_send_tcp((*state)->rule, pd->af,
6846 			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
6847 			    sk->port[pd->sidx], sk->port[pd->didx],
6848 			    (*state)->dst.seqhi, 0, TH_SYN, 0,
6849 			    (*state)->src.mss, 0,
6850 			    (*state)->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0,
6851 			    (*state)->tag, 0, (*state)->act.rtableid);
6852 			REASON_SET(reason, PFRES_SYNPROXY);
6853 			return (PF_SYNPROXY_DROP);
6854 		} else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) !=
6855 		    (TH_SYN|TH_ACK)) ||
6856 		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
6857 			REASON_SET(reason, PFRES_SYNPROXY);
6858 			return (PF_DROP);
6859 		} else {
6860 			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
6861 			(*state)->dst.seqlo = ntohl(th->th_seq);
6862 			pf_send_tcp((*state)->rule, pd->af, pd->dst,
6863 			    pd->src, th->th_dport, th->th_sport,
6864 			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
6865 			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
6866 			    (*state)->tag, 0, (*state)->act.rtableid);
6867 			pf_send_tcp((*state)->rule, pd->af,
6868 			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
6869 			    sk->port[pd->sidx], sk->port[pd->didx],
6870 			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
6871 			    TH_ACK, (*state)->dst.max_win, 0, 0,
6872 			    M_SKIP_FIREWALL, 0, 0, (*state)->act.rtableid);
6873 			(*state)->src.seqdiff = (*state)->dst.seqhi -
6874 			    (*state)->src.seqlo;
6875 			(*state)->dst.seqdiff = (*state)->src.seqhi -
6876 			    (*state)->dst.seqlo;
6877 			(*state)->src.seqhi = (*state)->src.seqlo +
6878 			    (*state)->dst.max_win;
6879 			(*state)->dst.seqhi = (*state)->dst.seqlo +
6880 			    (*state)->src.max_win;
6881 			(*state)->src.wscale = (*state)->dst.wscale = 0;
6882 			pf_set_protostate(*state, PF_PEER_BOTH,
6883 			    TCPS_ESTABLISHED);
6884 			REASON_SET(reason, PFRES_SYNPROXY);
6885 			return (PF_SYNPROXY_DROP);
6886 		}
6887 	}
6888 
6889 	return (PF_PASS);
6890 }
6891 
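/*
 * Lookup convention used here and in the UDP/SCTP variants below: state
 * keys store the wire-side source in addr[0]/port[0], so an inbound
 * packet fills the key "straight" and an outbound one "reversed".
 * PF_REVERSED_KEY() covers the af-translated case where the wire and
 * stack keys differ in address family.
 */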
6892 static int
6893 pf_test_state_tcp(struct pf_kstate **state, struct pf_pdesc *pd,
6894     u_short *reason)
6895 {
6896 	struct pf_state_key_cmp	 key;
6897 	struct tcphdr		*th = &pd->hdr.tcp;
6898 	int			 copyback = 0;
6899 	int			 action = PF_PASS;
6900 	struct pf_state_peer	*src, *dst;
6901 
6902 	bzero(&key, sizeof(key));
6903 	key.af = pd->af;
6904 	key.proto = IPPROTO_TCP;
6905 	if (pd->dir == PF_IN)	{	/* wire side, straight */
6906 		PF_ACPY(&key.addr[0], pd->src, key.af);
6907 		PF_ACPY(&key.addr[1], pd->dst, key.af);
6908 		key.port[0] = th->th_sport;
6909 		key.port[1] = th->th_dport;
6910 	} else {			/* stack side, reverse */
6911 		PF_ACPY(&key.addr[1], pd->src, key.af);
6912 		PF_ACPY(&key.addr[0], pd->dst, key.af);
6913 		key.port[1] = th->th_sport;
6914 		key.port[0] = th->th_dport;
6915 	}
6916 
6917 	STATE_LOOKUP(&key, *state, pd);
6918 
6919 	if (pd->dir == (*state)->direction) {
6920 		src = &(*state)->src;
6921 		dst = &(*state)->dst;
6922 	} else {
6923 		src = &(*state)->dst;
6924 		dst = &(*state)->src;
6925 	}
6926 
6927 	if ((action = pf_synproxy(pd, state, reason)) != PF_PASS)
6928 		return (action);
6929 
6930 	if (dst->state >= TCPS_FIN_WAIT_2 &&
6931 	    src->state >= TCPS_FIN_WAIT_2 &&
6932 	    (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) == TH_SYN) ||
6933 	    ((tcp_get_flags(th) & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK &&
6934 	    pf_syncookie_check(pd) && pd->dir == PF_IN))) {
6935 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
6936 			printf("pf: state reuse ");
6937 			pf_print_state(*state);
6938 			pf_print_flags(tcp_get_flags(th));
6939 			printf("\n");
6940 		}
6941 		/* XXX make sure it's the same direction ?? */
6942 		pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
6943 		pf_unlink_state(*state);
6944 		*state = NULL;
6945 		return (PF_DROP);
6946 	}
6947 
6948 	if ((*state)->state_flags & PFSTATE_SLOPPY) {
6949 		if (pf_tcp_track_sloppy(state, pd, reason) == PF_DROP)
6950 			return (PF_DROP);
6951 	} else {
6952 		int	 ret;
6953 
6954 		ret = pf_tcp_track_full(state, pd, reason,
6955 		    &copyback);
6956 		if (ret == PF_DROP)
6957 			return (PF_DROP);
6958 	}
6959 
6960 	/* translate source/destination address, if necessary */
6961 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
6962 		struct pf_state_key	*nk;
6963 		int			 afto, sidx, didx;
6964 
6965 		if (PF_REVERSED_KEY((*state)->key, pd->af))
6966 			nk = (*state)->key[pd->sidx];
6967 		else
6968 			nk = (*state)->key[pd->didx];
6969 
6970 		afto = pd->af != nk->af;
6971 		sidx = afto ? pd->didx : pd->sidx;
6972 		didx = afto ? pd->sidx : pd->didx;
6973 
6974 		if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) ||
6975 		    nk->port[sidx] != th->th_sport)
6976 			pf_change_ap(pd->m, pd->src, &th->th_sport,
6977 			    pd->ip_sum, &th->th_sum, &nk->addr[sidx],
6978 			    nk->port[sidx], 0, pd->af, nk->af);
6979 
6980 		if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
6981 		    nk->port[didx] != th->th_dport)
6982 			pf_change_ap(pd->m, pd->dst, &th->th_dport,
6983 			    pd->ip_sum, &th->th_sum, &nk->addr[didx],
6984 			    nk->port[didx], 0, pd->af, nk->af);
6985 
6986 		if (afto) {
6987 			PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af);
6988 			PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af);
6989 			pd->naf = nk->af;
6990 			action = PF_AFRT;
6991 		}
6992 
6993 		copyback = 1;
6994 	}
6995 
6996 	/* Copyback sequence modulation or stateful scrub changes if needed */
6997 	if (copyback)
6998 		m_copyback(pd->m, pd->off, sizeof(*th), (caddr_t)th);
6999 
7000 	return (action);
7001 }
7002 
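/*
 * UDP is tracked with a minimal state machine: a peer goes from
 * NO_TRAFFIC to SINGLE on its first packet, and to MULTIPLE once the
 * other side has answered; only then is the longer PFTM_UDP_MULTIPLE
 * timeout used.
 */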
7003 static int
7004 pf_test_state_udp(struct pf_kstate **state, struct pf_pdesc *pd)
7005 {
7006 	struct pf_state_peer	*src, *dst;
7007 	struct pf_state_key_cmp	 key;
7008 	struct udphdr		*uh = &pd->hdr.udp;
7009 	uint8_t			 psrc, pdst;
7010 	int			 action = PF_PASS;
7011 
7012 	bzero(&key, sizeof(key));
7013 	key.af = pd->af;
7014 	key.proto = IPPROTO_UDP;
7015 	if (pd->dir == PF_IN)	{	/* wire side, straight */
7016 		PF_ACPY(&key.addr[0], pd->src, key.af);
7017 		PF_ACPY(&key.addr[1], pd->dst, key.af);
7018 		key.port[0] = uh->uh_sport;
7019 		key.port[1] = uh->uh_dport;
7020 	} else {			/* stack side, reverse */
7021 		PF_ACPY(&key.addr[1], pd->src, key.af);
7022 		PF_ACPY(&key.addr[0], pd->dst, key.af);
7023 		key.port[1] = uh->uh_sport;
7024 		key.port[0] = uh->uh_dport;
7025 	}
7026 
7027 	STATE_LOOKUP(&key, *state, pd);
7028 
7029 	if (pd->dir == (*state)->direction) {
7030 		src = &(*state)->src;
7031 		dst = &(*state)->dst;
7032 		psrc = PF_PEER_SRC;
7033 		pdst = PF_PEER_DST;
7034 	} else {
7035 		src = &(*state)->dst;
7036 		dst = &(*state)->src;
7037 		psrc = PF_PEER_DST;
7038 		pdst = PF_PEER_SRC;
7039 	}
7040 
7041 	/* update states */
7042 	if (src->state < PFUDPS_SINGLE)
7043 		pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
7044 	if (dst->state == PFUDPS_SINGLE)
7045 		pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);
7046 
7047 	/* update expire time */
7048 	(*state)->expire = pf_get_uptime();
7049 	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
7050 		(*state)->timeout = PFTM_UDP_MULTIPLE;
7051 	else
7052 		(*state)->timeout = PFTM_UDP_SINGLE;
7053 
7054 	/* translate source/destination address, if necessary */
7055 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
7056 		struct pf_state_key	*nk;
7057 		int			 afto, sidx, didx;
7058 
7059 		if (PF_REVERSED_KEY((*state)->key, pd->af))
7060 			nk = (*state)->key[pd->sidx];
7061 		else
7062 			nk = (*state)->key[pd->didx];
7063 
7064 		afto = pd->af != nk->af;
7065 		sidx = afto ? pd->didx : pd->sidx;
7066 		didx = afto ? pd->sidx : pd->didx;
7067 
7068 		if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) ||
7069 		    nk->port[sidx] != uh->uh_sport)
7070 			pf_change_ap(pd->m, pd->src, &uh->uh_sport, pd->ip_sum,
7071 			    &uh->uh_sum, &nk->addr[sidx],
7072 			    nk->port[sidx], 1, pd->af, nk->af);
7073 
7074 		if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
7075 		    nk->port[didx] != uh->uh_dport)
7076 			pf_change_ap(pd->m, pd->dst, &uh->uh_dport, pd->ip_sum,
7077 			    &uh->uh_sum, &nk->addr[didx],
7078 			    nk->port[didx], 1, pd->af, nk->af);
7079 
7080 		if (afto) {
7081 			PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af);
7082 			PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af);
7083 			pd->naf = nk->af;
7084 			action = PF_AFRT;
7085 		}
7086 
7087 		m_copyback(pd->m, pd->off, sizeof(*uh), (caddr_t)uh);
7088 	}
7089 
7090 	return (action);
7091 }
7092 
7093 static int
7094 pf_sctp_track(struct pf_kstate *state, struct pf_pdesc *pd,
7095     u_short *reason)
7096 {
7097 	struct pf_state_peer	*src;
7098 	if (pd->dir == state->direction) {
7099 		if (PF_REVERSED_KEY(state->key, pd->af))
7100 			src = &state->dst;
7101 		else
7102 			src = &state->src;
7103 	} else {
7104 		if (PF_REVERSED_KEY(state->key, pd->af))
7105 			src = &state->src;
7106 		else
7107 			src = &state->dst;
7108 	}
7109 
7110 	if (src->scrub != NULL) {
7111 		if (src->scrub->pfss_v_tag == 0)
7112 			src->scrub->pfss_v_tag = pd->hdr.sctp.v_tag;
7113 		else if (src->scrub->pfss_v_tag != pd->hdr.sctp.v_tag)
7114 			return (PF_DROP);
7115 	}
7116 
7117 	return (PF_PASS);
7118 }
7119 
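/*
 * SCTP transitions below are driven by the chunk types summarized in
 * pd->sctp_flags; pf_sctp_track() above additionally pins each peer to
 * the first verification tag it uses and drops later packets whose
 * v_tag no longer matches.
 */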
7120 static int
7121 pf_test_state_sctp(struct pf_kstate **state, struct pf_pdesc *pd,
7122     u_short *reason)
7123 {
7124 	struct pf_state_key_cmp	 key;
7125 	struct pf_state_peer	*src, *dst;
7126 	struct sctphdr		*sh = &pd->hdr.sctp;
7127 	u_int8_t		 psrc;
7128 
7129 	bzero(&key, sizeof(key));
7130 	key.af = pd->af;
7131 	key.proto = IPPROTO_SCTP;
7132 	if (pd->dir == PF_IN)	{	/* wire side, straight */
7133 		PF_ACPY(&key.addr[0], pd->src, key.af);
7134 		PF_ACPY(&key.addr[1], pd->dst, key.af);
7135 		key.port[0] = sh->src_port;
7136 		key.port[1] = sh->dest_port;
7137 	} else {			/* stack side, reverse */
7138 		PF_ACPY(&key.addr[1], pd->src, key.af);
7139 		PF_ACPY(&key.addr[0], pd->dst, key.af);
7140 		key.port[1] = sh->src_port;
7141 		key.port[0] = sh->dest_port;
7142 	}
7143 
7144 	STATE_LOOKUP(&key, *state, pd);
7145 
7146 	if (pd->dir == (*state)->direction) {
7147 		src = &(*state)->src;
7148 		dst = &(*state)->dst;
7149 		psrc = PF_PEER_SRC;
7150 	} else {
7151 		src = &(*state)->dst;
7152 		dst = &(*state)->src;
7153 		psrc = PF_PEER_DST;
7154 	}
7155 
7156 	if ((src->state >= SCTP_SHUTDOWN_SENT || src->state == SCTP_CLOSED) &&
7157 	    (dst->state >= SCTP_SHUTDOWN_SENT || dst->state == SCTP_CLOSED) &&
7158 	    pd->sctp_flags & PFDESC_SCTP_INIT) {
7159 		pf_set_protostate(*state, PF_PEER_BOTH, SCTP_CLOSED);
7160 		pf_unlink_state(*state);
7161 		*state = NULL;
7162 		return (PF_DROP);
7163 	}
7164 
7165 	/* Track state. */
7166 	if (pd->sctp_flags & PFDESC_SCTP_INIT) {
7167 		if (src->state < SCTP_COOKIE_WAIT) {
7168 			pf_set_protostate(*state, psrc, SCTP_COOKIE_WAIT);
7169 			(*state)->timeout = PFTM_SCTP_OPENING;
7170 		}
7171 	}
7172 	if (pd->sctp_flags & PFDESC_SCTP_INIT_ACK) {
7173 		MPASS(dst->scrub != NULL);
7174 		if (dst->scrub->pfss_v_tag == 0)
7175 			dst->scrub->pfss_v_tag = pd->sctp_initiate_tag;
7176 	}
7177 
7178 	if (pd->sctp_flags & (PFDESC_SCTP_COOKIE | PFDESC_SCTP_HEARTBEAT_ACK)) {
7179 		if (src->state < SCTP_ESTABLISHED) {
7180 			pf_set_protostate(*state, psrc, SCTP_ESTABLISHED);
7181 			(*state)->timeout = PFTM_SCTP_ESTABLISHED;
7182 		}
7183 	}
7184 	if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN |
7185 	    PFDESC_SCTP_SHUTDOWN_COMPLETE)) {
7186 		if (src->state < SCTP_SHUTDOWN_PENDING) {
7187 			pf_set_protostate(*state, psrc, SCTP_SHUTDOWN_PENDING);
7188 			(*state)->timeout = PFTM_SCTP_CLOSING;
7189 		}
7190 	}
7191 	if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN_COMPLETE | PFDESC_SCTP_ABORT)) {
7192 		pf_set_protostate(*state, psrc, SCTP_CLOSED);
7193 		(*state)->timeout = PFTM_SCTP_CLOSED;
7194 	}
7195 
7196 	if (pf_sctp_track(*state, pd, reason) != PF_PASS)
7197 		return (PF_DROP);
7198 
7199 	(*state)->expire = pf_get_uptime();
7200 
7201 	/* translate source/destination address, if necessary */
7202 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
7203 		uint16_t checksum = 0;
7204 		struct pf_state_key	*nk;
7205 		int			 afto, sidx, didx;
7206 
7207 		if (PF_REVERSED_KEY((*state)->key, pd->af))
7208 			nk = (*state)->key[pd->sidx];
7209 		else
7210 			nk = (*state)->key[pd->didx];
7211 
7212 		afto = pd->af != nk->af;
7213 		sidx = afto ? pd->didx : pd->sidx;
7214 		didx = afto ? pd->sidx : pd->didx;
7215 
7216 		if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) ||
7217 		    nk->port[sidx] != pd->hdr.sctp.src_port) {
7218 			pf_change_ap(pd->m, pd->src, &pd->hdr.sctp.src_port,
7219 			    pd->ip_sum, &checksum, &nk->addr[sidx],
7220 			    nk->port[sidx], 1, pd->af, pd->naf);
7221 		}
7222 
7223 		if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
7224 		    nk->port[didx] != pd->hdr.sctp.dest_port) {
7225 			pf_change_ap(pd->m, pd->dst, &pd->hdr.sctp.dest_port,
7226 			    pd->ip_sum, &checksum, &nk->addr[didx],
7227 			    nk->port[didx], 1, pd->af, pd->naf);
7228 		}
7229 
7230 		if (afto) {
7231 			PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af);
7232 			PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af);
7233 			pd->naf = nk->af;
7234 			return (PF_AFRT);
7235 		}
7236 	}
7237 
7238 	return (PF_PASS);
7239 }
7240 
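/*
 * Multihomed SCTP bookkeeping: V_pf_sctp_endpoints is an RB tree mapping
 * a verification tag to the list of source addresses that endpoint has
 * announced.  Detaching runs at state teardown, removing this state's
 * wire addresses from both peers' lists and freeing an endpoint entry
 * once its source list drains.
 */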
7241 static void
7242 pf_sctp_multihome_detach_addr(const struct pf_kstate *s)
7243 {
7244 	struct pf_sctp_endpoint key;
7245 	struct pf_sctp_endpoint *ep;
7246 	struct pf_state_key *sks = s->key[PF_SK_STACK];
7247 	struct pf_sctp_source *i, *tmp;
7248 
7249 	if (sks == NULL || sks->proto != IPPROTO_SCTP || s->dst.scrub == NULL)
7250 		return;
7251 
7252 	PF_SCTP_ENDPOINTS_LOCK();
7253 
7254 	key.v_tag = s->dst.scrub->pfss_v_tag;
7255 	ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
7256 	if (ep != NULL) {
7257 		TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
7258 			if (pf_addr_cmp(&i->addr,
7259 			    &s->key[PF_SK_WIRE]->addr[s->direction == PF_OUT],
7260 			    s->key[PF_SK_WIRE]->af) == 0) {
7261 				SDT_PROBE3(pf, sctp, multihome, remove,
7262 				    key.v_tag, s, i);
7263 				TAILQ_REMOVE(&ep->sources, i, entry);
7264 				free(i, M_PFTEMP);
7265 				break;
7266 			}
7267 		}
7268 
7269 		if (TAILQ_EMPTY(&ep->sources)) {
7270 			RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
7271 			free(ep, M_PFTEMP);
7272 		}
7273 	}
7274 
7275 	/* Other direction. */
7276 	key.v_tag = s->src.scrub->pfss_v_tag;
7277 	ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
7278 	if (ep != NULL) {
7279 		TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
7280 			if (pf_addr_cmp(&i->addr,
7281 			    &s->key[PF_SK_WIRE]->addr[s->direction == PF_IN],
7282 			    s->key[PF_SK_WIRE]->af) == 0) {
7283 				SDT_PROBE3(pf, sctp, multihome, remove,
7284 				    key.v_tag, s, i);
7285 				TAILQ_REMOVE(&ep->sources, i, entry);
7286 				free(i, M_PFTEMP);
7287 				break;
7288 			}
7289 		}
7290 
7291 		if (TAILQ_EMPTY(&ep->sources)) {
7292 			RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
7293 			free(ep, M_PFTEMP);
7294 		}
7295 	}
7296 
7297 	PF_SCTP_ENDPOINTS_UNLOCK();
7298 }
7299 
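/*
 * Record an extra source address for the endpoint identified by the given
 * verification tag, creating the endpoint entry on first use. Duplicate
 * addresses are ignored.
 */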
7300 static void
7301 pf_sctp_multihome_add_addr(struct pf_pdesc *pd, struct pf_addr *a, uint32_t v_tag)
7302 {
7303 	struct pf_sctp_endpoint key = {
7304 		.v_tag = v_tag,
7305 	};
7306 	struct pf_sctp_source *i;
7307 	struct pf_sctp_endpoint *ep;
7308 
7309 	PF_SCTP_ENDPOINTS_LOCK();
7310 
7311 	ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
7312 	if (ep == NULL) {
7313 		ep = malloc(sizeof(struct pf_sctp_endpoint),
7314 		    M_PFTEMP, M_NOWAIT);
7315 		if (ep == NULL) {
7316 			PF_SCTP_ENDPOINTS_UNLOCK();
7317 			return;
7318 		}
7319 
7320 		ep->v_tag = v_tag;
7321 		TAILQ_INIT(&ep->sources);
7322 		RB_INSERT(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
7323 	}
7324 
7325 	/* Avoid inserting duplicates. */
7326 	TAILQ_FOREACH(i, &ep->sources, entry) {
7327 		if (pf_addr_cmp(&i->addr, a, pd->af) == 0) {
7328 			PF_SCTP_ENDPOINTS_UNLOCK();
7329 			return;
7330 		}
7331 	}
7332 
7333 	i = malloc(sizeof(*i), M_PFTEMP, M_NOWAIT);
7334 	if (i == NULL) {
7335 		PF_SCTP_ENDPOINTS_UNLOCK();
7336 		return;
7337 	}
7338 
7339 	i->af = pd->af;
7340 	memcpy(&i->addr, a, sizeof(*a));
7341 	TAILQ_INSERT_TAIL(&ep->sources, i, entry);
7342 	SDT_PROBE2(pf, sctp, multihome, add, v_tag, i);
7343 
7344 	PF_SCTP_ENDPOINTS_UNLOCK();
7345 }
7346 
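/*
 * Run the multihome jobs queued by pf_multihome_scan() now that the state
 * lock is no longer held: evaluate the ruleset for each extra address,
 * inherit the verification tags into any newly created state, and queue
 * follow-up jobs for the peer's other known addresses.
 */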
7347 static void
7348 pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif,
7349     struct pf_kstate *s, int action)
7350 {
7351 	struct pf_sctp_multihome_job	*j, *tmp;
7352 	struct pf_sctp_source		*i;
7353 	int			 ret __unused;
7354 	struct pf_kstate	*sm = NULL;
7355 	struct pf_krule		*ra = NULL;
7356 	struct pf_krule		*r = &V_pf_default_rule;
7357 	struct pf_kruleset	*rs = NULL;
7358 	bool do_extra = true;
7359 
7360 	PF_RULES_RLOCK_TRACKER;
7361 
7362 again:
7363 	TAILQ_FOREACH_SAFE(j, &pd->sctp_multihome_jobs, next, tmp) {
7364 		if (s == NULL || action != PF_PASS)
7365 			goto free;
7366 
7367 		/* Confirm we don't recurse here. */
7368 		MPASS(! (pd->sctp_flags & PFDESC_SCTP_ADD_IP));
7369 
7370 		switch (j->op) {
7371 		case  SCTP_ADD_IP_ADDRESS: {
7372 			uint32_t v_tag = pd->sctp_initiate_tag;
7373 
7374 			if (v_tag == 0) {
7375 				if (s->direction == pd->dir)
7376 					v_tag = s->src.scrub->pfss_v_tag;
7377 				else
7378 					v_tag = s->dst.scrub->pfss_v_tag;
7379 			}
7380 
7381 			/*
7382 			 * Avoid duplicating states. We'll already have
7383 			 * created a state based on the source address of
7384 			 * the packet, but SCTP endpoints may also list this
7385 			 * address again in the INIT(_ACK) parameters.
7386 			 */
7387 			if (pf_addr_cmp(&j->src, pd->src, pd->af) == 0) {
7388 				break;
7389 			}
7390 
7391 			j->pd.sctp_flags |= PFDESC_SCTP_ADD_IP;
7392 			PF_RULES_RLOCK();
7393 			sm = NULL;
7394 			ret = pf_test_rule(&r, &sm,
7395 			    &j->pd, &ra, &rs, NULL);
7396 			PF_RULES_RUNLOCK();
7397 			SDT_PROBE4(pf, sctp, multihome, test, kif, r, j->pd.m, ret);
7398 			if (ret != PF_DROP && sm != NULL) {
7399 				/* Inherit v_tag values. */
7400 				if (sm->direction == s->direction) {
7401 					sm->src.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
7402 					sm->dst.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
7403 				} else {
7404 					sm->src.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
7405 					sm->dst.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
7406 				}
7407 				PF_STATE_UNLOCK(sm);
7408 			} else {
7409 				/* No state created (e.g. a duplicate insert); skip it. */
7410 				break;
7411 			}
7412 
7413 			/* Only add the address if we've actually allowed the state. */
7414 			pf_sctp_multihome_add_addr(pd, &j->src, v_tag);
7415 
7416 			if (! do_extra) {
7417 				break;
7418 			}
7419 			/*
7420 			 * We need to do this for each of our source addresses.
7421 			 * Find those based on the verification tag.
7422 			 */
7423 			struct pf_sctp_endpoint key = {
7424 				.v_tag = pd->hdr.sctp.v_tag,
7425 			};
7426 			struct pf_sctp_endpoint *ep;
7427 
7428 			PF_SCTP_ENDPOINTS_LOCK();
7429 			ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
7430 			if (ep == NULL) {
7431 				PF_SCTP_ENDPOINTS_UNLOCK();
7432 				break;
7433 			}
7434 			MPASS(ep != NULL);
7435 
7436 			TAILQ_FOREACH(i, &ep->sources, entry) {
7437 				struct pf_sctp_multihome_job *nj;
7438 
7439 				/* SCTP can intermingle IPv4 and IPv6. */
7440 				if (i->af != pd->af)
7441 					continue;
7442 
7443 				nj = malloc(sizeof(*nj), M_PFTEMP, M_NOWAIT | M_ZERO);
7444 				if (! nj) {
7445 					continue;
7446 				}
7447 				memcpy(&nj->pd, &j->pd, sizeof(j->pd));
7448 				memcpy(&nj->src, &j->src, sizeof(nj->src));
7449 				nj->pd.src = &nj->src;
7450 				// New destination address!
7451 				memcpy(&nj->dst, &i->addr, sizeof(nj->dst));
7452 				nj->pd.dst = &nj->dst;
7453 				nj->pd.m = j->pd.m;
7454 				nj->op = j->op;
7455 
7456 				TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, nj, next);
7457 			}
7458 			PF_SCTP_ENDPOINTS_UNLOCK();
7459 
7460 			break;
7461 		}
7462 		case SCTP_DEL_IP_ADDRESS: {
7463 			struct pf_state_key_cmp key;
7464 			uint8_t psrc;
7465 
7466 			bzero(&key, sizeof(key));
7467 			key.af = j->pd.af;
7468 			key.proto = IPPROTO_SCTP;
7469 			if (j->pd.dir == PF_IN)	{	/* wire side, straight */
7470 				PF_ACPY(&key.addr[0], j->pd.src, key.af);
7471 				PF_ACPY(&key.addr[1], j->pd.dst, key.af);
7472 				key.port[0] = j->pd.hdr.sctp.src_port;
7473 				key.port[1] = j->pd.hdr.sctp.dest_port;
7474 			} else {			/* stack side, reverse */
7475 				PF_ACPY(&key.addr[1], j->pd.src, key.af);
7476 				PF_ACPY(&key.addr[0], j->pd.dst, key.af);
7477 				key.port[1] = j->pd.hdr.sctp.src_port;
7478 				key.port[0] = j->pd.hdr.sctp.dest_port;
7479 			}
7480 
7481 			sm = pf_find_state(kif, &key, j->pd.dir);
7482 			if (sm != NULL) {
7483 				PF_STATE_LOCK_ASSERT(sm);
7484 				if (j->pd.dir == sm->direction) {
7485 					psrc = PF_PEER_SRC;
7486 				} else {
7487 					psrc = PF_PEER_DST;
7488 				}
7489 				pf_set_protostate(sm, psrc, SCTP_SHUTDOWN_PENDING);
7490 				sm->timeout = PFTM_SCTP_CLOSING;
7491 				PF_STATE_UNLOCK(sm);
7492 			}
7493 			break;
7494 		}
7495 		default:
7496 			panic("Unknown op %#x", j->op);
7497 		}
7498 
7499 	free:
7500 		TAILQ_REMOVE(&pd->sctp_multihome_jobs, j, next);
7501 		free(j, M_PFTEMP);
7502 	}
7503 
7504 	/* We may have inserted extra work while processing the list. */
7505 	if (! TAILQ_EMPTY(&pd->sctp_multihome_jobs)) {
7506 		do_extra = false;
7507 		goto again;
7508 	}
7509 }
7510 
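/*
 * Walk the SCTP parameter TLVs in [start, start + len), queueing multihome
 * jobs for IPv4/IPv6 address parameters and recursing into ASCONF
 * add/delete wrappers. Parameters are padded to a multiple of four bytes.
 */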
7511 static int
7512 pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op)
7513 {
7514 	int			 off = 0;
7515 	struct pf_sctp_multihome_job	*job;
7516 
7517 	while (off < len) {
7518 		struct sctp_paramhdr h;
7519 
7520 		if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, NULL,
7521 		    pd->af))
7522 			return (PF_DROP);
7523 
7524 		/* Parameters are at least 4 bytes. */
7525 		if (ntohs(h.param_length) < 4)
7526 			return (PF_DROP);
7527 
7528 		switch (ntohs(h.param_type)) {
7529 		case  SCTP_IPV4_ADDRESS: {
7530 			struct in_addr t;
7531 
7532 			if (ntohs(h.param_length) !=
7533 			    (sizeof(struct sctp_paramhdr) + sizeof(t)))
7534 				return (PF_DROP);
7535 
7536 			if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
7537 			    NULL, NULL, pd->af))
7538 				return (PF_DROP);
7539 
7540 			if (in_nullhost(t))
7541 				t.s_addr = pd->src->v4.s_addr;
7542 
7543 			/*
7544 			 * We hold the state lock (idhash) here, which means
7545 			 * that we can't acquire the keyhash, or we'll get a
7546 			 * LOR (and potentially double-lock things too). We also
7547 			 * can't release the state lock here, so instead we'll
7548 			 * enqueue this for async handling.
7549 			 * There's a relatively small race here, in that a
7550 			 * packet using the new addresses could arrive already,
7551 			 * but that's just tough luck for it.
7552 			 */
7553 			job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
7554 			if (! job)
7555 				return (PF_DROP);
7556 
7557 			memcpy(&job->pd, pd, sizeof(*pd));
7558 
7559 			// New source address!
7560 			memcpy(&job->src, &t, sizeof(t));
7561 			job->pd.src = &job->src;
7562 			memcpy(&job->dst, pd->dst, sizeof(job->dst));
7563 			job->pd.dst = &job->dst;
7564 			job->pd.m = pd->m;
7565 			job->op = op;
7566 
7567 			TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
7568 			break;
7569 		}
7570 #ifdef INET6
7571 		case SCTP_IPV6_ADDRESS: {
7572 			struct in6_addr t;
7573 
7574 			if (ntohs(h.param_length) !=
7575 			    (sizeof(struct sctp_paramhdr) + sizeof(t)))
7576 				return (PF_DROP);
7577 
7578 			if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t),
7579 			    NULL, NULL, pd->af))
7580 				return (PF_DROP);
7581 			if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0)
7582 				break;
7583 			if (memcmp(&t, &in6addr_any, sizeof(t)) == 0)
7584 				memcpy(&t, &pd->src->v6, sizeof(t));
7585 
7586 			job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO);
7587 			if (! job)
7588 				return (PF_DROP);
7589 
7590 			memcpy(&job->pd, pd, sizeof(*pd));
7591 			memcpy(&job->src, &t, sizeof(t));
7592 			job->pd.src = &job->src;
7593 			memcpy(&job->dst, pd->dst, sizeof(job->dst));
7594 			job->pd.dst = &job->dst;
7595 			job->pd.m = pd->m;
7596 			job->op = op;
7597 
7598 			TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next);
7599 			break;
7600 		}
7601 #endif
7602 		case SCTP_ADD_IP_ADDRESS: {
7603 			int ret;
7604 			struct sctp_asconf_paramhdr ah;
7605 
7606 			if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
7607 			    NULL, NULL, pd->af))
7608 				return (PF_DROP);
7609 
7610 			ret = pf_multihome_scan(start + off + sizeof(ah),
7611 			    ntohs(ah.ph.param_length) - sizeof(ah), pd,
7612 			    SCTP_ADD_IP_ADDRESS);
7613 			if (ret != PF_PASS)
7614 				return (ret);
7615 			break;
7616 		}
7617 		case SCTP_DEL_IP_ADDRESS: {
7618 			int ret;
7619 			struct sctp_asconf_paramhdr ah;
7620 
7621 			if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah),
7622 			    NULL, NULL, pd->af))
7623 				return (PF_DROP);
7624 			ret = pf_multihome_scan(start + off + sizeof(ah),
7625 			    ntohs(ah.ph.param_length) - sizeof(ah), pd,
7626 			    SCTP_DEL_IP_ADDRESS);
7627 			if (ret != PF_PASS)
7628 				return (ret);
7629 			break;
7630 		}
7631 		default:
7632 			break;
7633 		}
7634 
7635 		off += roundup(ntohs(h.param_length), 4);
7636 	}
7637 
7638 	return (PF_PASS);
7639 }

7640 int
7641 pf_multihome_scan_init(int start, int len, struct pf_pdesc *pd)
7642 {
7643 	start += sizeof(struct sctp_init_chunk);
7644 	len -= sizeof(struct sctp_init_chunk);
7645 
7646 	return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS));
7647 }
7648 
7649 int
7650 pf_multihome_scan_asconf(int start, int len, struct pf_pdesc *pd)
7651 {
7652 	start += sizeof(struct sctp_asconf_chunk);
7653 	len -= sizeof(struct sctp_asconf_chunk);
7654 
7655 	return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS));
7656 }
7657 
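/*
 * Look up the state for an ICMP query/reply. The ICMP id and the virtual
 * type stand in for the ports in the state key. Returns -1 with the state
 * locked on success, PF_DROP if no state matches or the message flows in
 * the wrong direction.
 */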
7658 int
7659 pf_icmp_state_lookup(struct pf_state_key_cmp *key, struct pf_pdesc *pd,
7660     struct pf_kstate **state, int direction,
7661     u_int16_t icmpid, u_int16_t type, int icmp_dir,
7662     int *iidx, int multi, int inner)
7663 {
7664 	key->af = pd->af;
7665 	key->proto = pd->proto;
7666 	if (icmp_dir == PF_IN) {
7667 		*iidx = pd->sidx;
7668 		key->port[pd->sidx] = icmpid;
7669 		key->port[pd->didx] = type;
7670 	} else {
7671 		*iidx = pd->didx;
7672 		key->port[pd->sidx] = type;
7673 		key->port[pd->didx] = icmpid;
7674 	}
7675 	if (pf_state_key_addr_setup(pd, key, multi))
7676 		return (PF_DROP);
7677 
7678 	STATE_LOOKUP(key, *state, pd);
7679 
7680 	if ((*state)->state_flags & PFSTATE_SLOPPY)
7681 		return (-1);
7682 
7683 	/* Is this ICMP message flowing in right direction? */
7684 	if ((*state)->rule->type &&
7685 	    (((!inner && (*state)->direction == direction) ||
7686 	    (inner && (*state)->direction != direction)) ?
7687 	    PF_IN : PF_OUT) != icmp_dir) {
7688 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
7689 			printf("pf: icmp type %d in wrong direction (%d): ",
7690 			    ntohs(type), icmp_dir);
7691 			pf_print_state(*state);
7692 			printf("\n");
7693 		}
7694 		PF_STATE_UNLOCK(*state);
7695 		*state = NULL;
7696 		return (PF_DROP);
7697 	}
7698 	return (-1);
7699 }
7700 
7701 static int
7702 pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
7703     u_short *reason)
7704 {
7705 	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
7706 	u_int16_t	*icmpsum, virtual_id, virtual_type;
7707 	u_int8_t	 icmptype, icmpcode;
7708 	int		 icmp_dir, iidx, ret, multi;
7709 	struct pf_state_key_cmp key;
7710 #ifdef INET
7711 	u_int16_t	 icmpid;
7712 #endif
7713 
7714 	MPASS(*state == NULL);
7715 
7716 	bzero(&key, sizeof(key));
7717 	switch (pd->proto) {
7718 #ifdef INET
7719 	case IPPROTO_ICMP:
7720 		icmptype = pd->hdr.icmp.icmp_type;
7721 		icmpcode = pd->hdr.icmp.icmp_code;
7722 		icmpid = pd->hdr.icmp.icmp_id;
7723 		icmpsum = &pd->hdr.icmp.icmp_cksum;
7724 		break;
7725 #endif /* INET */
7726 #ifdef INET6
7727 	case IPPROTO_ICMPV6:
7728 		icmptype = pd->hdr.icmp6.icmp6_type;
7729 		icmpcode = pd->hdr.icmp6.icmp6_code;
7730 #ifdef INET
7731 		icmpid = pd->hdr.icmp6.icmp6_id;
7732 #endif
7733 		icmpsum = &pd->hdr.icmp6.icmp6_cksum;
7734 		break;
7735 #endif /* INET6 */
7736 	}
7737 
7738 	if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &multi,
7739 	    &virtual_id, &virtual_type) == 0) {
7740 		/*
7741 		 * ICMP query/reply message not related to a TCP/UDP packet.
7742 		 * Search for an ICMP state.
7743 		 */
7744 		ret = pf_icmp_state_lookup(&key, pd, state, pd->dir,
7745 		    virtual_id, virtual_type, icmp_dir, &iidx,
7746 		    PF_ICMP_MULTI_NONE, 0);
7747 		if (ret >= 0) {
7748 			MPASS(*state == NULL);
7749 			if (ret == PF_DROP && pd->af == AF_INET6 &&
7750 			    icmp_dir == PF_OUT) {
7751 				ret = pf_icmp_state_lookup(&key, pd, state,
7752 				    pd->dir, virtual_id, virtual_type,
7753 				    icmp_dir, &iidx, multi, 0);
7754 				if (ret >= 0) {
7755 					MPASS(*state == NULL);
7756 					return (ret);
7757 				}
7758 			} else
7759 				return (ret);
7760 		}
7761 
7762 		(*state)->expire = pf_get_uptime();
7763 		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
7764 
7765 		/* translate source/destination address, if necessary */
7766 		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
7767 			struct pf_state_key	*nk;
7768 			int			 afto, sidx, didx;
7769 
7770 			if (PF_REVERSED_KEY((*state)->key, pd->af))
7771 				nk = (*state)->key[pd->sidx];
7772 			else
7773 				nk = (*state)->key[pd->didx];
7774 
7775 			afto = pd->af != nk->af;
7776 			sidx = afto ? pd->didx : pd->sidx;
7777 			didx = afto ? pd->sidx : pd->didx;
7778 			iidx = afto ? !iidx : iidx;
7779 
7780 			switch (pd->af) {
7781 #ifdef INET
7782 			case AF_INET:
7783 #ifdef INET6
7784 				if (afto) {
7785 					if (pf_translate_icmp_af(AF_INET6,
7786 					    &pd->hdr.icmp))
7787 						return (PF_DROP);
7788 					pd->proto = IPPROTO_ICMPV6;
7789 				}
7790 #endif
7791 				if (!afto &&
7792 				    PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET))
7793 					pf_change_a(&saddr->v4.s_addr,
7794 					    pd->ip_sum,
7795 					    nk->addr[sidx].v4.s_addr,
7796 					    0);
7797 
7798 				if (!afto && PF_ANEQ(pd->dst,
7799 				    &nk->addr[didx], AF_INET))
7800 					pf_change_a(&daddr->v4.s_addr,
7801 					    pd->ip_sum,
7802 					    nk->addr[didx].v4.s_addr, 0);
7803 
7804 				if (nk->port[iidx] !=
7805 				    pd->hdr.icmp.icmp_id) {
7806 					pd->hdr.icmp.icmp_cksum =
7807 					    pf_cksum_fixup(
7808 					    pd->hdr.icmp.icmp_cksum, icmpid,
7809 					    nk->port[iidx], 0);
7810 					pd->hdr.icmp.icmp_id =
7811 					    nk->port[iidx];
7812 				}
7813 
7814 				m_copyback(pd->m, pd->off, ICMP_MINLEN,
7815 				    (caddr_t )&pd->hdr.icmp);
7816 				break;
7817 #endif /* INET */
7818 #ifdef INET6
7819 			case AF_INET6:
7820 #ifdef INET
7821 				if (afto) {
7822 					if (pf_translate_icmp_af(AF_INET,
7823 					    &pd->hdr.icmp6))
7824 						return (PF_DROP);
7825 					pd->proto = IPPROTO_ICMP;
7826 				}
7827 #endif
7828 				if (!afto &&
7829 				    PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET6))
7830 					pf_change_a6(saddr,
7831 					    &pd->hdr.icmp6.icmp6_cksum,
7832 					    &nk->addr[sidx], 0);
7833 
7834 				if (!afto && PF_ANEQ(pd->dst,
7835 				    &nk->addr[didx], AF_INET6))
7836 					pf_change_a6(daddr,
7837 					    &pd->hdr.icmp6.icmp6_cksum,
7838 					    &nk->addr[didx], 0);
7839 
7840 				if (nk->port[iidx] != pd->hdr.icmp6.icmp6_id)
7841 					pd->hdr.icmp6.icmp6_id =
7842 					    nk->port[iidx];
7843 
7844 				m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr),
7845 				    (caddr_t )&pd->hdr.icmp6);
7846 				break;
7847 #endif /* INET6 */
7848 			}
7849 			if (afto) {
7850 				PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af);
7851 				PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af);
7852 				pd->naf = nk->af;
7853 				return (PF_AFRT);
7854 			}
7855 		}
7856 		return (PF_PASS);
7857 
7858 	} else {
7859 		/*
7860 		 * ICMP error message in response to a TCP/UDP packet.
7861 		 * Extract the inner TCP/UDP header and search for that state.
7862 		 */
7863 
7864 		struct pf_pdesc	pd2;
7865 		bzero(&pd2, sizeof pd2);
7866 #ifdef INET
7867 		struct ip	h2;
7868 #endif /* INET */
7869 #ifdef INET6
7870 		struct ip6_hdr	h2_6;
7871 		int		fragoff2, extoff2;
7872 		u_int32_t	jumbolen;
7873 #endif /* INET6 */
7874 		int		ipoff2 = 0;
7875 
7876 		pd2.af = pd->af;
7877 		pd2.dir = pd->dir;
7878 		/* Payload packet is from the opposite direction. */
7879 		pd2.sidx = (pd->dir == PF_IN) ? 1 : 0;
7880 		pd2.didx = (pd->dir == PF_IN) ? 0 : 1;
7881 		pd2.m = pd->m;
7882 		switch (pd->af) {
7883 #ifdef INET
7884 		case AF_INET:
7885 			/* offset of h2 in mbuf chain */
7886 			ipoff2 = pd->off + ICMP_MINLEN;
7887 
7888 			if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2),
7889 			    NULL, reason, pd2.af)) {
7890 				DPFPRINTF(PF_DEBUG_MISC,
7891 				    ("pf: ICMP error message too short "
7892 				    "(ip)\n"));
7893 				return (PF_DROP);
7894 			}
7895 			/*
7896 			 * ICMP error messages don't refer to non-first
7897 			 * fragments
7898 			 */
7899 			if (h2.ip_off & htons(IP_OFFMASK)) {
7900 				REASON_SET(reason, PFRES_FRAG);
7901 				return (PF_DROP);
7902 			}
7903 
7904 			/* offset of protocol header that follows h2 */
7905 			pd2.off = ipoff2 + (h2.ip_hl << 2);
7906 
7907 			pd2.proto = h2.ip_p;
7908 			pd2.src = (struct pf_addr *)&h2.ip_src;
7909 			pd2.dst = (struct pf_addr *)&h2.ip_dst;
7910 			pd2.ip_sum = &h2.ip_sum;
7911 			break;
7912 #endif /* INET */
7913 #ifdef INET6
7914 		case AF_INET6:
7915 			ipoff2 = pd->off + sizeof(struct icmp6_hdr);
7916 
7917 			if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6),
7918 			    NULL, reason, pd2.af)) {
7919 				DPFPRINTF(PF_DEBUG_MISC,
7920 				    ("pf: ICMP error message too short "
7921 				    "(ip6)\n"));
7922 				return (PF_DROP);
7923 			}
7924 			pd2.off = ipoff2;
7925 			if (pf_walk_header6(pd->m, &h2_6, &pd2.off, &extoff2,
7926 				&fragoff2, &pd2.proto, &jumbolen,
7927 				reason) != PF_PASS)
7928 				return (PF_DROP);
7929 
7930 			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
7931 			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
7932 			pd2.ip_sum = NULL;
7933 			break;
7934 #endif /* INET6 */
7935 		}
7936 
7937 		if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
7938 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
7939 				printf("pf: BAD ICMP %d:%d outer dst: ",
7940 				    icmptype, icmpcode);
7941 				pf_print_host(pd->src, 0, pd->af);
7942 				printf(" -> ");
7943 				pf_print_host(pd->dst, 0, pd->af);
7944 				printf(" inner src: ");
7945 				pf_print_host(pd2.src, 0, pd2.af);
7946 				printf(" -> ");
7947 				pf_print_host(pd2.dst, 0, pd2.af);
7948 				printf("\n");
7949 			}
7950 			REASON_SET(reason, PFRES_BADSTATE);
7951 			return (PF_DROP);
7952 		}
7953 
7954 		switch (pd2.proto) {
7955 		case IPPROTO_TCP: {
7956 			struct tcphdr		 th;
7957 			u_int32_t		 seq;
7958 			struct pf_state_peer	*src, *dst;
7959 			u_int8_t		 dws;
7960 			int			 copyback = 0;
7961 
7962 			/*
7963 			 * Only the first 8 bytes of the TCP header can be
7964 			 * expected. Don't access any TCP header fields after
7965 			 * th_seq, an ackskew test is not possible.
7966 			 */
7967 			if (!pf_pull_hdr(pd->m, pd2.off, &th, 8, NULL, reason,
7968 			    pd2.af)) {
7969 				DPFPRINTF(PF_DEBUG_MISC,
7970 				    ("pf: ICMP error message too short "
7971 				    "(tcp)\n"));
7972 				return (PF_DROP);
7973 			}
7974 
7975 			key.af = pd2.af;
7976 			key.proto = IPPROTO_TCP;
7977 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
7978 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
7979 			key.port[pd2.sidx] = th.th_sport;
7980 			key.port[pd2.didx] = th.th_dport;
7981 
7982 			STATE_LOOKUP(&key, *state, pd);
7983 
7984 			if (pd->dir == (*state)->direction) {
7985 				if (PF_REVERSED_KEY((*state)->key, pd->af)) {
7986 					src = &(*state)->src;
7987 					dst = &(*state)->dst;
7988 				} else {
7989 					src = &(*state)->dst;
7990 					dst = &(*state)->src;
7991 				}
7992 			} else {
7993 				if (PF_REVERSED_KEY((*state)->key, pd->af)) {
7994 					src = &(*state)->dst;
7995 					dst = &(*state)->src;
7996 				} else {
7997 					src = &(*state)->src;
7998 					dst = &(*state)->dst;
7999 				}
8000 			}
8001 
8002 			if (src->wscale && dst->wscale)
8003 				dws = dst->wscale & PF_WSCALE_MASK;
8004 			else
8005 				dws = 0;
8006 
8007 			/* Demodulate sequence number */
8008 			seq = ntohl(th.th_seq) - src->seqdiff;
8009 			if (src->seqdiff) {
8010 				pf_change_a(&th.th_seq, icmpsum,
8011 				    htonl(seq), 0);
8012 				copyback = 1;
8013 			}
8014 
8015 			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
8016 			    (!SEQ_GEQ(src->seqhi, seq) ||
8017 			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
8018 				if (V_pf_status.debug >= PF_DEBUG_MISC) {
8019 					printf("pf: BAD ICMP %d:%d ",
8020 					    icmptype, icmpcode);
8021 					pf_print_host(pd->src, 0, pd->af);
8022 					printf(" -> ");
8023 					pf_print_host(pd->dst, 0, pd->af);
8024 					printf(" state: ");
8025 					pf_print_state(*state);
8026 					printf(" seq=%u\n", seq);
8027 				}
8028 				REASON_SET(reason, PFRES_BADSTATE);
8029 				return (PF_DROP);
8030 			} else {
8031 				if (V_pf_status.debug >= PF_DEBUG_MISC) {
8032 					printf("pf: OK ICMP %d:%d ",
8033 					    icmptype, icmpcode);
8034 					pf_print_host(pd->src, 0, pd->af);
8035 					printf(" -> ");
8036 					pf_print_host(pd->dst, 0, pd->af);
8037 					printf(" state: ");
8038 					pf_print_state(*state);
8039 					printf(" seq=%u\n", seq);
8040 				}
8041 			}
8042 
8043 			/* translate source/destination address, if necessary */
8044 			if ((*state)->key[PF_SK_WIRE] !=
8045 			    (*state)->key[PF_SK_STACK]) {
8047 				struct pf_state_key	*nk;
8048 
8049 				if (PF_REVERSED_KEY((*state)->key, pd->af))
8050 					nk = (*state)->key[pd->sidx];
8051 				else
8052 					nk = (*state)->key[pd->didx];
8053 
8054 #if defined(INET) && defined(INET6)
8055 				int	 afto, sidx, didx;
8056 
8057 				afto = pd->af != nk->af;
8058 				sidx = afto ? pd2.didx : pd2.sidx;
8059 				didx = afto ? pd2.sidx : pd2.didx;
8060 
8061 				if (afto) {
8062 					if (pf_translate_icmp_af(nk->af,
8063 					    &pd->hdr.icmp))
8064 						return (PF_DROP);
8065 					m_copyback(pd->m, pd->off,
8066 					    sizeof(struct icmp6_hdr),
8067 					    (c_caddr_t)&pd->hdr.icmp6);
8068 					if (pf_change_icmp_af(pd->m, ipoff2, pd,
8069 					    &pd2, &nk->addr[sidx],
8070 					    &nk->addr[didx], pd->af,
8071 					    nk->af))
8072 						return (PF_DROP);
8073 					if (nk->af == AF_INET)
8074 						pd->proto = IPPROTO_ICMP;
8075 					else
8076 						pd->proto = IPPROTO_ICMPV6;
8077 					th.th_sport = nk->port[sidx];
8078 					th.th_dport = nk->port[didx];
8079 					m_copyback(pd2.m, pd2.off, 8, (c_caddr_t)&th);
8080 					PF_ACPY(pd->src,
8081 					    &nk->addr[pd2.sidx], nk->af);
8082 					PF_ACPY(pd->dst,
8083 					    &nk->addr[pd2.didx], nk->af);
8084 					pd->naf = nk->af;
8085 					return (PF_AFRT);
8086 				}
8087 #endif
8088 
8089 				if (PF_ANEQ(pd2.src,
8090 				    &nk->addr[pd2.sidx], pd2.af) ||
8091 				    nk->port[pd2.sidx] != th.th_sport)
8092 					pf_change_icmp(pd2.src, &th.th_sport,
8093 					    daddr, &nk->addr[pd2.sidx],
8094 					    nk->port[pd2.sidx], NULL,
8095 					    pd2.ip_sum, icmpsum,
8096 					    pd->ip_sum, 0, pd2.af);
8097 
8098 				if (PF_ANEQ(pd2.dst,
8099 				    &nk->addr[pd2.didx], pd2.af) ||
8100 				    nk->port[pd2.didx] != th.th_dport)
8101 					pf_change_icmp(pd2.dst, &th.th_dport,
8102 					    saddr, &nk->addr[pd2.didx],
8103 					    nk->port[pd2.didx], NULL,
8104 					    pd2.ip_sum, icmpsum,
8105 					    pd->ip_sum, 0, pd2.af);
8106 				copyback = 1;
8107 			}
8108 
8109 			if (copyback) {
8110 				switch (pd2.af) {
8111 #ifdef INET
8112 				case AF_INET:
8113 					m_copyback(pd->m, pd->off, ICMP_MINLEN,
8114 					    (caddr_t )&pd->hdr.icmp);
8115 					m_copyback(pd->m, ipoff2, sizeof(h2),
8116 					    (caddr_t )&h2);
8117 					break;
8118 #endif /* INET */
8119 #ifdef INET6
8120 				case AF_INET6:
8121 					m_copyback(pd->m, pd->off,
8122 					    sizeof(struct icmp6_hdr),
8123 					    (caddr_t )&pd->hdr.icmp6);
8124 					m_copyback(pd->m, ipoff2, sizeof(h2_6),
8125 					    (caddr_t )&h2_6);
8126 					break;
8127 #endif /* INET6 */
8128 				}
8129 				m_copyback(pd->m, pd2.off, 8, (caddr_t)&th);
8130 			}
8131 
8132 			return (PF_PASS);
8133 			break;
8134 		}
8135 		case IPPROTO_UDP: {
8136 			struct udphdr		uh;
8137 
8138 			if (!pf_pull_hdr(pd->m, pd2.off, &uh, sizeof(uh),
8139 			    NULL, reason, pd2.af)) {
8140 				DPFPRINTF(PF_DEBUG_MISC,
8141 				    ("pf: ICMP error message too short "
8142 				    "(udp)\n"));
8143 				return (PF_DROP);
8144 			}
8145 
8146 			key.af = pd2.af;
8147 			key.proto = IPPROTO_UDP;
8148 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
8149 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
8150 			key.port[pd2.sidx] = uh.uh_sport;
8151 			key.port[pd2.didx] = uh.uh_dport;
8152 
8153 			STATE_LOOKUP(&key, *state, pd);
8154 
8155 			/* translate source/destination address, if necessary */
8156 			if ((*state)->key[PF_SK_WIRE] !=
8157 			    (*state)->key[PF_SK_STACK]) {
8158 				struct pf_state_key	*nk;
8159 
8160 				if (PF_REVERSED_KEY((*state)->key, pd->af))
8161 					nk = (*state)->key[pd->sidx];
8162 				else
8163 					nk = (*state)->key[pd->didx];
8164 
8165 #if defined(INET) && defined(INET6)
8166 				int	 afto, sidx, didx;
8167 
8168 				afto = pd->af != nk->af;
8169 				sidx = afto ? pd2.didx : pd2.sidx;
8170 				didx = afto ? pd2.sidx : pd2.didx;
8171 
8172 				if (afto) {
8173 					if (pf_translate_icmp_af(nk->af,
8174 					    &pd->hdr.icmp))
8175 						return (PF_DROP);
8176 					m_copyback(pd->m, pd->off,
8177 					    sizeof(struct icmp6_hdr),
8178 					    (c_caddr_t)&pd->hdr.icmp6);
8179 					if (pf_change_icmp_af(pd->m, ipoff2, pd,
8180 					    &pd2, &nk->addr[sidx],
8181 					    &nk->addr[didx], pd->af,
8182 					    nk->af))
8183 						return (PF_DROP);
8184 					if (nk->af == AF_INET)
8185 						pd->proto = IPPROTO_ICMP;
8186 					else
8187 						pd->proto = IPPROTO_ICMPV6;
8188 					pf_change_ap(pd->m, pd2.src, &uh.uh_sport,
8189 					    pd->ip_sum, &uh.uh_sum, &nk->addr[pd2.sidx],
8190 					    nk->port[sidx], 1, pd->af, nk->af);
8191 					pf_change_ap(pd->m, pd2.dst, &uh.uh_dport,
8192 					    pd->ip_sum, &uh.uh_sum, &nk->addr[pd2.didx],
8193 					    nk->port[didx], 1, pd->af, nk->af);
8194 					m_copyback(pd2.m, pd2.off, sizeof(uh),
8195 					    (c_caddr_t)&uh);
8196 					PF_ACPY(&pd->nsaddr,
8197 					    &nk->addr[pd2.sidx], nk->af);
8198 					PF_ACPY(&pd->ndaddr,
8199 					    &nk->addr[pd2.didx], nk->af);
8200 					pd->naf = nk->af;
8201 					return (PF_AFRT);
8202 				}
8203 #endif
8204 
8205 				if (PF_ANEQ(pd2.src,
8206 				    &nk->addr[pd2.sidx], pd2.af) ||
8207 				    nk->port[pd2.sidx] != uh.uh_sport)
8208 					pf_change_icmp(pd2.src, &uh.uh_sport,
8209 					    daddr, &nk->addr[pd2.sidx],
8210 					    nk->port[pd2.sidx], &uh.uh_sum,
8211 					    pd2.ip_sum, icmpsum,
8212 					    pd->ip_sum, 1, pd2.af);
8213 
8214 				if (PF_ANEQ(pd2.dst,
8215 				    &nk->addr[pd2.didx], pd2.af) ||
8216 				    nk->port[pd2.didx] != uh.uh_dport)
8217 					pf_change_icmp(pd2.dst, &uh.uh_dport,
8218 					    saddr, &nk->addr[pd2.didx],
8219 					    nk->port[pd2.didx], &uh.uh_sum,
8220 					    pd2.ip_sum, icmpsum,
8221 					    pd->ip_sum, 1, pd2.af);
8222 
8223 				switch (pd2.af) {
8224 #ifdef INET
8225 				case AF_INET:
8226 					m_copyback(pd->m, pd->off, ICMP_MINLEN,
8227 					    (caddr_t )&pd->hdr.icmp);
8228 					m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
8229 					break;
8230 #endif /* INET */
8231 #ifdef INET6
8232 				case AF_INET6:
8233 					m_copyback(pd->m, pd->off,
8234 					    sizeof(struct icmp6_hdr),
8235 					    (caddr_t )&pd->hdr.icmp6);
8236 					m_copyback(pd->m, ipoff2, sizeof(h2_6),
8237 					    (caddr_t )&h2_6);
8238 					break;
8239 #endif /* INET6 */
8240 				}
8241 				m_copyback(pd->m, pd2.off, sizeof(uh), (caddr_t)&uh);
8242 			}
8243 			return (PF_PASS);
8244 			break;
8245 		}
8246 #ifdef INET
8247 		case IPPROTO_ICMP: {
8248 			struct icmp	*iih = &pd2.hdr.icmp;
8249 
8250 			if (pd2.af != AF_INET) {
8251 				REASON_SET(reason, PFRES_NORM);
8252 				return (PF_DROP);
8253 			}
8254 
8255 			if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN,
8256 			    NULL, reason, pd2.af)) {
8257 				DPFPRINTF(PF_DEBUG_MISC,
8258 				    ("pf: ICMP error message too short "
8259 				    "(icmp)\n"));
8260 				return (PF_DROP);
8261 			}
8262 
8263 			icmpid = iih->icmp_id;
8264 			pf_icmp_mapping(&pd2, iih->icmp_type,
8265 			    &icmp_dir, &multi, &virtual_id, &virtual_type);
8266 
8267 			ret = pf_icmp_state_lookup(&key, &pd2, state,
8268 			    pd2.dir, virtual_id, virtual_type,
8269 			    icmp_dir, &iidx, PF_ICMP_MULTI_NONE, 1);
8270 			if (ret >= 0) {
8271 				MPASS(*state == NULL);
8272 				return (ret);
8273 			}
8274 
8275 			/* translate source/destination address, if necessary */
8276 			if ((*state)->key[PF_SK_WIRE] !=
8277 			    (*state)->key[PF_SK_STACK]) {
8278 				struct pf_state_key	*nk;
8279 
8280 				if (PF_REVERSED_KEY((*state)->key, pd->af))
8281 					nk = (*state)->key[pd->sidx];
8282 				else
8283 					nk = (*state)->key[pd->didx];
8284 
8285 #if defined(INET) && defined(INET6)
8286 				int	 afto, sidx, didx;
8287 
8288 				afto = pd->af != nk->af;
8289 				sidx = afto ? pd2.didx : pd2.sidx;
8290 				didx = afto ? pd2.sidx : pd2.didx;
8291 				iidx = afto ? !iidx : iidx;
8292 
8293 				if (afto) {
8294 					if (nk->af != AF_INET6)
8295 						return (PF_DROP);
8296 					if (pf_translate_icmp_af(nk->af,
8297 					    &pd->hdr.icmp))
8298 						return (PF_DROP);
8299 					m_copyback(pd->m, pd->off,
8300 					    sizeof(struct icmp6_hdr),
8301 					    (c_caddr_t)&pd->hdr.icmp6);
8302 					if (pf_change_icmp_af(pd->m, ipoff2, pd,
8303 					    &pd2, &nk->addr[sidx],
8304 					    &nk->addr[didx], pd->af,
8305 					    nk->af))
8306 						return (PF_DROP);
8307 					pd->proto = IPPROTO_ICMPV6;
8308 					if (pf_translate_icmp_af(nk->af, iih))
8309 						return (PF_DROP);
8310 					if (virtual_type == htons(ICMP_ECHO) &&
8311 					    nk->port[iidx] != iih->icmp_id)
8312 						iih->icmp_id = nk->port[iidx];
8313 					m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
8314 					    (c_caddr_t)iih);
8315 					PF_ACPY(&pd->nsaddr,
8316 					    &nk->addr[pd2.sidx], nk->af);
8317 					PF_ACPY(&pd->ndaddr,
8318 					    &nk->addr[pd2.didx], nk->af);
8319 					pd->naf = nk->af;
8320 					return (PF_AFRT);
8321 				}
8322 #endif
8323 
8324 				if (PF_ANEQ(pd2.src,
8325 				    &nk->addr[pd2.sidx], pd2.af) ||
8326 				    (virtual_type == htons(ICMP_ECHO) &&
8327 				    nk->port[iidx] != iih->icmp_id))
8328 					pf_change_icmp(pd2.src,
8329 					    (virtual_type == htons(ICMP_ECHO)) ?
8330 					    &iih->icmp_id : NULL,
8331 					    daddr, &nk->addr[pd2.sidx],
8332 					    (virtual_type == htons(ICMP_ECHO)) ?
8333 					    nk->port[iidx] : 0, NULL,
8334 					    pd2.ip_sum, icmpsum,
8335 					    pd->ip_sum, 0, AF_INET);
8336 
8337 				if (PF_ANEQ(pd2.dst,
8338 				    &nk->addr[pd2.didx], pd2.af))
8339 					pf_change_icmp(pd2.dst, NULL, NULL,
8340 					    &nk->addr[pd2.didx], 0, NULL,
8341 					    pd2.ip_sum, icmpsum, pd->ip_sum, 0,
8342 					    AF_INET);
8343 
8344 				m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp);
8345 				m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
8346 				m_copyback(pd->m, pd2.off, ICMP_MINLEN, (caddr_t)iih);
8347 			}
8348 			return (PF_PASS);
8349 			break;
8350 		}
8351 #endif /* INET */
8352 #ifdef INET6
8353 		case IPPROTO_ICMPV6: {
8354 			struct icmp6_hdr	*iih = &pd2.hdr.icmp6;
8355 
8356 			if (pd2.af != AF_INET6) {
8357 				REASON_SET(reason, PFRES_NORM);
8358 				return (PF_DROP);
8359 			}
8360 
8361 			if (!pf_pull_hdr(pd->m, pd2.off, iih,
8362 			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
8363 				DPFPRINTF(PF_DEBUG_MISC,
8364 				    ("pf: ICMP error message too short "
8365 				    "(icmp6)\n"));
8366 				return (PF_DROP);
8367 			}
8368 
8369 			pf_icmp_mapping(&pd2, iih->icmp6_type,
8370 			    &icmp_dir, &multi, &virtual_id, &virtual_type);
8371 
8372 			ret = pf_icmp_state_lookup(&key, &pd2, state,
8373 			    pd->dir, virtual_id, virtual_type,
8374 			    icmp_dir, &iidx, PF_ICMP_MULTI_NONE, 1);
8375 			if (ret >= 0) {
8376 				MPASS(*state == NULL);
8377 				if (ret == PF_DROP && pd2.af == AF_INET6 &&
8378 				    icmp_dir == PF_OUT) {
8379 					ret = pf_icmp_state_lookup(&key, &pd2,
8380 					    state, pd->dir,
8381 					    virtual_id, virtual_type,
8382 					    icmp_dir, &iidx, multi, 1);
8383 					if (ret >= 0) {
8384 						MPASS(*state == NULL);
8385 						return (ret);
8386 					}
8387 				} else
8388 					return (ret);
8389 			}
8390 
8391 			/* translate source/destination address, if necessary */
8392 			if ((*state)->key[PF_SK_WIRE] !=
8393 			    (*state)->key[PF_SK_STACK]) {
8394 				struct pf_state_key	*nk;
8395 
8396 				if (PF_REVERSED_KEY((*state)->key, pd->af))
8397 					nk = (*state)->key[pd->sidx];
8398 				else
8399 					nk = (*state)->key[pd->didx];
8400 
8401 #if defined(INET) && defined(INET6)
8402 				int	 afto, sidx, didx;
8403 
8404 				afto = pd->af != nk->af;
8405 				sidx = afto ? pd2.didx : pd2.sidx;
8406 				didx = afto ? pd2.sidx : pd2.didx;
8407 				iidx = afto ? !iidx : iidx;
8408 
8409 				if (afto) {
8410 					if (nk->af != AF_INET)
8411 						return (PF_DROP);
8412 					if (pf_translate_icmp_af(nk->af,
8413 					    &pd->hdr.icmp))
8414 						return (PF_DROP);
8415 					m_copyback(pd->m, pd->off,
8416 					    sizeof(struct icmp6_hdr),
8417 					    (c_caddr_t)&pd->hdr.icmp6);
8418 					if (pf_change_icmp_af(pd->m, ipoff2, pd,
8419 					    &pd2, &nk->addr[sidx],
8420 					    &nk->addr[didx], pd->af,
8421 					    nk->af))
8422 						return (PF_DROP);
8423 					pd->proto = IPPROTO_ICMP;
8424 					if (pf_translate_icmp_af(nk->af, iih))
8425 						return (PF_DROP);
8426 					if (virtual_type ==
8427 					    htons(ICMP6_ECHO_REQUEST) &&
8428 					    nk->port[iidx] != iih->icmp6_id)
8429 						iih->icmp6_id = nk->port[iidx];
8430 					m_copyback(pd2.m, pd2.off,
8431 					    sizeof(struct icmp6_hdr), (c_caddr_t)iih);
8432 					PF_ACPY(&pd->nsaddr,
8433 					    &nk->addr[pd2.sidx], nk->af);
8434 					PF_ACPY(&pd->ndaddr,
8435 					    &nk->addr[pd2.didx], nk->af);
8436 					pd->naf = nk->af;
8437 					return (PF_AFRT);
8438 				}
8439 #endif
8440 
8441 				if (PF_ANEQ(pd2.src,
8442 				    &nk->addr[pd2.sidx], pd2.af) ||
8443 				    ((virtual_type == htons(ICMP6_ECHO_REQUEST)) &&
8444 				    nk->port[iidx] != iih->icmp6_id))
8445 					pf_change_icmp(pd2.src,
8446 					    (virtual_type == htons(ICMP6_ECHO_REQUEST))
8447 					    ? &iih->icmp6_id : NULL,
8448 					    daddr, &nk->addr[pd2.sidx],
8449 					    (virtual_type == htons(ICMP6_ECHO_REQUEST))
8450 					    ? nk->port[iidx] : 0, NULL,
8451 					    pd2.ip_sum, icmpsum,
8452 					    pd->ip_sum, 0, AF_INET6);
8453 
8454 				if (PF_ANEQ(pd2.dst,
8455 				    &nk->addr[pd2.didx], pd2.af))
8456 					pf_change_icmp(pd2.dst, NULL, NULL,
8457 					    &nk->addr[pd2.didx], 0, NULL,
8458 					    pd2.ip_sum, icmpsum,
8459 					    pd->ip_sum, 0, AF_INET6);
8460 
8461 				m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr),
8462 				    (caddr_t)&pd->hdr.icmp6);
8463 				m_copyback(pd->m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
8464 				m_copyback(pd->m, pd2.off, sizeof(struct icmp6_hdr),
8465 				    (caddr_t)iih);
8466 			}
8467 			return (PF_PASS);
8468 			break;
8469 		}
8470 #endif /* INET6 */
8471 		default: {
8472 			key.af = pd2.af;
8473 			key.proto = pd2.proto;
8474 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
8475 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
8476 			key.port[0] = key.port[1] = 0;
8477 
8478 			STATE_LOOKUP(&key, *state, pd);
8479 
8480 			/* translate source/destination address, if necessary */
8481 			if ((*state)->key[PF_SK_WIRE] !=
8482 			    (*state)->key[PF_SK_STACK]) {
8483 				struct pf_state_key *nk =
8484 				    (*state)->key[pd->didx];
8485 
8486 				if (PF_ANEQ(pd2.src,
8487 				    &nk->addr[pd2.sidx], pd2.af))
8488 					pf_change_icmp(pd2.src, NULL, daddr,
8489 					    &nk->addr[pd2.sidx], 0, NULL,
8490 					    pd2.ip_sum, icmpsum,
8491 					    pd->ip_sum, 0, pd2.af);
8492 
8493 				if (PF_ANEQ(pd2.dst,
8494 				    &nk->addr[pd2.didx], pd2.af))
8495 					pf_change_icmp(pd2.dst, NULL, saddr,
8496 					    &nk->addr[pd2.didx], 0, NULL,
8497 					    pd2.ip_sum, icmpsum,
8498 					    pd->ip_sum, 0, pd2.af);
8499 
8500 				switch (pd2.af) {
8501 #ifdef INET
8502 				case AF_INET:
8503 					m_copyback(pd->m, pd->off, ICMP_MINLEN,
8504 					    (caddr_t)&pd->hdr.icmp);
8505 					m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2);
8506 					break;
8507 #endif /* INET */
8508 #ifdef INET6
8509 				case AF_INET6:
8510 					m_copyback(pd->m, pd->off,
8511 					    sizeof(struct icmp6_hdr),
8512 					    (caddr_t )&pd->hdr.icmp6);
8513 					m_copyback(pd->m, ipoff2, sizeof(h2_6),
8514 					    (caddr_t )&h2_6);
8515 					break;
8516 #endif /* INET6 */
8517 				}
8518 			}
8519 			return (PF_PASS);
8520 			break;
8521 		}
8522 		}
8523 	}
8524 }
8525 
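/*
 * State tracking for protocols other than TCP, UDP, SCTP and ICMP: a
 * minimal SINGLE/MULTIPLE pseudo state machine keyed on addresses only.
 */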
8526 static int
8527 pf_test_state_other(struct pf_kstate **state, struct pf_pdesc *pd)
8528 {
8529 	struct pf_state_peer	*src, *dst;
8530 	struct pf_state_key_cmp	 key;
8531 	uint8_t			 psrc, pdst;
8532 	int			 action = PF_PASS;
8533 
8534 	bzero(&key, sizeof(key));
8535 	key.af = pd->af;
8536 	key.proto = pd->proto;
8537 	if (pd->dir == PF_IN)	{
8538 		PF_ACPY(&key.addr[0], pd->src, key.af);
8539 		PF_ACPY(&key.addr[1], pd->dst, key.af);
8540 		key.port[0] = key.port[1] = 0;
8541 	} else {
8542 		PF_ACPY(&key.addr[1], pd->src, key.af);
8543 		PF_ACPY(&key.addr[0], pd->dst, key.af);
8544 		key.port[1] = key.port[0] = 0;
8545 	}
8546 
8547 	STATE_LOOKUP(&key, *state, pd);
8548 
8549 	if (pd->dir == (*state)->direction) {
8550 		src = &(*state)->src;
8551 		dst = &(*state)->dst;
8552 		psrc = PF_PEER_SRC;
8553 		pdst = PF_PEER_DST;
8554 	} else {
8555 		src = &(*state)->dst;
8556 		dst = &(*state)->src;
8557 		psrc = PF_PEER_DST;
8558 		pdst = PF_PEER_SRC;
8559 	}
8560 
8561 	/* update states */
8562 	if (src->state < PFOTHERS_SINGLE)
8563 		pf_set_protostate(*state, psrc, PFOTHERS_SINGLE);
8564 	if (dst->state == PFOTHERS_SINGLE)
8565 		pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);
8566 
8567 	/* update expire time */
8568 	(*state)->expire = pf_get_uptime();
8569 	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
8570 		(*state)->timeout = PFTM_OTHER_MULTIPLE;
8571 	else
8572 		(*state)->timeout = PFTM_OTHER_SINGLE;
8573 
8574 	/* translate source/destination address, if necessary */
8575 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
8576 		struct pf_state_key	*nk;
8577 		int			 afto;
8578 
8579 		if (PF_REVERSED_KEY((*state)->key, pd->af))
8580 			nk = (*state)->key[pd->sidx];
8581 		else
8582 			nk = (*state)->key[pd->didx];
8583 
8584 		KASSERT(nk, ("%s: nk is null", __func__));
8585 		KASSERT(pd, ("%s: pd is null", __func__));
8586 		KASSERT(pd->src, ("%s: pd->src is null", __func__));
8587 		KASSERT(pd->dst, ("%s: pd->dst is null", __func__));
8588 
8589 		afto = pd->af != nk->af;
8590 
8591 		switch (pd->af) {
8592 #ifdef INET
8593 		case AF_INET:
8594 			if (!afto &&
8595 			    PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
8596 				pf_change_a(&pd->src->v4.s_addr,
8597 				    pd->ip_sum,
8598 				    nk->addr[pd->sidx].v4.s_addr,
8599 				    0);
8600 
8601 			if (!afto &&
8602 			    PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
8603 				pf_change_a(&pd->dst->v4.s_addr,
8604 				    pd->ip_sum,
8605 				    nk->addr[pd->didx].v4.s_addr,
8606 				    0);
8607 
8608 			break;
8609 #endif /* INET */
8610 #ifdef INET6
8611 		case AF_INET6:
8612 			if (!afto &&
8613 			    PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6))
8614 				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
8615 
8616 			if (!afto &&
8617 			    PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6))
8618 				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
8619 #endif /* INET6 */
8620 		}
8621 		if (afto) {
8622 			PF_ACPY(&pd->nsaddr,
8623 			    &nk->addr[afto ? pd->didx : pd->sidx], nk->af);
8624 			PF_ACPY(&pd->ndaddr,
8625 			    &nk->addr[afto ? pd->sidx : pd->didx], nk->af);
8626 			pd->naf = nk->af;
8627 			action = PF_AFRT;
8628 		}
8629 	}
8630 	return (action);
8631 }
8632 
8633 /*
8634  * ipoff and off are measured from the start of the mbuf chain.
8635  * h must be at "ipoff" on the mbuf chain.
8636  */
8637 void *
8638 pf_pull_hdr(const struct mbuf *m, int off, void *p, int len,
8639     u_short *actionp, u_short *reasonp, sa_family_t af)
8640 {
8641 	switch (af) {
8642 #ifdef INET
8643 	case AF_INET: {
8644 		const struct ip	*h = mtod(m, struct ip *);
8645 		u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
8646 
8647 		if (fragoff) {
8648 			if (fragoff >= len)
8649 				ACTION_SET(actionp, PF_PASS);
8650 			else {
8651 				ACTION_SET(actionp, PF_DROP);
8652 				REASON_SET(reasonp, PFRES_FRAG);
8653 			}
8654 			return (NULL);
8655 		}
8656 		if (m->m_pkthdr.len < off + len ||
8657 		    ntohs(h->ip_len) < off + len) {
8658 			ACTION_SET(actionp, PF_DROP);
8659 			REASON_SET(reasonp, PFRES_SHORT);
8660 			return (NULL);
8661 		}
8662 		break;
8663 	}
8664 #endif /* INET */
8665 #ifdef INET6
8666 	case AF_INET6: {
8667 		const struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);
8668 
8669 		if (m->m_pkthdr.len < off + len ||
8670 		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
8671 		    (unsigned)(off + len)) {
8672 			ACTION_SET(actionp, PF_DROP);
8673 			REASON_SET(reasonp, PFRES_SHORT);
8674 			return (NULL);
8675 		}
8676 		break;
8677 	}
8678 #endif /* INET6 */
8679 	}
8680 	m_copydata(m, off, len, p);
8681 	return (p);
8682 }
8683 
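/*
 * Reverse-path check: is the given source address reachable via the
 * interface the packet arrived on? Scoped IPv6 addresses, pfi_all and
 * enc(4) interfaces always pass.
 */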
8684 int
8685 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *kif,
8686     int rtableid)
8687 {
8688 	struct ifnet		*ifp;
8689 
8690 	/*
8691 	 * Skip check for addresses with embedded interface scope,
8692 	 * as they would always match anyway.
8693 	 */
8694 	if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6))
8695 		return (1);
8696 
8697 	if (af != AF_INET && af != AF_INET6)
8698 		return (0);
8699 
8700 	if (kif == V_pfi_all)
8701 		return (1);
8702 
8703 	/* Skip checks for ipsec interfaces */
8704 	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
8705 		return (1);
8706 
8707 	ifp = (kif != NULL) ? kif->pfik_ifp : NULL;
8708 
8709 	switch (af) {
8710 #ifdef INET6
8711 	case AF_INET6:
8712 		return (fib6_check_urpf(rtableid, &addr->v6, 0, NHR_NONE,
8713 		    ifp));
8714 #endif
8715 #ifdef INET
8716 	case AF_INET:
8717 		return (fib4_check_urpf(rtableid, addr->v4, 0, NHR_NONE,
8718 		    ifp));
8719 #endif
8720 	}
8721 
8722 	return (0);
8723 }
8724 
8725 #ifdef INET
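/*
 * Output path for route-to/reply-to/dup-to on IPv4: resolve the forced
 * next hop, re-run pf for forwarded packets, then hand the packet to the
 * chosen interface, fragmenting it if necessary.
 */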
8726 static void
8727 pf_route(struct mbuf **m, struct pf_krule *r, struct ifnet *oifp,
8728     struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
8729 {
8730 	struct mbuf		*m0, *m1, *md;
8731 	struct sockaddr_in	dst;
8732 	struct ip		*ip;
8733 	struct ifnet		*ifp = NULL;
8734 	int			 error = 0;
8735 	uint16_t		 ip_len, ip_off;
8736 	uint16_t		 tmp;
8737 	int			 r_dir;
8738 
8739 	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
8740 
8741 	SDT_PROBE4(pf, ip, route_to, entry, *m, pd, s, oifp);
8742 
8743 	if (s) {
8744 		r_dir = s->direction;
8745 	} else {
8746 		r_dir = r->direction;
8747 	}
8748 
8749 	KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
8750 	    r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
8751 	    __func__));
8752 
8753 	if ((pd->pf_mtag == NULL &&
8754 	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
8755 	    pd->pf_mtag->routed++ > 3) {
8756 		m0 = *m;
8757 		*m = NULL;
8758 		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8759 		goto bad_locked;
8760 	}
8761 
8762 	if (pd->act.rt_kif != NULL)
8763 		ifp = pd->act.rt_kif->pfik_ifp;
8764 
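	/*
	 * For dup-to, transmit a copy and let the original continue on
	 * its normal path. PF_MTAG_FLAG_DUPLICATED keeps the copy from
	 * being duplicated again when it re-enters pf.
	 */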
8765 	if (pd->act.rt == PF_DUPTO) {
8766 		if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
8767 			if (s != NULL) {
8768 				PF_STATE_UNLOCK(s);
8769 			}
8770 			if (ifp == oifp) {
8771 				/* When the 2nd interface is not skipped */
8772 				return;
8773 			} else {
8774 				m0 = *m;
8775 				*m = NULL;
8776 				SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8777 				goto bad;
8778 			}
8779 		} else {
8780 			pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
8781 			if (((m0 = m_dup(*m, M_NOWAIT)) == NULL)) {
8782 				if (s)
8783 					PF_STATE_UNLOCK(s);
8784 				return;
8785 			}
8786 		}
8787 	} else {
8788 		if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
8789 			pf_dummynet(pd, s, r, m);
8790 			if (s)
8791 				PF_STATE_UNLOCK(s);
8792 			return;
8793 		}
8794 		m0 = *m;
8795 	}
8796 
8797 	ip = mtod(m0, struct ip *);
8798 
8799 	bzero(&dst, sizeof(dst));
8800 	dst.sin_family = AF_INET;
8801 	dst.sin_len = sizeof(dst);
8802 	dst.sin_addr = ip->ip_dst;
8803 	dst.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
8804 
8805 	if (s != NULL){
8806 		if (r->rule_flag & PFRULE_IFBOUND &&
8807 		    pd->act.rt == PF_REPLYTO &&
8808 		    s->kif == V_pfi_all) {
8809 			s->kif = pd->act.rt_kif;
8810 			s->orig_kif = oifp->if_pf_kif;
8811 		}
8812 
8813 		if (ifp == NULL && (pd->af != pd->naf)) {
8814 			/* We're in the AFTO case. Do a route lookup. */
8815 			struct nhop_object *nh;
8816 			nh = fib4_lookup(M_GETFIB(*m), ip->ip_dst, 0, NHR_NONE, 0);
8817 			if (nh) {
8818 				ifp = nh->nh_ifp;
8819 
8820 				/* Use the gateway if needed. */
8821 				if (nh->nh_flags & NHF_GATEWAY)
8822 					dst.sin_addr = nh->gw4_sa.sin_addr;
8823 				else
8824 					dst.sin_addr = ip->ip_dst;
8825 
8826 				/*
8827 				 * Bind to the correct interface if we're
8828 				 * if-bound. We don't know which interface
8829 				 * that will be until here, so we've inserted
8830 				 * the state on V_pfi_all. Fix that now.
8831 				 */
8832 				if (s->kif == V_pfi_all && ifp != NULL &&
8833 				    r->rule_flag & PFRULE_IFBOUND)
8834 					s->kif = ifp->if_pf_kif;
8835 			}
8836 		}
8837 
8838 		PF_STATE_UNLOCK(s);
8839 	}
8840 
8841 	if (ifp == NULL) {
8842 		m0 = *m;
8843 		*m = NULL;
8844 		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8845 		goto bad;
8846 	}
8847 
8848 	if (pd->dir == PF_IN) {
8849 		if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp,
8850 		    &pd->act) != PF_PASS) {
8851 			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8852 			goto bad;
8853 		} else if (m0 == NULL) {
8854 			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8855 			goto done;
8856 		}
8857 		if (m0->m_len < sizeof(struct ip)) {
8858 			DPFPRINTF(PF_DEBUG_URGENT,
8859 			    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
8860 			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8861 			goto bad;
8862 		}
8863 		ip = mtod(m0, struct ip *);
8864 	}
8865 
8866 	if (ifp->if_flags & IFF_LOOPBACK)
8867 		m0->m_flags |= M_SKIP_FIREWALL;
8868 
8869 	ip_len = ntohs(ip->ip_len);
8870 	ip_off = ntohs(ip->ip_off);
8871 
8872 	/* Copied from FreeBSD 10.0-CURRENT ip_output. */
8873 	m0->m_pkthdr.csum_flags |= CSUM_IP;
8874 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
8875 		in_delayed_cksum(m0);
8876 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
8877 	}
8878 	if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
8879 		pf_sctp_checksum(m0, (uint32_t)(ip->ip_hl << 2));
8880 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
8881 	}
8882 
8883 	if (pd->dir == PF_IN) {
8884 		/*
8885 		 * Make sure dummynet gets the correct direction, in case it needs to
8886 		 * re-inject later.
8887 		 */
8888 		pd->dir = PF_OUT;
8889 
8890 		/*
8891 		 * The following processing is actually the rest of the inbound processing, even
8892 		 * though we've marked it as outbound (so we don't look through dummynet) and it
8893 		 * happens after the outbound processing (pf_test(PF_OUT) above).
8894 		 * Swap the dummynet pipe numbers, because it's going to come to the wrong
8895 		 * conclusion about what direction it's processing, and we can't fix it or it
8896 		 * will re-inject incorrectly. Swapping the pipe numbers means that its incorrect
8897 		 * decision will pick the right pipe, and everything will mostly work as expected.
8898 		 */
8899 		tmp = pd->act.dnrpipe;
8900 		pd->act.dnrpipe = pd->act.dnpipe;
8901 		pd->act.dnpipe = tmp;
8902 	}
8903 
8904 	/*
8905 	 * If small enough for interface, or the interface will take
8906 	 * care of the fragmentation for us, we can just send directly.
8907 	 */
8908 	if (ip_len <= ifp->if_mtu ||
8909 	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
8910 		ip->ip_sum = 0;
8911 		if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
8912 			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
8913 			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
8914 		}
8915 		m_clrprotoflags(m0);	/* Avoid confusing lower layers. */
8916 
8917 		md = m0;
8918 		error = pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md);
8919 		if (md != NULL) {
8920 			error = (*ifp->if_output)(ifp, md, sintosa(&dst), NULL);
8921 			SDT_PROBE2(pf, ip, route_to, output, ifp, error);
8922 		}
8923 		goto done;
8924 	}
8925 
8926 	/* Balk when DF bit is set or the interface didn't support TSO. */
8927 	if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
8928 		error = EMSGSIZE;
8929 		KMOD_IPSTAT_INC(ips_cantfrag);
8930 		if (pd->act.rt != PF_DUPTO) {
8931 			if (s && s->nat_rule != NULL)
8932 				PACKET_UNDO_NAT(m0, pd,
8933 				    (ip->ip_hl << 2) + (ip_off & IP_OFFMASK),
8934 				    s);
8935 
8936 			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
8937 			    ifp->if_mtu);
8938 			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8939 			goto done;
8940 		} else {
8941 			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8942 			goto bad;
8943 		}
8944 	}
8945 
8946 	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
8947 	if (error) {
8948 		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
8949 		goto bad;
8950 	}
8951 
8952 	for (; m0; m0 = m1) {
8953 		m1 = m0->m_nextpkt;
8954 		m0->m_nextpkt = NULL;
8955 		if (error == 0) {
8956 			m_clrprotoflags(m0);
8957 			md = m0;
8958 			pd->pf_mtag = pf_find_mtag(md);
8959 			error = pf_dummynet_route(pd, s, r, ifp,
8960 			    sintosa(&dst), &md);
8961 			if (md != NULL) {
8962 				error = (*ifp->if_output)(ifp, md,
8963 				    sintosa(&dst), NULL);
8964 				SDT_PROBE2(pf, ip, route_to, output, ifp, error);
8965 			}
8966 		} else
8967 			m_freem(m0);
8968 	}
8969 
8970 	if (error == 0)
8971 		KMOD_IPSTAT_INC(ips_fragmented);
8972 
8973 done:
8974 	if (pd->act.rt != PF_DUPTO)
8975 		*m = NULL;
8976 	return;
8977 
8978 bad_locked:
8979 	if (s)
8980 		PF_STATE_UNLOCK(s);
8981 bad:
8982 	m_freem(m0);
8983 	goto done;
8984 }
8985 #endif /* INET */
8986 
8987 #ifdef INET6
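/*
 * IPv6 counterpart of pf_route(): route-to/reply-to/dup-to output,
 * with refragmentation instead of IPv4-style fragmentation.
 */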
8988 static void
8989 pf_route6(struct mbuf **m, struct pf_krule *r, struct ifnet *oifp,
8990     struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
8991 {
8992 	struct mbuf		*m0, *md;
8993 	struct m_tag		*mtag;
8994 	struct sockaddr_in6	dst;
8995 	struct ip6_hdr		*ip6;
8996 	struct ifnet		*ifp = NULL;
8997 	int			 r_dir;
8998 
8999 	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
9000 
9001 	SDT_PROBE4(pf, ip6, route_to, entry, *m, pd, s, oifp);
9002 
9003 	if (s) {
9004 		r_dir = s->direction;
9005 	} else {
9006 		r_dir = r->direction;
9007 	}
9008 
9009 	KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
9010 	    r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
9011 	    __func__));
9012 
9013 	if ((pd->pf_mtag == NULL &&
9014 	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
9015 	    pd->pf_mtag->routed++ > 3) {
9016 		m0 = *m;
9017 		*m = NULL;
9018 		SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9019 		goto bad_locked;
9020 	}
9021 
9022 	if (pd->act.rt_kif != NULL)
9023 		ifp = pd->act.rt_kif->pfik_ifp;
9024 
9025 	if (pd->act.rt == PF_DUPTO) {
9026 		if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
9027 			if (s != NULL) {
9028 				PF_STATE_UNLOCK(s);
9029 			}
9030 			if (ifp == oifp) {
9031 				/* When the 2nd interface is not skipped */
9032 				return;
9033 			} else {
9034 				m0 = *m;
9035 				*m = NULL;
9036 				SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9037 				goto bad;
9038 			}
9039 		} else {
9040 			pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
9041 			if (((m0 = m_dup(*m, M_NOWAIT)) == NULL)) {
9042 				if (s)
9043 					PF_STATE_UNLOCK(s);
9044 				return;
9045 			}
9046 		}
9047 	} else {
9048 		if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
9049 			pf_dummynet(pd, s, r, m);
9050 			if (s)
9051 				PF_STATE_UNLOCK(s);
9052 			return;
9053 		}
9054 		m0 = *m;
9055 	}
9056 
9057 	ip6 = mtod(m0, struct ip6_hdr *);
9058 
9059 	bzero(&dst, sizeof(dst));
9060 	dst.sin6_family = AF_INET6;
9061 	dst.sin6_len = sizeof(dst);
9062 	dst.sin6_addr = ip6->ip6_dst;
9063 	PF_ACPY((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, AF_INET6);
9064 
9065 	if (s != NULL) {
9066 		if (r->rule_flag & PFRULE_IFBOUND &&
9067 		    pd->act.rt == PF_REPLYTO &&
9068 		    s->kif == V_pfi_all) {
9069 			s->kif = pd->act.rt_kif;
9070 			s->orig_kif = oifp->if_pf_kif;
9071 		}
9072 
9073 		if (ifp == NULL && (pd->af != pd->naf)) {
9074 			struct nhop_object *nh;
9075 			nh = fib6_lookup(M_GETFIB(*m), &ip6->ip6_dst, 0, NHR_NONE, 0);
9076 			if (nh) {
9077 				ifp = nh->nh_ifp;
9078 
9079 				/* Use the gateway if needed. */
9080 				if (nh->nh_flags & NHF_GATEWAY)
9081 					bcopy(&nh->gw6_sa.sin6_addr, &dst.sin6_addr,
9082 					    sizeof(dst.sin6_addr));
9083 				else
9084 					dst.sin6_addr = ip6->ip6_dst;
9085 
9086 				/*
9087 				 * Bind to the correct interface if we're
9088 				 * if-bound. We don't know which interface
9089 				 * that will be until here, so we've inserted
9090 				 * the state on V_pfi_all. Fix that now.
9091 				 */
9092 				if (s->kif == V_pfi_all && ifp != NULL &&
9093 				    r->rule_flag & PFRULE_IFBOUND)
9094 					s->kif = ifp->if_pf_kif;
9095 			}
9096 		}
9097 
9098 		PF_STATE_UNLOCK(s);
9099 	}
9100 
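	/*
	 * After an af-to translation from IPv4 a UDP checksum of zero
	 * (IPv4 shorthand for "no checksum") is invalid on IPv6, so
	 * compute a real one here.
	 */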
9101 	if (pd->af != pd->naf) {
9102 		struct udphdr *uh = &pd->hdr.udp;
9103 
9104 		if (pd->proto == IPPROTO_UDP && uh->uh_sum == 0) {
9105 			uh->uh_sum = in6_cksum_pseudo(ip6,
9106 			    ntohs(uh->uh_ulen), IPPROTO_UDP, 0);
9107 			m_copyback(m0, pd->off, sizeof(*uh), pd->hdr.any);
9108 		}
9109 	}
9110 
9111 	if (ifp == NULL) {
9112 		m0 = *m;
9113 		*m = NULL;
9114 		SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9115 		goto bad;
9116 	}
9117 
9118 	if (pd->dir == PF_IN) {
9119 		if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT,
9120 		    ifp, &m0, inp, &pd->act) != PF_PASS) {
9121 			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9122 			goto bad;
9123 		} else if (m0 == NULL) {
9124 			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9125 			goto done;
9126 		}
9127 		if (m0->m_len < sizeof(struct ip6_hdr)) {
9128 			DPFPRINTF(PF_DEBUG_URGENT,
9129 			    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
9130 			    __func__));
9131 			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9132 			goto bad;
9133 		}
9134 		ip6 = mtod(m0, struct ip6_hdr *);
9135 	}
9136 
9137 	if (ifp->if_flags & IFF_LOOPBACK)
9138 		m0->m_flags |= M_SKIP_FIREWALL;
9139 
9140 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
9141 	    ~ifp->if_hwassist) {
9142 		uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
9143 		in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
9144 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
9145 	}
9146 
9147 	if (pd->dir == PF_IN) {
9148 		uint16_t	 tmp;
9149 		/*
9150 		 * Make sure dummynet gets the correct direction, in case it needs to
9151 		 * re-inject later.
9152 		 */
9153 		pd->dir = PF_OUT;
9154 
9155 		/*
9156 		 * The following processing is actually the rest of the inbound processing, even
9157 		 * though we've marked it as outbound (so we don't look through dummynet) and it
9158 		 * happens after the outbound processing (pf_test(PF_OUT) above).
9159 		 * Swap the dummynet pipe numbers, because it's going to come to the wrong
9160 		 * conclusion about what direction it's processing, and we can't fix it or it
9161 		 * will re-inject incorrectly. Swapping the pipe numbers means that its incorrect
9162 		 * decision will pick the right pipe, and everything will mostly work as expected.
9163 		 */
9164 		tmp = pd->act.dnrpipe;
9165 		pd->act.dnrpipe = pd->act.dnpipe;
9166 		pd->act.dnpipe = tmp;
9167 	}
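	/*
	 * Illustrative sketch (hypothetical ruleset): with a rule such as
	 * "pass in on em0 ... dnpipe (1, 2)", an inbound routed packet
	 * reaches this point re-marked PF_OUT, so dummynet would select
	 * the reverse pipe (2) for what is really forward traffic; with
	 * the swap above its wrong pick lands on pipe 1 as intended.
	 */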
9168 
9169 	/*
9170 	 * If the packet is too large for the outgoing interface,
9171 	 * send back an icmp6 error.
9172 	 */
9173 	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
9174 		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
9175 	mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL);
9176 	if (mtag != NULL) {
9177 		int ret __sdt_used;
9178 		ret = pf_refragment6(ifp, &m0, mtag, ifp, true);
9179 		SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
9180 		goto done;
9181 	}
9182 
9183 	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
9184 		md = m0;
9185 		pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md);
9186 		if (md != NULL) {
9187 			int ret __sdt_used;
9188 			ret = nd6_output_ifp(ifp, ifp, md, &dst, NULL);
9189 			SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
9190 		}
9191 	} else {
9193 		in6_ifstat_inc(ifp, ifs6_in_toobig);
9194 		if (pd->act.rt != PF_DUPTO) {
9195 			if (s && s->nat_rule != NULL)
9196 				PACKET_UNDO_NAT(m0, pd,
9197 				    ((caddr_t)ip6 - m0->m_data) +
9198 				    sizeof(struct ip6_hdr), s);
9199 
9200 			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
9201 			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9202 		} else {
9203 			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
9204 			goto bad;
9205 		}
9206 	}
9207 
9208 done:
9209 	if (pd->act.rt != PF_DUPTO)
9210 		*m = NULL;
9211 	return;
9212 
9213 bad_locked:
9214 	if (s)
9215 		PF_STATE_UNLOCK(s);
9216 bad:
9217 	m_freem(m0);
9218 	goto done;
9219 }
9220 #endif /* INET6 */
9221 
9222 /*
9223  * FreeBSD supports cksum offloads for the following drivers:
9224  *  em(4), fxp(4), lge(4), nge(4), re(4), ti(4), txp(4), xl(4)
9225  *
9226  * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
9227  *  the network driver performed the cksum including the pseudo header;
9228  *   csum_data needs to be verified
9229  * CSUM_DATA_VALID :
9230  *  the network driver performed the cksum, but an additional pseudo
9231  *  header cksum computation with the partial csum_data is needed (i.e.
9232  *  lack of H/W support for the pseudo header, e.g. sk(4) and possibly gem(4))
9233  *
9234  * After validating the cksum of the packet, set both the CSUM_DATA_VALID
9235  * and CSUM_PSEUDO_HDR flags in order to avoid recomputing the cksum in
9236  * the upper TCP/UDP layer.
9237  * Also, set csum_data to 0xffff to force cksum validation.
9238  */
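/*
 * Sketch of the mbuf state after this function validates a TCP or UDP
 * checksum (matching the tail of pf_check_proto_cksum() below):
 *
 *	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 *	m->m_pkthdr.csum_data = 0xffff;
 *
 * so the upper TCP/UDP layer accepts the checksum without recomputing it.
 */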
9239 static int
9240 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
9241 {
9242 	u_int16_t sum = 0;
9243 	int hw_assist = 0;
9244 	struct ip *ip;
9245 
9246 	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
9247 		return (1);
9248 	if (m->m_pkthdr.len < off + len)
9249 		return (1);
9250 
9251 	switch (p) {
9252 	case IPPROTO_TCP:
9253 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
9254 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
9255 				sum = m->m_pkthdr.csum_data;
9256 			} else {
9257 				ip = mtod(m, struct ip *);
9258 				sum = in_pseudo(ip->ip_src.s_addr,
9259 				ip->ip_dst.s_addr, htonl((u_short)len +
9260 				m->m_pkthdr.csum_data + IPPROTO_TCP));
9261 			}
9262 			sum ^= 0xffff;
9263 			++hw_assist;
9264 		}
9265 		break;
9266 	case IPPROTO_UDP:
9267 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
9268 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
9269 				sum = m->m_pkthdr.csum_data;
9270 			} else {
9271 				ip = mtod(m, struct ip *);
9272 				sum = in_pseudo(ip->ip_src.s_addr,
9273 				ip->ip_dst.s_addr, htonl((u_short)len +
9274 				m->m_pkthdr.csum_data + IPPROTO_UDP));
9275 			}
9276 			sum ^= 0xffff;
9277 			++hw_assist;
9278 		}
9279 		break;
9280 	case IPPROTO_ICMP:
9281 #ifdef INET6
9282 	case IPPROTO_ICMPV6:
9283 #endif /* INET6 */
9284 		break;
9285 	default:
9286 		return (1);
9287 	}
9288 
9289 	if (!hw_assist) {
9290 		switch (af) {
9291 		case AF_INET:
9292 			if (p == IPPROTO_ICMP) {
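				/*
				 * ICMP has no pseudo header: temporarily
				 * advance the mbuf data pointer so that
				 * in_cksum() covers only the ICMP message.
				 */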
9293 				if (m->m_len < off)
9294 					return (1);
9295 				m->m_data += off;
9296 				m->m_len -= off;
9297 				sum = in_cksum(m, len);
9298 				m->m_data -= off;
9299 				m->m_len += off;
9300 			} else {
9301 				if (m->m_len < sizeof(struct ip))
9302 					return (1);
9303 				sum = in4_cksum(m, p, off, len);
9304 			}
9305 			break;
9306 #ifdef INET6
9307 		case AF_INET6:
9308 			if (m->m_len < sizeof(struct ip6_hdr))
9309 				return (1);
9310 			sum = in6_cksum(m, p, off, len);
9311 			break;
9312 #endif /* INET6 */
9313 		}
9314 	}
9315 	if (sum) {
9316 		switch (p) {
9317 		case IPPROTO_TCP:
9318 		    {
9319 			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
9320 			break;
9321 		    }
9322 		case IPPROTO_UDP:
9323 		    {
9324 			KMOD_UDPSTAT_INC(udps_badsum);
9325 			break;
9326 		    }
9327 #ifdef INET
9328 		case IPPROTO_ICMP:
9329 		    {
9330 			KMOD_ICMPSTAT_INC(icps_checksum);
9331 			break;
9332 		    }
9333 #endif
9334 #ifdef INET6
9335 		case IPPROTO_ICMPV6:
9336 		    {
9337 			KMOD_ICMP6STAT_INC(icp6s_checksum);
9338 			break;
9339 		    }
9340 #endif /* INET6 */
9341 		}
9342 		return (1);
9343 	} else {
9344 		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
9345 			m->m_pkthdr.csum_flags |=
9346 			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
9347 			m->m_pkthdr.csum_data = 0xffff;
9348 		}
9349 	}
9350 	return (0);
9351 }
9352 
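/*
 * Fill a dummynet ip_fw_args flow descriptor from the pf packet
 * description: direction flags, ports, addresses and the configured
 * pipe or queue number. Returns false if the packet has already been
 * through dummynet or if no pipe applies in this direction.
 */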
9353 static bool
9354 pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r,
9355     const struct pf_kstate *s, struct ip_fw_args *dnflow)
9356 {
9357 	int dndir = r->direction;
9358 
9359 	if (s && dndir == PF_INOUT) {
9360 		dndir = s->direction;
9361 	} else if (dndir == PF_INOUT) {
9362 		/* Assume primary direction. Happens when we've set dnpipe in
9363 		 * the ethernet level code. */
9364 		dndir = pd->dir;
9365 	}
9366 
9367 	if (pd->pf_mtag->flags & PF_MTAG_FLAG_DUMMYNETED)
9368 		return (false);
9369 
9370 	memset(dnflow, 0, sizeof(*dnflow));
9371 
9372 	if (pd->dport != NULL)
9373 		dnflow->f_id.dst_port = ntohs(*pd->dport);
9374 	if (pd->sport != NULL)
9375 		dnflow->f_id.src_port = ntohs(*pd->sport);
9376 
9377 	if (pd->dir == PF_IN)
9378 		dnflow->flags |= IPFW_ARGS_IN;
9379 	else
9380 		dnflow->flags |= IPFW_ARGS_OUT;
9381 
9382 	if (pd->dir != dndir && pd->act.dnrpipe) {
9383 		dnflow->rule.info = pd->act.dnrpipe;
9384 	} else if (pd->dir == dndir && pd->act.dnpipe) {
9385 		dnflow->rule.info = pd->act.dnpipe;
9386 	} else {
9387 		return (false);
9388 	}
9391 
9392 	dnflow->rule.info |= IPFW_IS_DUMMYNET;
9393 	if (r->free_flags & PFRULE_DN_IS_PIPE || pd->act.flags & PFSTATE_DN_IS_PIPE)
9394 		dnflow->rule.info |= IPFW_IS_PIPE;
9395 
9396 	dnflow->f_id.proto = pd->proto;
9397 	dnflow->f_id.extra = dnflow->rule.info;
9398 	switch (pd->naf) {
9399 	case AF_INET:
9400 		dnflow->f_id.addr_type = 4;
9401 		dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr);
9402 		dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr);
9403 		break;
9404 	case AF_INET6:
9405 		dnflow->flags |= IPFW_ARGS_IP6;
9406 		dnflow->f_id.addr_type = 6;
9407 		dnflow->f_id.src_ip6 = pd->src->v6;
9408 		dnflow->f_id.dst_ip6 = pd->dst->v6;
9409 		break;
9410 	}
9411 
9412 	return (true);
9413 }
9414 
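/*
 * Ethernet-layer entry point: run the stateless ethernet ruleset on a
 * frame, unless pf is stopped, the interface is marked to be skipped,
 * or the frame carries M_SKIP_FIREWALL.
 */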
9415 int
9416 pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
9417     struct inpcb *inp)
9418 {
9419 	struct pfi_kkif		*kif;
9420 	struct mbuf		*m = *m0;
9421 
9422 	M_ASSERTPKTHDR(m);
9423 	MPASS(ifp->if_vnet == curvnet);
9424 	NET_EPOCH_ASSERT();
9425 
9426 	if (!V_pf_status.running)
9427 		return (PF_PASS);
9428 
9429 	kif = (struct pfi_kkif *)ifp->if_pf_kif;
9430 
9431 	if (kif == NULL) {
9432 		DPFPRINTF(PF_DEBUG_URGENT,
9433 		    ("%s: kif == NULL, if_xname %s\n", __func__, ifp->if_xname));
9434 		return (PF_DROP);
9435 	}
9436 	if (kif->pfik_flags & PFI_IFLAG_SKIP)
9437 		return (PF_PASS);
9438 
9439 	if (m->m_flags & M_SKIP_FIREWALL)
9440 		return (PF_PASS);
9441 
9442 	if (__predict_false(! M_WRITABLE(*m0))) {
9443 		m = *m0 = m_unshare(*m0, M_NOWAIT);
9444 		if (*m0 == NULL)
9445 			return (PF_DROP);
9446 	}
9447 
9448 	/* Stateless! */
9449 	return (pf_test_eth_rule(dir, kif, m0));
9450 }
9451 
9452 static __inline void
9453 pf_dummynet_flag_remove(struct mbuf *m, struct pf_mtag *pf_mtag)
9454 {
9455 	struct m_tag *mtag;
9456 
9457 	pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET;
9458 
9459 	/* dummynet adds this tag, but pf does not need it,
9460 	 * and keeping it creates unexpected behavior,
9461 	 * e.g. in case of divert(4) usage right after dummynet. */
9462 	mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
9463 	if (mtag != NULL)
9464 		m_tag_delete(m, mtag);
9465 }
9466 
9467 static int
9468 pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s,
9469     struct pf_krule *r, struct mbuf **m0)
9470 {
9471 	return (pf_dummynet_route(pd, s, r, NULL, NULL, m0));
9472 }
9473 
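/*
 * Hand a packet to dummynet if a pipe is configured, optionally stashing
 * a pending route-to decision (ifp/sa) in the pf mtag so it can be
 * completed when dummynet re-injects the packet. On allocation failure
 * the mbuf is freed and *m0 set to NULL.
 */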
9474 static int
9475 pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s,
9476     struct pf_krule *r, struct ifnet *ifp, struct sockaddr *sa,
9477     struct mbuf **m0)
9478 {
9479 	struct ip_fw_args dnflow;
9480 
9481 	NET_EPOCH_ASSERT();
9482 
9483 	if (pd->act.dnpipe == 0 && pd->act.dnrpipe == 0)
9484 		return (0);
9485 
9486 	if (ip_dn_io_ptr == NULL) {
9487 		m_freem(*m0);
9488 		*m0 = NULL;
9489 		return (ENOMEM);
9490 	}
9491 
9492 	if (pd->pf_mtag == NULL &&
9493 	    ((pd->pf_mtag = pf_get_mtag(*m0)) == NULL)) {
9494 		m_freem(*m0);
9495 		*m0 = NULL;
9496 		return (ENOMEM);
9497 	}
9498 
9499 	if (ifp != NULL) {
9500 		pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO;
9501 
9502 		pd->pf_mtag->if_index = ifp->if_index;
9503 		pd->pf_mtag->if_idxgen = ifp->if_idxgen;
9504 
9505 		MPASS(sa != NULL);
9506 
9507 		switch (pd->naf) {
9508 		case AF_INET:
9509 			memcpy(&pd->pf_mtag->dst, sa,
9510 			    sizeof(struct sockaddr_in));
9511 			break;
9512 		case AF_INET6:
9513 			memcpy(&pd->pf_mtag->dst, sa,
9514 			    sizeof(struct sockaddr_in6));
9515 			break;
9516 		}
9517 	}
9518 
9519 	if (s != NULL && s->nat_rule != NULL &&
9520 	    s->nat_rule->action == PF_RDR &&
9521 	    (
9522 #ifdef INET
9523 	    (pd->af == AF_INET && IN_LOOPBACK(ntohl(pd->dst->v4.s_addr))) ||
9524 #endif
9525 	    (pd->af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd->dst->v6)))) {
9526 		/*
9527 		 * If we're redirecting to loopback mark this packet
9528 		 * as being local. Otherwise it might get dropped
9529 		 * if dummynet re-injects.
9530 		 */
9531 		(*m0)->m_pkthdr.rcvif = V_loif;
9532 	}
9533 
9534 	if (pf_pdesc_to_dnflow(pd, r, s, &dnflow)) {
9535 		pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET;
9536 		pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNETED;
9537 		ip_dn_io_ptr(m0, &dnflow);
9538 		if (*m0 != NULL) {
9539 			pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
9540 			pf_dummynet_flag_remove(*m0, pd->pf_mtag);
9541 		}
9542 	}
9543 
9544 	return (0);
9545 }
9546 
9547 #ifdef INET6
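/*
 * Walk the options in an IPv6 hop-by-hop options header, validating
 * option lengths and extracting the jumbo payload length if present.
 */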
9548 static int
9549 pf_walk_option6(struct mbuf *m, int off, int end, uint32_t *jumbolen,
9550     u_short *reason)
9551 {
9552 	struct ip6_opt		 opt;
9553 	struct ip6_opt_jumbo	 jumbo;
9554 	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
9555 
9556 	while (off < end) {
9557 		if (!pf_pull_hdr(m, off, &opt.ip6o_type, sizeof(opt.ip6o_type),
9558 			NULL, reason, AF_INET6)) {
9559 			DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short opt type"));
9560 			return (PF_DROP);
9561 		}
9562 		if (opt.ip6o_type == IP6OPT_PAD1) {
9563 			off++;
9564 			continue;
9565 		}
9566 		if (!pf_pull_hdr(m, off, &opt, sizeof(opt), NULL, reason,
9567 			AF_INET6)) {
9568 			DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short opt"));
9569 			return (PF_DROP);
9570 		}
9571 		if (off + sizeof(opt) + opt.ip6o_len > end) {
9572 			DPFPRINTF(PF_DEBUG_MISC, ("IPv6 long opt"));
9573 			REASON_SET(reason, PFRES_IPOPTIONS);
9574 			return (PF_DROP);
9575 		}
9576 		switch (opt.ip6o_type) {
9577 		case IP6OPT_JUMBO:
9578 			if (*jumbolen != 0) {
9579 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple jumbo"));
9580 				REASON_SET(reason, PFRES_IPOPTIONS);
9581 				return (PF_DROP);
9582 			}
9583 			if (ntohs(h->ip6_plen) != 0) {
9584 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 bad jumbo plen"));
9585 				REASON_SET(reason, PFRES_IPOPTIONS);
9586 				return (PF_DROP);
9587 			}
9588 			if (!pf_pull_hdr(m, off, &jumbo, sizeof(jumbo), NULL,
9589 				reason, AF_INET6)) {
9590 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short jumbo"));
9591 				return (PF_DROP);
9592 			}
9593 			memcpy(jumbolen, jumbo.ip6oj_jumbo_len,
9594 			    sizeof(*jumbolen));
9595 			*jumbolen = ntohl(*jumbolen);
9596 			if (*jumbolen < IPV6_MAXPACKET) {
9597 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short jumbolen"));
9598 				REASON_SET(reason, PFRES_IPOPTIONS);
9599 				return (PF_DROP);
9600 			}
9601 			break;
9602 		default:
9603 			break;
9604 		}
9605 		off += sizeof(opt) + opt.ip6o_len;
9606 	}
9607 
9608 	return (PF_PASS);
9609 }
9610 
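/*
 * Walk the IPv6 extension header chain, recording the offsets of the
 * last extension header (*extoff) and of the fragment header (*fragoff),
 * the final protocol (*nxt) and any jumbo payload length. Malformed
 * chains (duplicate fragment or routing headers, rthdr0, bad jumbo
 * options) are rejected with PF_DROP.
 */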
9611 int
9612 pf_walk_header6(struct mbuf *m, struct ip6_hdr *h, int *off, int *extoff,
9613     int *fragoff, uint8_t *nxt, uint32_t *jumbolen, u_short *reason)
9614 {
9615 	struct ip6_frag		 frag;
9616 	struct ip6_ext		 ext;
9617 	struct ip6_rthdr	 rthdr;
9618 	int			 rthdr_cnt = 0;
9619 
9620 	*off += sizeof(struct ip6_hdr);
9621 	*extoff = *fragoff = 0;
9622 	*nxt = h->ip6_nxt;
9623 	*jumbolen = 0;
9624 	for (;;) {
9625 		switch (*nxt) {
9626 		case IPPROTO_FRAGMENT:
9627 			if (*fragoff != 0) {
9628 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple fragment"));
9629 				REASON_SET(reason, PFRES_FRAG);
9630 				return (PF_DROP);
9631 			}
9632 			/* jumbo payload packets cannot be fragmented */
9633 			if (*jumbolen != 0) {
9634 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 fragmented jumbo"));
9635 				REASON_SET(reason, PFRES_FRAG);
9636 				return (PF_DROP);
9637 			}
9638 			if (!pf_pull_hdr(m, *off, &frag, sizeof(frag), NULL,
9639 				reason, AF_INET6)) {
9640 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short fragment"));
9641 				return (PF_DROP);
9642 			}
9643 			*fragoff = *off;
9644 			/* stop walking over non-initial fragments */
9645 			if ((frag.ip6f_offlg & IP6F_OFF_MASK) != 0)
9646 				return (PF_PASS);
9647 			*off += sizeof(frag);
9648 			*nxt = frag.ip6f_nxt;
9649 			break;
9650 		case IPPROTO_ROUTING:
9651 			if (rthdr_cnt++) {
9652 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple rthdr"));
9653 				REASON_SET(reason, PFRES_IPOPTIONS);
9654 				return (PF_DROP);
9655 			}
9656 			if (!pf_pull_hdr(m, *off, &rthdr, sizeof(rthdr), NULL,
9657 				reason, AF_INET6)) {
9658 				/* fragments may be short */
9659 				if (*fragoff != 0) {
9660 					*off = *fragoff;
9661 					*nxt = IPPROTO_FRAGMENT;
9662 					return (PF_PASS);
9663 				}
9664 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short rthdr"));
9665 				return (PF_DROP);
9666 			}
9667 			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
9668 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 rthdr0"));
9669 				REASON_SET(reason, PFRES_IPOPTIONS);
9670 				return (PF_DROP);
9671 			}
9672 			/* FALLTHROUGH */
9673 		case IPPROTO_AH:
9674 		case IPPROTO_HOPOPTS:
9675 		case IPPROTO_DSTOPTS:
9676 			if (!pf_pull_hdr(m, *off, &ext, sizeof(ext), NULL,
9677 				reason, AF_INET6)) {
9678 				/* fragments may be short */
9679 				if (*fragoff != 0) {
9680 					*off = *fragoff;
9681 					*nxt = IPPROTO_FRAGMENT;
9682 					return (PF_PASS);
9683 				}
9684 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short exthdr"));
9685 				return (PF_DROP);
9686 			}
9687 			/* reassembly needs the ext header before the frag */
9688 			if (*fragoff == 0)
9689 				*extoff = *off;
9690 			if (*nxt == IPPROTO_HOPOPTS && *fragoff == 0) {
9691 				if (pf_walk_option6(m, *off + sizeof(ext),
9692 					*off + (ext.ip6e_len + 1) * 8, jumbolen,
9693 					reason) != PF_PASS)
9694 					return (PF_DROP);
9695 				if (ntohs(h->ip6_plen) == 0 && *jumbolen != 0) {
9696 					DPFPRINTF(PF_DEBUG_MISC,
9697 					    ("IPv6 missing jumbo"));
9698 					REASON_SET(reason, PFRES_IPOPTIONS);
9699 					return (PF_DROP);
9700 				}
9701 			}
9702 			if (*nxt == IPPROTO_AH)
9703 				*off += (ext.ip6e_len + 2) * 4;
9704 			else
9705 				*off += (ext.ip6e_len + 1) * 8;
9706 			*nxt = ext.ip6e_nxt;
9707 			break;
9708 		case IPPROTO_TCP:
9709 		case IPPROTO_UDP:
9710 		case IPPROTO_SCTP:
9711 		case IPPROTO_ICMPV6:
9712 			/* fragments may be short, ignore inner header then */
9713 			if (*fragoff != 0 && ntohs(h->ip6_plen) < *off +
9714 			    (*nxt == IPPROTO_TCP ? sizeof(struct tcphdr) :
9715 			    *nxt == IPPROTO_UDP ? sizeof(struct udphdr) :
9716 			    *nxt == IPPROTO_SCTP ? sizeof(struct sctphdr) :
9717 			    sizeof(struct icmp6_hdr))) {
9718 				*off = *fragoff;
9719 				*nxt = IPPROTO_FRAGMENT;
9720 			}
9721 			/* FALLTHROUGH */
9722 		default:
9723 			return (PF_PASS);
9724 		}
9725 	}
9726 }
9727 #endif
9728 
9729 static void
9730 pf_init_pdesc(struct pf_pdesc *pd, struct mbuf *m)
9731 {
9732 	memset(pd, 0, sizeof(*pd));
9733 	pd->pf_mtag = pf_find_mtag(m);
9734 	pd->m = m;
9735 }
9736 
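/*
 * Parse the network and transport headers of a packet into the packet
 * description, running normalization and reassembly along the way.
 * Returns 0 on success, or -1 with *action and *reason set for the
 * caller; *action may be PF_PASS when a fragment has merely been
 * queued for reassembly.
 */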
9737 static int
9738 pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
9739     u_short *action, u_short *reason, struct pfi_kkif *kif,
9740     struct pf_rule_actions *default_actions)
9741 {
9742 	pd->dir = dir;
9743 	pd->kif = kif;
9744 	pd->m = *m0;
9745 	pd->sidx = (dir == PF_IN) ? 0 : 1;
9746 	pd->didx = (dir == PF_IN) ? 1 : 0;
9747 	pd->af = pd->naf = af;
9748 
9749 	TAILQ_INIT(&pd->sctp_multihome_jobs);
9750 	if (default_actions != NULL)
9751 		memcpy(&pd->act, default_actions, sizeof(pd->act));
9752 
9753 	if (pd->pf_mtag && pd->pf_mtag->dnpipe) {
9754 		pd->act.dnpipe = pd->pf_mtag->dnpipe;
9755 		pd->act.flags = pd->pf_mtag->dnflags;
9756 	}
9757 
9758 	switch (af) {
9759 #ifdef INET
9760 	case AF_INET: {
9761 		struct ip *h;
9762 
9763 		if (__predict_false((*m0)->m_len < sizeof(struct ip)) &&
9764 		    (pd->m = *m0 = m_pullup(*m0, sizeof(struct ip))) == NULL) {
9765 			DPFPRINTF(PF_DEBUG_URGENT,
9766 			    ("%s: m_len < sizeof(struct ip), pullup failed\n", __func__));
9767 			*action = PF_DROP;
9768 			REASON_SET(reason, PFRES_SHORT);
9769 			return (-1);
9770 		}
9771 
9772 		/* We do IP header normalization and packet reassembly here */
9773 		if (pf_normalize_ip(m0, reason, pd) != PF_PASS) {
9774 			*action = PF_DROP;
9775 			return (-1);
9776 		}
9777 		pd->m = *m0;
9778 
9779 		h = mtod(pd->m, struct ip *);
9780 		pd->off = h->ip_hl << 2;
9781 		if (pd->off < (int)sizeof(*h)) {
9782 			*action = PF_DROP;
9783 			REASON_SET(reason, PFRES_SHORT);
9784 			return (-1);
9785 		}
9786 		pd->src = (struct pf_addr *)&h->ip_src;
9787 		pd->dst = (struct pf_addr *)&h->ip_dst;
9788 		pd->ip_sum = &h->ip_sum;
9789 		pd->virtual_proto = pd->proto = h->ip_p;
9790 		pd->tos = h->ip_tos;
9791 		pd->ttl = h->ip_ttl;
9792 		pd->tot_len = ntohs(h->ip_len);
9793 		pd->act.rtableid = -1;
9794 
9795 		if (h->ip_hl > 5)	/* has options */
9796 			pd->badopts++;
9797 
9798 		if (h->ip_off & htons(IP_MF | IP_OFFMASK))
9799 			pd->virtual_proto = PF_VPROTO_FRAGMENT;
9800 
9801 		break;
9802 	}
9803 #endif
9804 #ifdef INET6
9805 	case AF_INET6: {
9806 		struct ip6_hdr *h;
9807 		int fragoff;
9808 		uint32_t jumbolen;
9809 		uint8_t nxt;
9810 
9811 		if (__predict_false((*m0)->m_len < sizeof(struct ip6_hdr)) &&
9812 		    (pd->m = *m0 = m_pullup(*m0, sizeof(struct ip6_hdr))) == NULL) {
9813 			DPFPRINTF(PF_DEBUG_URGENT,
9814 			    ("%s: m_len < sizeof(struct ip6_hdr)"
9815 			     ", pullup failed\n", __func__));
9816 			*action = PF_DROP;
9817 			REASON_SET(reason, PFRES_SHORT);
9818 			return (-1);
9819 		}
9820 
9821 		h = mtod(pd->m, struct ip6_hdr *);
9822 		pd->off = 0;
9823 		if (pf_walk_header6(pd->m, h, &pd->off, &pd->extoff, &fragoff, &nxt,
9824 		    &jumbolen, reason) != PF_PASS) {
9825 			*action = PF_DROP;
9826 			return (-1);
9827 		}
9828 
9829 		h = mtod(pd->m, struct ip6_hdr *);
9830 		pd->src = (struct pf_addr *)&h->ip6_src;
9831 		pd->dst = (struct pf_addr *)&h->ip6_dst;
9832 		pd->ip_sum = NULL;
9833 		pd->tos = IPV6_DSCP(h);
9834 		pd->ttl = h->ip6_hlim;
9835 		pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
9836 		pd->virtual_proto = pd->proto = h->ip6_nxt;
9837 		pd->act.rtableid = -1;
9838 
9839 		if (fragoff != 0)
9840 			pd->virtual_proto = PF_VPROTO_FRAGMENT;
9841 
9842 		/*
9843 		 * We do not support jumbograms.  If we keep going, a zero
9844 		 * ip6_plen will break length computations, so drop the packet.
9845 		 */
9846 		if (ntohs(h->ip6_plen) == 0) {
9847 			*action = PF_DROP;
9848 			return (-1);
9849 		}
9850 
9851 		/* We do IP header normalization and packet reassembly here */
9852 		if (pf_normalize_ip6(m0, fragoff, reason, pd) !=
9853 		    PF_PASS) {
9854 			*action = PF_DROP;
9855 			return (-1);
9856 		}
9857 		pd->m = *m0;
9858 		if (pd->m == NULL) {
9859 			/* packet sits in reassembly queue, no error */
9860 			*action = PF_PASS;
9861 			return (-1);
9862 		}
9863 
9864 		/* Update pointers into the packet. */
9865 		h = mtod(pd->m, struct ip6_hdr *);
9866 		pd->src = (struct pf_addr *)&h->ip6_src;
9867 		pd->dst = (struct pf_addr *)&h->ip6_dst;
9868 
9869 		/*
9870 		 * Reassembly may have changed the next protocol from fragment
9871 		 * to something else, so update.
9872 		 */
9873 		pd->virtual_proto = pd->proto = h->ip6_nxt;
9874 		pd->off = 0;
9875 
9876 		if (pf_walk_header6(pd->m, h, &pd->off, &pd->extoff, &fragoff, &nxt,
9877 			&jumbolen, reason) != PF_PASS) {
9878 			*action = PF_DROP;
9879 			return (-1);
9880 		}
9881 
9882 		if (fragoff != 0)
9883 			pd->virtual_proto = PF_VPROTO_FRAGMENT;
9884 
9885 		break;
9886 	}
9887 #endif
9888 	default:
9889 		panic("pf_setup_pdesc called with illegal af %u", af);
9890 	}
9891 
9892 	switch (pd->virtual_proto) {
9893 	case IPPROTO_TCP: {
9894 		struct tcphdr *th = &pd->hdr.tcp;
9895 
9896 		if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), action,
9897 			reason, af)) {
9898 			*action = PF_DROP;
9899 			REASON_SET(reason, PFRES_SHORT);
9900 			return (-1);
9901 		}
9902 		pd->hdrlen = sizeof(*th);
9903 		pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
9904 		pd->sport = &th->th_sport;
9905 		pd->dport = &th->th_dport;
9906 		break;
9907 	}
9908 	case IPPROTO_UDP: {
9909 		struct udphdr *uh = &pd->hdr.udp;
9910 
9911 		if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), action,
9912 			reason, af)) {
9913 			*action = PF_DROP;
9914 			REASON_SET(reason, PFRES_SHORT);
9915 			return (-1);
9916 		}
9917 		pd->hdrlen = sizeof(*uh);
9918 		if (uh->uh_dport == 0 ||
9919 		    ntohs(uh->uh_ulen) > pd->m->m_pkthdr.len - pd->off ||
9920 		    ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
9921 			*action = PF_DROP;
9922 			REASON_SET(reason, PFRES_SHORT);
9923 			return (-1);
9924 		}
9925 		pd->sport = &uh->uh_sport;
9926 		pd->dport = &uh->uh_dport;
9927 		break;
9928 	}
9929 	case IPPROTO_SCTP: {
9930 		if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, sizeof(pd->hdr.sctp),
9931 		    action, reason, af)) {
9932 			*action = PF_DROP;
9933 			REASON_SET(reason, PFRES_SHORT);
9934 			return (-1);
9935 		}
9936 		pd->hdrlen = sizeof(pd->hdr.sctp);
9937 		pd->p_len = pd->tot_len - pd->off;
9938 
9939 		pd->sport = &pd->hdr.sctp.src_port;
9940 		pd->dport = &pd->hdr.sctp.dest_port;
9941 		if (pd->hdr.sctp.src_port == 0 || pd->hdr.sctp.dest_port == 0) {
9942 			*action = PF_DROP;
9943 			REASON_SET(reason, PFRES_SHORT);
9944 			return (-1);
9945 		}
9946 		if (pf_scan_sctp(pd) != PF_PASS) {
9947 			*action = PF_DROP;
9948 			REASON_SET(reason, PFRES_SHORT);
9949 			return (-1);
9950 		}
9951 		break;
9952 	}
9953 	case IPPROTO_ICMP: {
9954 		if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
9955 			action, reason, af)) {
9956 			*action = PF_DROP;
9957 			REASON_SET(reason, PFRES_SHORT);
9958 			return (-1);
9959 		}
9960 		pd->hdrlen = ICMP_MINLEN;
9961 		break;
9962 	}
9963 #ifdef INET6
9964 	case IPPROTO_ICMPV6: {
9965 		size_t icmp_hlen = sizeof(struct icmp6_hdr);
9966 
9967 		if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
9968 			action, reason, af)) {
9969 			*action = PF_DROP;
9970 			REASON_SET(reason, PFRES_SHORT);
9971 			return (-1);
9972 		}
9973 		/* ICMP headers we look further into to match state */
9974 		switch (pd->hdr.icmp6.icmp6_type) {
9975 		case MLD_LISTENER_QUERY:
9976 		case MLD_LISTENER_REPORT:
9977 			icmp_hlen = sizeof(struct mld_hdr);
9978 			break;
9979 		case ND_NEIGHBOR_SOLICIT:
9980 		case ND_NEIGHBOR_ADVERT:
9981 			icmp_hlen = sizeof(struct nd_neighbor_solicit);
9982 			break;
9983 		}
9984 		if (icmp_hlen > sizeof(struct icmp6_hdr) &&
9985 		    !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
9986 			action, reason, af)) {
9987 			*action = PF_DROP;
9988 			REASON_SET(reason, PFRES_SHORT);
9989 			return (-1);
9990 		}
9991 		pd->hdrlen = icmp_hlen;
9992 		break;
9993 	}
9994 #endif
9995 	}
9996 	return (0);
9997 }
9998 
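/*
 * Update interface, rule, state, source-node and table counters for a
 * processed packet.
 */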
9999 static void
10000 pf_counters_inc(int action, struct pf_pdesc *pd,
10001     struct pf_kstate *s, struct pf_krule *r, struct pf_krule *a)
10002 {
10003 	struct pf_krule		*tr;
10004 	int			 dir = pd->dir;
10005 	int			 dirndx;
10006 
10007 	pf_counter_u64_critical_enter();
10008 	pf_counter_u64_add_protected(
10009 	    &pd->kif->pfik_bytes[pd->af == AF_INET6][dir == PF_OUT][action != PF_PASS],
10010 	    pd->tot_len);
10011 	pf_counter_u64_add_protected(
10012 	    &pd->kif->pfik_packets[pd->af == AF_INET6][dir == PF_OUT][action != PF_PASS],
10013 	    1);
10014 
10015 	if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
10016 		dirndx = (dir == PF_OUT);
10017 		pf_counter_u64_add_protected(&r->packets[dirndx], 1);
10018 		pf_counter_u64_add_protected(&r->bytes[dirndx], pd->tot_len);
10019 		pf_update_timestamp(r);
10020 
10021 		if (a != NULL) {
10022 			pf_counter_u64_add_protected(&a->packets[dirndx], 1);
10023 			pf_counter_u64_add_protected(&a->bytes[dirndx], pd->tot_len);
10024 		}
10025 		if (s != NULL) {
10026 			struct pf_krule_item	*ri;
10027 
10028 			if (s->nat_rule != NULL) {
10029 				pf_counter_u64_add_protected(&s->nat_rule->packets[dirndx],
10030 				    1);
10031 				pf_counter_u64_add_protected(&s->nat_rule->bytes[dirndx],
10032 				    pd->tot_len);
10033 			}
10034 			if (s->src_node != NULL) {
10035 				counter_u64_add(s->src_node->packets[dirndx],
10036 				    1);
10037 				counter_u64_add(s->src_node->bytes[dirndx],
10038 				    pd->tot_len);
10039 			}
10040 			if (s->nat_src_node != NULL) {
10041 				counter_u64_add(s->nat_src_node->packets[dirndx],
10042 				    1);
10043 				counter_u64_add(s->nat_src_node->bytes[dirndx],
10044 				    pd->tot_len);
10045 			}
10046 			dirndx = (dir == s->direction) ? 0 : 1;
10047 			s->packets[dirndx]++;
10048 			s->bytes[dirndx] += pd->tot_len;
10049 
10050 			SLIST_FOREACH(ri, &s->match_rules, entry) {
10051 				pf_counter_u64_add_protected(&ri->r->packets[dirndx], 1);
10052 				pf_counter_u64_add_protected(&ri->r->bytes[dirndx], pd->tot_len);
10053 			}
10054 		}
10055 
10056 		tr = r;
10057 		if (s != NULL && s->nat_rule != NULL &&
10058 		    r == &V_pf_default_rule)
10059 			tr = s->nat_rule;
10060 
10061 		if (tr->src.addr.type == PF_ADDR_TABLE)
10062 			pfr_update_stats(tr->src.addr.p.tbl,
10063 			    (s == NULL) ? pd->src :
10064 			    &s->key[(s->direction == PF_IN)]->
10065 				addr[(s->direction == PF_OUT)],
10066 			    pd->af, pd->tot_len, dir == PF_OUT,
10067 			    r->action == PF_PASS, tr->src.neg);
10068 		if (tr->dst.addr.type == PF_ADDR_TABLE)
10069 			pfr_update_stats(tr->dst.addr.p.tbl,
10070 			    (s == NULL) ? pd->dst :
10071 			    &s->key[(s->direction == PF_IN)]->
10072 				addr[(s->direction == PF_IN)],
10073 			    pd->af, pd->tot_len, dir == PF_OUT,
10074 			    r->action == PF_PASS, tr->dst.neg);
10075 	}
10076 	pf_counter_u64_critical_exit();
10077 }
10078 
10079 #if defined(INET) || defined(INET6)
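/*
 * Main per-packet entry point, reached through the pfil(9) hooks for
 * inbound and outbound IPv4/IPv6 traffic and called recursively for
 * packets that pf itself re-routes or recreates (e.g. syncookie SYNs).
 */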
10080 int
10081 pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
10082     struct inpcb *inp, struct pf_rule_actions *default_actions)
10083 {
10084 	struct pfi_kkif		*kif;
10085 	u_short			 action, reason = 0;
10086 	struct m_tag		*mtag;
10087 	struct pf_krule		*a = NULL, *r = &V_pf_default_rule;
10088 	struct pf_kstate	*s = NULL;
10089 	struct pf_kruleset	*ruleset = NULL;
10090 	struct pf_pdesc		 pd;
10091 	int			 use_2nd_queue = 0;
10092 	uint16_t		 tag;
10093 
10094 	PF_RULES_RLOCK_TRACKER;
10095 	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir));
10096 	M_ASSERTPKTHDR(*m0);
10097 
10098 	if (!V_pf_status.running)
10099 		return (PF_PASS);
10100 
10101 	PF_RULES_RLOCK();
10102 
10103 	kif = (struct pfi_kkif *)ifp->if_pf_kif;
10104 
10105 	if (__predict_false(kif == NULL)) {
10106 		DPFPRINTF(PF_DEBUG_URGENT,
10107 		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
10108 		PF_RULES_RUNLOCK();
10109 		return (PF_DROP);
10110 	}
10111 	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
10112 		PF_RULES_RUNLOCK();
10113 		return (PF_PASS);
10114 	}
10115 
10116 	if ((*m0)->m_flags & M_SKIP_FIREWALL) {
10117 		PF_RULES_RUNLOCK();
10118 		return (PF_PASS);
10119 	}
10120 
10121 #ifdef INET6
10122 	/*
10123 	 * If we end up changing IP addresses (e.g. binat) the stack may get
10124 	 * confused and fail to send the icmp6 packet too big error. Just send
10125 	 * it here, before we do any NAT.
10126 	 */
10127 	if (af == AF_INET6 && dir == PF_OUT && pflags & PFIL_FWD &&
10128 	    IN6_LINKMTU(ifp) < pf_max_frag_size(*m0)) {
10129 		PF_RULES_RUNLOCK();
10130 		icmp6_error(*m0, ICMP6_PACKET_TOO_BIG, 0, IN6_LINKMTU(ifp));
10131 		*m0 = NULL;
10132 		return (PF_DROP);
10133 	}
10134 #endif
10135 
10136 	if (__predict_false(! M_WRITABLE(*m0))) {
10137 		*m0 = m_unshare(*m0, M_NOWAIT);
10138 		if (*m0 == NULL) {
10139 			PF_RULES_RUNLOCK();
10140 			return (PF_DROP);
10141 		}
10142 	}
10143 
10144 	pf_init_pdesc(&pd, *m0);
10145 
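	/*
	 * Complete a route-to decision that was deferred while the packet
	 * went through dummynet (see pf_dummynet_route()): transmit it on
	 * the interface recorded in the mtag.
	 */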
10146 	if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) {
10147 		pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO;
10148 
10149 		ifp = ifnet_byindexgen(pd.pf_mtag->if_index,
10150 		    pd.pf_mtag->if_idxgen);
10151 		if (ifp == NULL || ifp->if_flags & IFF_DYING) {
10152 			PF_RULES_RUNLOCK();
10153 			m_freem(*m0);
10154 			*m0 = NULL;
10155 			return (PF_PASS);
10156 		}
10157 		PF_RULES_RUNLOCK();
10158 		(ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL);
10159 		*m0 = NULL;
10160 		return (PF_PASS);
10161 	}
10162 
10163 	if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL &&
10164 	    pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) {
10165 		/* Dummynet re-injects packets after they've
10166 		 * completed their delay. We've already
10167 		 * processed them, so pass unconditionally. */
10168 
10169 		/* But only once. We may see the packet multiple times (e.g.
10170 		 * PFIL_IN/PFIL_OUT). */
10171 		pf_dummynet_flag_remove(pd.m, pd.pf_mtag);
10172 		PF_RULES_RUNLOCK();
10173 
10174 		return (PF_PASS);
10175 	}
10176 
10177 	if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason,
10178 		kif, default_actions) == -1) {
10179 		if (action != PF_PASS)
10180 			pd.act.log |= PF_LOG_FORCE;
10181 		goto done;
10182 	}
10183 
10184 	if (__predict_false(ip_divert_ptr != NULL) &&
10185 	    ((mtag = m_tag_locate(pd.m, MTAG_PF_DIVERT, 0, NULL)) != NULL)) {
10186 		struct pf_divert_mtag *dt = (struct pf_divert_mtag *)(mtag+1);
10187 		if ((dt->idir == PF_DIVERT_MTAG_DIR_IN && dir == PF_IN) ||
10188 		    (dt->idir == PF_DIVERT_MTAG_DIR_OUT && dir == PF_OUT)) {
10189 			if (pd.pf_mtag == NULL &&
10190 			    ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
10191 				action = PF_DROP;
10192 				goto done;
10193 			}
10194 			pd.pf_mtag->flags |= PF_MTAG_FLAG_PACKET_LOOPED;
10195 		}
10196 		if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_FASTFWD_OURS_PRESENT) {
10197 			pd.m->m_flags |= M_FASTFWD_OURS;
10198 			pd.pf_mtag->flags &= ~PF_MTAG_FLAG_FASTFWD_OURS_PRESENT;
10199 		}
10200 		m_tag_delete(pd.m, mtag);
10201 
10202 		mtag = m_tag_locate(pd.m, MTAG_IPFW_RULE, 0, NULL);
10203 		if (mtag != NULL)
10204 			m_tag_delete(pd.m, mtag);
10205 	}
10206 
10207 	switch (pd.virtual_proto) {
10208 	case PF_VPROTO_FRAGMENT:
10209 		/*
10210 		 * handle fragments that aren't reassembled by
10211 		 * normalization
10212 		 */
10213 		if (kif == NULL || r == NULL) /* pflog */
10214 			action = PF_DROP;
10215 		else
10216 			action = pf_test_rule(&r, &s, &pd, &a,
10217 			    &ruleset, inp);
10218 		if (action != PF_PASS)
10219 			REASON_SET(&reason, PFRES_FRAG);
10220 		break;
10221 
10222 	case IPPROTO_TCP: {
10223 		/* Respond to SYN with a syncookie. */
10224 		if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN &&
10225 		    pd.dir == PF_IN && pf_synflood_check(&pd)) {
10226 			pf_syncookie_send(&pd);
10227 			action = PF_DROP;
10228 			break;
10229 		}
10230 
10231 		if ((tcp_get_flags(&pd.hdr.tcp) & TH_ACK) && pd.p_len == 0)
10232 			use_2nd_queue = 1;
10233 		action = pf_normalize_tcp(&pd);
10234 		if (action == PF_DROP)
10235 			goto done;
10236 		action = pf_test_state_tcp(&s, &pd, &reason);
10237 		if (action == PF_PASS || action == PF_AFRT) {
10238 			if (V_pfsync_update_state_ptr != NULL)
10239 				V_pfsync_update_state_ptr(s);
10240 			r = s->rule;
10241 			a = s->anchor;
10242 		} else if (s == NULL) {
10243 			/* Validate remote SYN|ACK, re-create original SYN if
10244 			 * valid. */
10245 			if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) ==
10246 			    TH_ACK && pf_syncookie_validate(&pd) &&
10247 			    pd.dir == PF_IN) {
10248 				struct mbuf *msyn;
10249 
10250 				msyn = pf_syncookie_recreate_syn(&pd);
10251 				if (msyn == NULL) {
10252 					action = PF_DROP;
10253 					break;
10254 				}
10255 
10256 				action = pf_test(af, dir, pflags, ifp, &msyn, inp,
10257 				    &pd.act);
10258 				m_freem(msyn);
10259 				if (action != PF_PASS)
10260 					break;
10261 
10262 				action = pf_test_state_tcp(&s, &pd, &reason);
10263 				if (action != PF_PASS || s == NULL) {
10264 					action = PF_DROP;
10265 					break;
10266 				}
10267 
10268 				s->src.seqhi = ntohl(pd.hdr.tcp.th_ack) - 1;
10269 				s->src.seqlo = ntohl(pd.hdr.tcp.th_seq) - 1;
10270 				pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_DST);
10271 				action = pf_synproxy(&pd, &s, &reason);
10272 				break;
10273 			} else {
10274 				action = pf_test_rule(&r, &s, &pd,
10275 				    &a, &ruleset, inp);
10276 			}
10277 		}
10278 		break;
10279 	}
10280 
10281 	case IPPROTO_UDP: {
10282 		action = pf_test_state_udp(&s, &pd);
10283 		if (action == PF_PASS || action == PF_AFRT) {
10284 			if (V_pfsync_update_state_ptr != NULL)
10285 				V_pfsync_update_state_ptr(s);
10286 			r = s->rule;
10287 			a = s->anchor;
10288 		} else if (s == NULL)
10289 			action = pf_test_rule(&r, &s, &pd,
10290 			    &a, &ruleset, inp);
10291 		break;
10292 	}
10293 
10294 	case IPPROTO_SCTP: {
10295 		action = pf_normalize_sctp(&pd);
10296 		if (action == PF_DROP)
10297 			goto done;
10298 		action = pf_test_state_sctp(&s, &pd, &reason);
10299 		if (action == PF_PASS || action == PF_AFRT) {
10300 			if (V_pfsync_update_state_ptr != NULL)
10301 				V_pfsync_update_state_ptr(s);
10302 			r = s->rule;
10303 			a = s->anchor;
10304 		} else if (s == NULL) {
10305 			action = pf_test_rule(&r, &s,
10306 			    &pd, &a, &ruleset, inp);
10307 		}
10308 		break;
10309 	}
10310 
10311 	case IPPROTO_ICMP: {
10312 		if (af != AF_INET) {
10313 			action = PF_DROP;
10314 			REASON_SET(&reason, PFRES_NORM);
10315 			DPFPRINTF(PF_DEBUG_MISC,
10316 			    ("dropping IPv6 packet with ICMPv4 payload"));
10317 			goto done;
10318 		}
10319 		action = pf_test_state_icmp(&s, &pd, &reason);
10320 		if (action == PF_PASS || action == PF_AFRT) {
10321 			if (V_pfsync_update_state_ptr != NULL)
10322 				V_pfsync_update_state_ptr(s);
10323 			r = s->rule;
10324 			a = s->anchor;
10325 		} else if (s == NULL)
10326 			action = pf_test_rule(&r, &s, &pd,
10327 			    &a, &ruleset, inp);
10328 		break;
10329 	}
10330 
10331 	case IPPROTO_ICMPV6: {
10332 		if (af != AF_INET6) {
10333 			action = PF_DROP;
10334 			REASON_SET(&reason, PFRES_NORM);
10335 			DPFPRINTF(PF_DEBUG_MISC,
10336 			    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
10337 			goto done;
10338 		}
10339 		action = pf_test_state_icmp(&s, &pd, &reason);
10340 		if (action == PF_PASS || action == PF_AFRT) {
10341 			if (V_pfsync_update_state_ptr != NULL)
10342 				V_pfsync_update_state_ptr(s);
10343 			r = s->rule;
10344 			a = s->anchor;
10345 		} else if (s == NULL)
10346 			action = pf_test_rule(&r, &s, &pd,
10347 			    &a, &ruleset, inp);
10348 		break;
10349 	}
10350 
10351 	default:
10352 		action = pf_test_state_other(&s, &pd);
10353 		if (action == PF_PASS || action == PF_AFRT) {
10354 			if (V_pfsync_update_state_ptr != NULL)
10355 				V_pfsync_update_state_ptr(s);
10356 			r = s->rule;
10357 			a = s->anchor;
10358 		} else if (s == NULL)
10359 			action = pf_test_rule(&r, &s, &pd,
10360 			    &a, &ruleset, inp);
10361 		break;
10362 	}
10363 
10364 done:
10365 	PF_RULES_RUNLOCK();
10366 
10367 	if (pd.m == NULL)
10368 		goto eat_pkt;
10369 
10370 	if (action == PF_PASS && pd.badopts &&
10371 	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
10372 		action = PF_DROP;
10373 		REASON_SET(&reason, PFRES_IPOPTIONS);
10374 		pd.act.log = PF_LOG_FORCE;
10375 		DPFPRINTF(PF_DEBUG_MISC,
10376 		    ("pf: dropping packet with dangerous headers\n"));
10377 	}
10378 
10379 	if (s) {
10380 		uint8_t log = pd.act.log;
10381 		memcpy(&pd.act, &s->act, sizeof(struct pf_rule_actions));
10382 		pd.act.log |= log;
10383 		tag = s->tag;
10384 	} else {
10385 		tag = r->tag;
10386 	}
10387 
10388 	if (tag > 0 && pf_tag_packet(&pd, tag)) {
10389 		action = PF_DROP;
10390 		REASON_SET(&reason, PFRES_MEMORY);
10391 	}
10392 
10393 	pf_scrub(&pd);
10394 	if (pd.proto == IPPROTO_TCP && pd.act.max_mss)
10395 		pf_normalize_mss(&pd);
10396 
10397 	if (pd.act.rtableid >= 0)
10398 		M_SETFIB(pd.m, pd.act.rtableid);
10399 
10400 	if (pd.act.flags & PFSTATE_SETPRIO) {
10401 		if (pd.tos & IPTOS_LOWDELAY)
10402 			use_2nd_queue = 1;
10403 		if (vlan_set_pcp(pd.m, pd.act.set_prio[use_2nd_queue])) {
10404 			action = PF_DROP;
10405 			REASON_SET(&reason, PFRES_MEMORY);
10406 			pd.act.log = PF_LOG_FORCE;
10407 			DPFPRINTF(PF_DEBUG_MISC,
10408 			    ("pf: failed to allocate 802.1q mtag\n"));
10409 		}
10410 	}
10411 
10412 #ifdef ALTQ
10413 	if (action == PF_PASS && pd.act.qid) {
10414 		if (pd.pf_mtag == NULL &&
10415 		    ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
10416 			action = PF_DROP;
10417 			REASON_SET(&reason, PFRES_MEMORY);
10418 		} else {
10419 			if (s != NULL)
10420 				pd.pf_mtag->qid_hash = pf_state_hash(s);
10421 			if (use_2nd_queue || (pd.tos & IPTOS_LOWDELAY))
10422 				pd.pf_mtag->qid = pd.act.pqid;
10423 			else
10424 				pd.pf_mtag->qid = pd.act.qid;
10425 			/* Add hints for ecn. */
10426 			pd.pf_mtag->hdr = mtod(pd.m, void *);
10427 		}
10428 	}
10429 #endif /* ALTQ */
10430 
10431 	/*
10432 	 * connections redirected to loopback should not match sockets
10433 	 * bound specifically to loopback due to security implications,
10434 	 * see tcp_input() and in_pcblookup_listen().
10435 	 */
10436 	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
10437 	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule != NULL &&
10438 	    (s->nat_rule->action == PF_RDR ||
10439 	    s->nat_rule->action == PF_BINAT) &&
10440 	    pf_is_loopback(af, pd.dst))
10441 		pd.m->m_flags |= M_SKIP_FIREWALL;
10442 
10443 	if (af == AF_INET && __predict_false(ip_divert_ptr != NULL) &&
10444 	    action == PF_PASS && r->divert.port && !PACKET_LOOPED(&pd)) {
10445 		mtag = m_tag_alloc(MTAG_PF_DIVERT, 0,
10446 		    sizeof(struct pf_divert_mtag), M_NOWAIT | M_ZERO);
10447 		if (mtag != NULL) {
10448 			((struct pf_divert_mtag *)(mtag+1))->port =
10449 			    ntohs(r->divert.port);
10450 			((struct pf_divert_mtag *)(mtag+1))->idir =
10451 			    (dir == PF_IN) ? PF_DIVERT_MTAG_DIR_IN :
10452 			    PF_DIVERT_MTAG_DIR_OUT;
10453 
10454 			if (s)
10455 				PF_STATE_UNLOCK(s);
10456 
10457 			m_tag_prepend(pd.m, mtag);
10458 			if (pd.m->m_flags & M_FASTFWD_OURS) {
10459 				if (pd.pf_mtag == NULL &&
10460 				    ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) {
10461 					action = PF_DROP;
10462 					REASON_SET(&reason, PFRES_MEMORY);
10463 					pd.act.log = PF_LOG_FORCE;
10464 					DPFPRINTF(PF_DEBUG_MISC,
10465 					    ("pf: failed to allocate tag\n"));
10466 				} else {
10467 					pd.pf_mtag->flags |=
10468 					    PF_MTAG_FLAG_FASTFWD_OURS_PRESENT;
10469 					pd.m->m_flags &= ~M_FASTFWD_OURS;
10470 				}
10471 			}
10472 			ip_divert_ptr(*m0, dir == PF_IN);
10473 			*m0 = NULL;
10474 
10475 			return (action);
10476 		} else {
10477 			/* XXX: ipfw has the same behaviour! */
10478 			action = PF_DROP;
10479 			REASON_SET(&reason, PFRES_MEMORY);
10480 			pd.act.log = PF_LOG_FORCE;
10481 			DPFPRINTF(PF_DEBUG_MISC,
10482 			    ("pf: failed to allocate divert tag\n"));
10483 		}
10484 	}
10485 	/* XXX: Anybody working on it?! */
10486 	if (af == AF_INET6 && r->divert.port)
10487 		printf("pf: divert(9) is not supported for IPv6\n");
10488 
10489 	/* this flag will need revising if the pkt is forwarded */
10490 	if (pd.pf_mtag)
10491 		pd.pf_mtag->flags &= ~PF_MTAG_FLAG_PACKET_LOOPED;
10492 
10493 	if (pd.act.log) {
10494 		struct pf_krule		*lr;
10495 		struct pf_krule_item	*ri;
10496 
10497 		if (s != NULL && s->nat_rule != NULL &&
10498 		    s->nat_rule->log & PF_LOG_ALL)
10499 			lr = s->nat_rule;
10500 		else
10501 			lr = r;
10502 
10503 		if (pd.act.log & PF_LOG_FORCE || lr->log & PF_LOG_ALL)
10504 			PFLOG_PACKET(action, reason, lr, a,
10505 			    ruleset, &pd, (s == NULL));
10506 		if (s) {
10507 			SLIST_FOREACH(ri, &s->match_rules, entry)
10508 				if (ri->r->log & PF_LOG_ALL)
10509 					PFLOG_PACKET(action,
10510 					    reason, ri->r, a, ruleset, &pd, 0);
10511 		}
10512 	}
10513 
10514 	pf_counters_inc(action, &pd, s, r, a);
10515 
10516 	switch (action) {
10517 	case PF_SYNPROXY_DROP:
10518 		m_freem(*m0);		/* FALLTHROUGH */
10519 	case PF_DEFER:
10520 		*m0 = NULL;
10521 		action = PF_PASS;
10522 		break;
10523 	case PF_DROP:
10524 		m_freem(*m0);
10525 		*m0 = NULL;
10526 		break;
10527 	case PF_AFRT:
10528 		if (pf_translate_af(&pd)) {
10529 			if (!pd.m)
10530 				*m0 = NULL;
10531 			action = PF_DROP;
10532 			break;
10533 		}
10534 		*m0 = pd.m; /* pf_translate_af may change pd.m */
10535 #ifdef INET
10536 		if (pd.naf == AF_INET)
10537 			pf_route(m0, r, kif->pfik_ifp, s, &pd, inp);
10538 #endif
10539 #ifdef INET6
10540 		if (pd.naf == AF_INET6)
10541 			pf_route6(m0, r, kif->pfik_ifp, s, &pd, inp);
10542 #endif
10543 		*m0 = NULL;
10544 		action = PF_PASS;
10545 		goto out;
10547 	default:
10548 		if (pd.act.rt) {
10549 			switch (af) {
10550 #ifdef INET
10551 			case AF_INET:
10552 				/* pf_route() returns unlocked. */
10553 				pf_route(m0, r, kif->pfik_ifp, s, &pd, inp);
10554 				break;
10555 #endif
10556 #ifdef INET6
10557 			case AF_INET6:
10558 				/* pf_route6() returns unlocked. */
10559 				pf_route6(m0, r, kif->pfik_ifp, s, &pd, inp);
10560 				break;
10561 #endif
10562 			}
10563 			goto out;
10564 		}
10565 		if (pf_dummynet(&pd, s, r, m0) != 0) {
10566 			action = PF_DROP;
10567 			REASON_SET(&reason, PFRES_MEMORY);
10568 		}
10569 		break;
10570 	}
10571 
10572 eat_pkt:
10573 	SDT_PROBE4(pf, ip, test, done, action, reason, r, s);
10574 
10575 	if (s && action != PF_DROP) {
10576 		if (!s->if_index_in && dir == PF_IN)
10577 			s->if_index_in = ifp->if_index;
10578 		else if (!s->if_index_out && dir == PF_OUT)
10579 			s->if_index_out = ifp->if_index;
10580 	}
10581 
10582 	if (s)
10583 		PF_STATE_UNLOCK(s);
10584 
10585 out:
10586 #ifdef INET6
10587 	/* If reassembled packet passed, create new fragments. */
10588 	if (af == AF_INET6 && action == PF_PASS && *m0 && dir == PF_OUT &&
10589 	    (! (pflags & PF_PFIL_NOREFRAGMENT)) &&
10590 	    (mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL)
10591 		action = pf_refragment6(ifp, m0, mtag, NULL, pflags & PFIL_FWD);
10592 #endif
10593 
10594 	pf_sctp_multihome_delayed(&pd, kif, s, action);
10595 
10596 	return (action);
10597 }
10598 #endif /* INET || INET6 */
10599