xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision ee1f417a8609b8742332950521800502759dd185)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <crypto/siphash/siphash.h>
51 
52 #include <net/if.h>
53 #include <net/if_var.h>
54 #include <net/vnet.h>
55 #include <net/pfvar.h>
56 #include <net/if_pflog.h>
57 
58 #ifdef INET
59 #include <netinet/in_var.h>
60 #endif /* INET */
61 
62 #ifdef INET6
63 #include <netinet6/in6_var.h>
64 #endif /* INET6 */
65 
66 
67 /*
68  * Limit the amount of work we do to find a free source port for redirects that
69  * introduce a state conflict.
70  */
71 #define	V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
72 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
73 
74 static uint64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
75 			    struct pf_poolhashkey *, sa_family_t);
76 static struct pf_krule	*pf_match_translation(int, struct pf_test_ctx *);
77 static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *,
78 			    struct pf_krule *);
79 static int		 pf_get_sport(struct pf_pdesc *, struct pf_krule *,
80 			    struct pf_addr *, uint16_t *, uint16_t, uint16_t,
81 			    struct pf_kpool *, struct pf_udp_mapping **,
82 			    pf_sn_types_t);
83 static bool		 pf_islinklocal(const sa_family_t, const struct pf_addr *);
84 
85 static uint64_t
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)86 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
87     struct pf_poolhashkey *key, sa_family_t af)
88 {
89 	SIPHASH_CTX	 ctx;
90 #ifdef INET6
91 	union {
92 		uint64_t hash64;
93 		uint32_t hash32[2];
94 	} h;
95 #endif /* INET6 */
96 	uint64_t	 res = 0;
97 
98 	_Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, "");
99 
100 	switch (af) {
101 #ifdef INET
102 	case AF_INET:
103 		res = SipHash24(&ctx, (const uint8_t *)key,
104 		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
105 		hash->addr32[0] = res;
106 		break;
107 #endif /* INET */
108 #ifdef INET6
109 	case AF_INET6:
110 		res = SipHash24(&ctx, (const uint8_t *)key,
111 		    &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0]));
112 		h.hash64 = res;
113 		hash->addr32[0] = h.hash32[0];
114 		hash->addr32[1] = h.hash32[1];
115 		/*
116 		 * siphash isn't big enough, but flipping it around is
117 		 * good enough here.
118 		 */
119 		hash->addr32[2] = ~h.hash32[1];
120 		hash->addr32[3] = ~h.hash32[0];
121 		break;
122 #endif /* INET6 */
123 	default:
124 		unhandled_af(af);
125 	}
126 	return (res);
127 }
128 
/*
 * Rule-attribute test for use inside a rule-walking loop.
 *
 * When the mismatch condition t holds, the loop variable r is set to a and
 * the enclosing loop is restarted with `continue`.  Otherwise nothing happens.
 *
 * The macro requires a variable named r and an enclosing loop at the point of
 * use.  It deliberately is NOT wrapped in do { ... } while (0): the `continue`
 * would then bind to that inner loop instead of the caller's.  The trailing
 * `else do { } while (0)` consumes the caller's semicolon and keeps the
 * expansion a single statement.  The replacement expression is parenthesized
 * so that any expression can be passed safely.
 */
#define PF_TEST_ATTRIB(t, a)		\
	if (t) {			\
		r = (a);		\
		continue;		\
	} else do {			\
	} while (0)
135 
136 static enum pf_test_status
pf_match_translation_rule(int rs_num,struct pf_test_ctx * ctx,struct pf_kruleset * ruleset)137 pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset)
138 {
139 	struct pf_krule		*r;
140 	struct pf_pdesc		*pd = ctx->pd;
141 	int			 rtableid = -1;
142 
143 	r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
144 	while (r != NULL) {
145 		struct pf_rule_addr	*src = NULL, *dst = NULL;
146 		struct pf_addr_wrap	*xdst = NULL;
147 
148 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
149 			src = &r->dst;
150 			if (r->rdr.cur != NULL)
151 				xdst = &r->rdr.cur->addr;
152 		} else {
153 			src = &r->src;
154 			dst = &r->dst;
155 		}
156 
157 		pf_counter_u64_add(&r->evaluations, 1);
158 		PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot,
159 			r->skip[PF_SKIP_IFP]);
160 		PF_TEST_ATTRIB(r->direction && r->direction != pd->dir,
161 			r->skip[PF_SKIP_DIR]);
162 		PF_TEST_ATTRIB(r->af && r->af != pd->af,
163 			r->skip[PF_SKIP_AF]);
164 		PF_TEST_ATTRIB(r->proto && r->proto != pd->proto,
165 			r->skip[PF_SKIP_PROTO]);
166 		PF_TEST_ATTRIB(PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af,
167 		    src->neg, pd->kif, M_GETFIB(pd->m)),
168 			r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
169 			    PF_SKIP_DST_ADDR]);
170 		PF_TEST_ATTRIB(src->port_op && !pf_match_port(src->port_op,
171 		    src->port[0], src->port[1], pd->nsport),
172 			r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
173 			    PF_SKIP_DST_PORT]);
174 		PF_TEST_ATTRIB(dst != NULL &&
175 		    PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL,
176 		    M_GETFIB(pd->m)),
177 			r->skip[PF_SKIP_DST_ADDR]);
178 		PF_TEST_ATTRIB(xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af,
179 		    0, NULL, M_GETFIB(pd->m)),
180 			TAILQ_NEXT(r, entries));
181 		PF_TEST_ATTRIB(dst != NULL && dst->port_op &&
182 		    !pf_match_port(dst->port_op, dst->port[0],
183 		    dst->port[1], pd->ndport),
184 			r->skip[PF_SKIP_DST_PORT]);
185 		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag,
186 		    pd->pf_mtag ? pd->pf_mtag->tag : 0),
187 			TAILQ_NEXT(r, entries));
188 		PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
189 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
190 		    &pd->hdr.tcp), r->os_fingerprint)),
191 			TAILQ_NEXT(r, entries));
192 		if (r->tag)
193 			ctx->tag = r->tag;
194 		if (r->rtableid >= 0)
195 			rtableid = r->rtableid;
196 		if (r->anchor == NULL) {
197 			if (r->action == PF_NONAT ||
198 			    r->action == PF_NORDR ||
199 			    r->action == PF_NOBINAT) {
200 				*ctx->rm = NULL;
201 			} else {
202 				/*
203 				 * found matching r
204 				 */
205 				ctx->tr = r;
206 				/*
207 				 * anchor, with ruleset, where r belongs to
208 				 */
209 				*ctx->am = ctx->a;
210 				/*
211 				 * ruleset where r belongs to
212 				 */
213 				*ctx->rsm = ruleset;
214 				/*
215 				 * ruleset, where anchor belongs to.
216 				 */
217 				ctx->arsm = ctx->aruleset;
218 			}
219 			break;
220 		} else {
221 			ctx->a = r;			/* remember anchor */
222 			ctx->aruleset = ruleset;	/* and its ruleset */
223 			if (pf_step_into_translation_anchor(rs_num, ctx,
224 			    r) != PF_TEST_OK) {
225 				break;
226 			}
227 		}
228 		r = TAILQ_NEXT(r, entries);
229 	}
230 
231 	if (ctx->tag > 0 && pf_tag_packet(pd, ctx->tag))
232 		return (PF_TEST_FAIL);
233 	if (rtableid >= 0)
234 		M_SETFIB(pd->m, rtableid);
235 
236 	return (PF_TEST_OK);
237 }
238 
239 static enum pf_test_status
pf_step_into_translation_anchor(int rs_num,struct pf_test_ctx * ctx,struct pf_krule * r)240 pf_step_into_translation_anchor(int rs_num, struct pf_test_ctx *ctx, struct pf_krule *r)
241 {
242 	enum pf_test_status	rv;
243 
244 	PF_RULES_RASSERT();
245 
246 	if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
247 		printf("%s: anchor stack overflow on %s\n",
248 		    __func__, r->anchor->name);
249 		return (PF_TEST_FAIL);
250 	}
251 
252 	ctx->depth++;
253 
254 	if (r->anchor_wildcard) {
255 		struct pf_kanchor *child;
256 		rv = PF_TEST_OK;
257 		RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
258 			rv = pf_match_translation_rule(rs_num, ctx, &child->ruleset);
259 			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
260 				/*
261 				 * we either hit a rule qith quick action
262 				 * (more likely), or hit some runtime
263 				 * error (e.g. pool_get() faillure).
264 				 */
265 				break;
266 			}
267 		}
268 	} else {
269 		rv = pf_match_translation_rule(rs_num, ctx, &r->anchor->ruleset);
270 	}
271 
272 	ctx->depth--;
273 
274 	return (rv);
275 }
276 
277 static struct pf_krule *
pf_match_translation(int rs_num,struct pf_test_ctx * ctx)278 pf_match_translation(int rs_num, struct pf_test_ctx *ctx)
279 {
280 	enum pf_test_status rv;
281 
282 	MPASS(ctx->depth == 0);
283 	rv = pf_match_translation_rule(rs_num, ctx, &pf_main_ruleset);
284 	MPASS(ctx->depth == 0);
285 	if (rv != PF_TEST_OK)
286 		return (NULL);
287 
288 	return (ctx->tr);
289 }
290 
291 static int
pf_get_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,uint16_t low,uint16_t high,struct pf_kpool * rpool,struct pf_udp_mapping ** udp_mapping,pf_sn_types_t sn_type)292 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr,
293     uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool *rpool,
294     struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type)
295 {
296 	struct pf_state_key_cmp	key;
297 	struct pf_addr		init_addr;
298 	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
299 	int			sidx = pd->sidx;
300 	int			didx = pd->didx;
301 
302 	bzero(&init_addr, sizeof(init_addr));
303 
304 	if (udp_mapping) {
305 		MPASS(*udp_mapping == NULL);
306 	}
307 
308 	/*
309 	 * If we are UDP and have an existing mapping we can get source port
310 	 * from the mapping. In this case we have to look up the src_node as
311 	 * pf_map_addr would.
312 	 */
313 	if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) {
314 		struct pf_udp_endpoint_cmp udp_source;
315 
316 		bzero(&udp_source, sizeof(udp_source));
317 		udp_source.af = pd->af;
318 		pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af);
319 		udp_source.port = pd->nsport;
320 		if (udp_mapping) {
321 			struct pf_ksrc_node	*sn = NULL;
322 			struct pf_srchash	*sh = NULL;
323 			*udp_mapping = pf_udp_mapping_find(&udp_source);
324 			if (*udp_mapping) {
325 				pf_addrcpy(naddr,
326 				    &(*udp_mapping)->endpoints[1].addr,
327 				    pd->af);
328 				*nport = (*udp_mapping)->endpoints[1].port;
329 				/*
330 				 * Try to find a src_node as per pf_map_addr().
331 				 * XXX: Why? This code seems to do nothing.
332 				 */
333 				if (rpool->opts & PF_POOL_STICKYADDR &&
334 				    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
335 					sn = pf_find_src_node(&pd->nsaddr, r,
336 					    pd->af, &sh, sn_type, false);
337 				if (sn != NULL)
338 					PF_SRC_NODE_UNLOCK(sn);
339 				return (0);
340 			} else {
341 				*udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr,
342 				    pd->nsport, &init_addr, 0);
343 				if (*udp_mapping == NULL)
344 					return (1);
345 			}
346 		}
347 	}
348 
349 	if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, &(pd->naf), NULL,
350 	    &init_addr, rpool, sn_type))
351 		goto failed;
352 
353 	if (pd->proto == IPPROTO_ICMP) {
354 		if (pd->ndport == htons(ICMP_ECHO)) {
355 			low = 1;
356 			high = 65535;
357 		} else
358 			return (0);	/* Don't try to modify non-echo ICMP */
359 	}
360 #ifdef INET6
361 	if (pd->proto == IPPROTO_ICMPV6) {
362 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
363 			low = 1;
364 			high = 65535;
365 		} else
366 			return (0);	/* Don't try to modify non-echo ICMP */
367 	}
368 #endif /* INET6 */
369 
370 	bzero(&key, sizeof(key));
371 	key.af = pd->naf;
372 	key.proto = pd->proto;
373 
374 	do {
375 		pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
376 		pf_addrcpy(&key.addr[sidx], naddr, key.af);
377 		key.port[didx] = pd->ndport;
378 
379 		if (udp_mapping && *udp_mapping)
380 			pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr,
381 			    pd->af);
382 
383 		/*
384 		 * port search; start random, step;
385 		 * similar 2 portloop in in_pcbbind
386 		 */
387 		if (pd->proto == IPPROTO_SCTP) {
388 			key.port[sidx] = pd->nsport;
389 			if (!pf_find_state_all_exists(&key, dir)) {
390 				*nport = pd->nsport;
391 				return (0);
392 			} else {
393 				return (1); /* Fail mapping. */
394 			}
395 		} else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
396 		    pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
397 			/*
398 			 * XXX bug: icmp states don't use the id on both sides.
399 			 * (traceroute -I through nat)
400 			 */
401 			key.port[sidx] = pd->nsport;
402 			if (!pf_find_state_all_exists(&key, dir)) {
403 				*nport = pd->nsport;
404 				return (0);
405 			}
406 		} else if (low == high) {
407 			key.port[sidx] = htons(low);
408 			if (!pf_find_state_all_exists(&key, dir)) {
409 				if (udp_mapping && *udp_mapping != NULL) {
410 					(*udp_mapping)->endpoints[1].port = htons(low);
411 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
412 						*nport = htons(low);
413 						return (0);
414 					}
415 				} else {
416 					*nport = htons(low);
417 					return (0);
418 				}
419 			}
420 		} else {
421 			uint32_t tmp;
422 			uint16_t cut;
423 
424 			if (low > high) {
425 				tmp = low;
426 				low = high;
427 				high = tmp;
428 			}
429 			/* low < high */
430 			cut = arc4random() % (1 + high - low) + low;
431 			/* low <= cut <= high */
432 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
433 				if (udp_mapping && *udp_mapping != NULL) {
434 					(*udp_mapping)->endpoints[sidx].port = htons(tmp);
435 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
436 						*nport = htons(tmp);
437 						return (0);
438 					}
439 				} else {
440 					key.port[sidx] = htons(tmp);
441 					if (!pf_find_state_all_exists(&key, dir)) {
442 						*nport = htons(tmp);
443 						return (0);
444 					}
445 				}
446 			}
447 			tmp = cut;
448 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
449 				if (pd->proto == IPPROTO_UDP &&
450 				    (rpool->opts & PF_POOL_ENDPI &&
451 				    udp_mapping != NULL)) {
452 					(*udp_mapping)->endpoints[1].port = htons(tmp);
453 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
454 						*nport = htons(tmp);
455 						return (0);
456 					}
457 				} else {
458 					key.port[sidx] = htons(tmp);
459 					if (!pf_find_state_all_exists(&key, dir)) {
460 						*nport = htons(tmp);
461 						return (0);
462 					}
463 				}
464 			}
465 		}
466 
467 		switch (rpool->opts & PF_POOL_TYPEMASK) {
468 		case PF_POOL_RANDOM:
469 		case PF_POOL_ROUNDROBIN:
470 			/*
471 			 * pick a different source address since we're out
472 			 * of free port choices for the current one.
473 			 */
474 			if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr,
475 			    &(pd->naf), NULL, &init_addr, rpool, sn_type))
476 				return (1);
477 			break;
478 		case PF_POOL_NONE:
479 		case PF_POOL_SRCHASH:
480 		case PF_POOL_BITMASK:
481 		default:
482 			return (1);
483 		}
484 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
485 
486 failed:
487 	if (udp_mapping) {
488 		uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
489 		*udp_mapping = NULL;
490 	}
491 
492 	return (1);					/* none available */
493 }
494 
495 static bool
pf_islinklocal(const sa_family_t af,const struct pf_addr * addr)496 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
497 {
498 	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
499 		return (true);
500 	return (false);
501 }
502 
503 static int
pf_get_mape_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,struct pf_udp_mapping ** udp_mapping,struct pf_kpool * rpool)504 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r,
505     struct pf_addr *naddr, uint16_t *nport, struct pf_udp_mapping **udp_mapping,
506     struct pf_kpool *rpool)
507 {
508 	uint16_t psmask, low, highmask;
509 	uint16_t i, ahigh, cut;
510 	int ashift, psidshift;
511 
512 	ashift = 16 - rpool->mape.offset;
513 	psidshift = ashift - rpool->mape.psidlen;
514 	psmask = rpool->mape.psid & ((1U << rpool->mape.psidlen) - 1);
515 	psmask = psmask << psidshift;
516 	highmask = (1U << psidshift) - 1;
517 
518 	ahigh = (1U << rpool->mape.offset) - 1;
519 	cut = arc4random() & ahigh;
520 	if (cut == 0)
521 		cut = 1;
522 
523 	for (i = cut; i <= ahigh; i++) {
524 		low = (i << ashift) | psmask;
525 		if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask,
526 		    rpool, udp_mapping, PF_SN_NAT))
527 			return (0);
528 	}
529 	for (i = cut - 1; i > 0; i--) {
530 		low = (i << ashift) | psmask;
531 		if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask,
532 		    rpool, udp_mapping, PF_SN_NAT))
533 			return (0);
534 	}
535 	return (1);
536 }
537 
538 static __inline  u_short
pf_check_src_node_valid(struct pf_ksrc_node * sn,struct pf_kpool * rpool)539 pf_check_src_node_valid(struct pf_ksrc_node *sn, struct pf_kpool *rpool)
540 {
541 	struct pf_addr		*raddr, *rmask;
542 	struct pf_addr		*caddr; /* cached redirection address */
543 	struct pf_kpooladdr	*pa;
544 	sa_family_t		 raf;
545 	sa_family_t		 caf; /* cached redirection AF */
546 	u_short			 valid = 0;
547 
548 	KASSERT(sn != NULL, ("sn is NULL"));
549 	KASSERT(rpool != NULL, ("rpool is NULL"));
550 
551 	/* check if the cached entry is still valid */
552 
553 	if (sn->type ==  PF_SN_LIMIT) {
554 		/* Always valid as it does not store redirection address */
555 		return (1);
556 	}
557 
558 	mtx_lock(&rpool->mtx);
559 	caddr = &(sn->raddr);
560 	caf = sn->raf;
561 
562 	TAILQ_FOREACH(pa, &rpool->list, entries) {
563 		if (PF_AZERO(caddr, caf)) {
564 			valid = 1;
565 			goto done;
566 		} else if (pa->addr.type == PF_ADDR_DYNIFTL) {
567 			if (pfr_kentry_byaddr(pa->addr.p.dyn->pfid_kt, caddr, caf, 0)) {
568 				valid = 1;
569 				goto done;
570 			}
571 		} else if (pa->addr.type == PF_ADDR_TABLE) {
572 			if (pfr_kentry_byaddr(pa->addr.p.tbl, caddr, caf, 0)) {
573 				valid = 1;
574 				goto done;
575 			}
576 		} else if (pa->addr.type != PF_ADDR_NOROUTE) {
577 			/* PF_ADDR_URPFFAILED, PF_ADDR_RANGE, PF_ADDR_ADDRMASK */
578 			raddr = &(pa->addr.v.a.addr);
579 			rmask = &(pa->addr.v.a.mask);
580 			raf = pa->af;
581 			if (raf == caf && pf_match_addr(0, raddr, rmask, caddr, caf)) {
582 				valid = 1;
583 				goto done;
584 			}
585 		}
586 		/* else PF_ADDR_NOROUTE */
587 	}
588 
589 done:
590 	mtx_unlock(&rpool->mtx);
591 
592 	return (valid);
593 }
594 
595 u_short
pf_map_addr(sa_family_t saf,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,sa_family_t * naf,struct pf_addr * init_addr,struct pf_kpool * rpool)596 pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
597     struct pf_addr *naddr, struct pfi_kkif **nkif, sa_family_t *naf,
598     struct pf_addr *init_addr, struct pf_kpool *rpool)
599 {
600 	u_short			 reason = PFRES_MATCH;
601 	struct pf_addr		*raddr = NULL, *rmask = NULL;
602 	struct pfr_ktable	*kt;
603 	uint64_t		 hashidx;
604 	int			 cnt;
605 	sa_family_t		 wanted_af;
606 	u_int8_t		 pool_type;
607 	bool			 prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH;
608 
609 	KASSERT(saf != 0, ("%s: saf == 0", __func__));
610 	KASSERT(naf != NULL, ("%s: naf = NULL", __func__));
611 	KASSERT((*naf) != 0, ("%s: *naf = 0", __func__));
612 
613 	/*
614 	 * Given (*naf) is a hint about AF of the forwarded packet.
615 	 * It might be changed if prefer_ipv6_nexthop is enabled and
616 	 * the combination of nexthop AF and packet AF allows for it.
617 	 */
618 	wanted_af = (*naf);
619 
620 	mtx_lock(&rpool->mtx);
621 	/* Find the route using chosen algorithm. Store the found route
622 	   in src_node if it was given or found. */
623 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
624 		reason = PFRES_MAPFAILED;
625 		goto done_pool_mtx;
626 	}
627 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
628 		switch (wanted_af) {
629 #ifdef INET
630 		case AF_INET:
631 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
632 			    !PF_POOL_DYNTYPE(rpool->opts)) {
633 				reason = PFRES_MAPFAILED;
634 				goto done_pool_mtx;
635 			}
636 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
637 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
638 			break;
639 #endif /* INET */
640 #ifdef INET6
641 		case AF_INET6:
642 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
643 			    !PF_POOL_DYNTYPE(rpool->opts)) {
644 				reason = PFRES_MAPFAILED;
645 				goto done_pool_mtx;
646 			}
647 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
648 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
649 			break;
650 #endif /* INET6 */
651 		default:
652 			unhandled_af(wanted_af);
653 		}
654 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
655 		if (!PF_POOL_DYNTYPE(rpool->opts)) {
656 			reason = PFRES_MAPFAILED;
657 			goto done_pool_mtx; /* unsupported */
658 		}
659 	} else {
660 		raddr = &rpool->cur->addr.v.a.addr;
661 		rmask = &rpool->cur->addr.v.a.mask;
662 	}
663 
664 	/*
665 	 * For pools with a single host with the prefer-ipv6-nexthop option
666 	 * we can return pool address of any AF, unless the forwarded packet
667 	 * is IPv6, then we can return only if pool address is IPv6.
668 	 * For non-prefer-ipv6-nexthop we can return pool address only
669 	 * of wanted AF, unless the pool address'es AF is unknown, which
670 	 * happens in case old ioctls have been used to set up the pool.
671 	 *
672 	 * Round-robin pools have their own logic for retrying next addresses.
673 	 */
674 	pool_type = rpool->opts & PF_POOL_TYPEMASK;
675 	if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK ||
676 	    ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) &&
677 	    rpool->cur->addr.type != PF_ADDR_TABLE &&
678 	    rpool->cur->addr.type != PF_ADDR_DYNIFTL)) {
679 		if (prefer_ipv6_nexthop) {
680 			if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) {
681 				reason = PFRES_MAPFAILED;
682 				goto done_pool_mtx;
683 			}
684 			wanted_af = rpool->cur->af;
685 		} else {
686 			if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) {
687 				reason = PFRES_MAPFAILED;
688 				goto done_pool_mtx;
689 			}
690 		}
691 	}
692 
693 	switch (pool_type) {
694 	case PF_POOL_NONE:
695 		pf_addrcpy(naddr, raddr, wanted_af);
696 		break;
697 	case PF_POOL_BITMASK:
698 		pf_poolmask(naddr, raddr, rmask, saddr, wanted_af);
699 		break;
700 	case PF_POOL_RANDOM:
701 		if (rpool->cur->addr.type == PF_ADDR_TABLE ||
702 		    rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
703 			if (rpool->cur->addr.type == PF_ADDR_TABLE)
704 				kt = rpool->cur->addr.p.tbl;
705 			else
706 				kt = rpool->cur->addr.p.dyn->pfid_kt;
707 			kt = pfr_ktable_select_active(kt);
708 			if (kt == NULL) {
709 				reason = PFRES_MAPFAILED;
710 				goto done_pool_mtx; /* unsupported */
711 			}
712 			cnt = kt->pfrkt_cnt;
713 			if (cnt == 0)
714 				rpool->tblidx = 0;
715 			else
716 				rpool->tblidx = (int)arc4random_uniform(cnt);
717 			memset(&rpool->counter, 0, sizeof(rpool->counter));
718 			if (prefer_ipv6_nexthop)
719 				wanted_af = AF_INET6;
720 		retry_other_af_random:
721 			if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
722 			    wanted_af, pf_islinklocal, false)) {
723 				/* Retry with IPv4 nexthop for IPv4 traffic */
724 				if (prefer_ipv6_nexthop &&
725 				    wanted_af == AF_INET6 &&
726 				    (*naf) == AF_INET) {
727 					wanted_af = AF_INET;
728 					goto retry_other_af_random;
729 				} else {
730 					 /* no hosts in wanted AF */
731 					reason = PFRES_MAPFAILED;
732 					goto done_pool_mtx;
733 				}
734 			}
735 			pf_addrcpy(naddr, &rpool->counter, wanted_af);
736 		} else if (init_addr != NULL && PF_AZERO(init_addr,
737 		    wanted_af)) {
738 			switch (wanted_af) {
739 #ifdef INET
740 			case AF_INET:
741 				rpool->counter.addr32[0] = arc4random();
742 				break;
743 #endif /* INET */
744 #ifdef INET6
745 			case AF_INET6:
746 				if (rmask->addr32[3] != 0xffffffff)
747 					rpool->counter.addr32[3] =
748 					    arc4random();
749 				else
750 					break;
751 				if (rmask->addr32[2] != 0xffffffff)
752 					rpool->counter.addr32[2] =
753 					    arc4random();
754 				else
755 					break;
756 				if (rmask->addr32[1] != 0xffffffff)
757 					rpool->counter.addr32[1] =
758 					    arc4random();
759 				else
760 					break;
761 				if (rmask->addr32[0] != 0xffffffff)
762 					rpool->counter.addr32[0] =
763 					    arc4random();
764 				break;
765 #endif /* INET6 */
766 			}
767 			pf_poolmask(naddr, raddr, rmask, &rpool->counter,
768 			    wanted_af);
769 			pf_addrcpy(init_addr, naddr, wanted_af);
770 
771 		} else {
772 			pf_addr_inc(&rpool->counter, wanted_af);
773 			pf_poolmask(naddr, raddr, rmask, &rpool->counter,
774 			    wanted_af);
775 		}
776 		break;
777 	case PF_POOL_SRCHASH:
778 	    {
779 		unsigned char hash[16];
780 
781 		hashidx =
782 		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key,
783 		    wanted_af);
784 		if (rpool->cur->addr.type == PF_ADDR_TABLE ||
785 		    rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
786 			if (rpool->cur->addr.type == PF_ADDR_TABLE)
787 				kt = rpool->cur->addr.p.tbl;
788 			else
789 				kt = rpool->cur->addr.p.dyn->pfid_kt;
790 			kt = pfr_ktable_select_active(kt);
791 			if (kt == NULL) {
792 				reason = PFRES_MAPFAILED;
793 				goto done_pool_mtx; /* unsupported */
794 			}
795 			cnt = kt->pfrkt_cnt;
796 			if (cnt == 0)
797 				rpool->tblidx = 0;
798 			else
799 				rpool->tblidx = (int)(hashidx % cnt);
800 			memset(&rpool->counter, 0, sizeof(rpool->counter));
801 			if (prefer_ipv6_nexthop)
802 				wanted_af = AF_INET6;
803 		retry_other_af_srchash:
804 			if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
805 			    wanted_af, pf_islinklocal, false)) {
806 				/* Retry with IPv4 nexthop for IPv4 traffic */
807 				if (prefer_ipv6_nexthop &&
808 				    wanted_af == AF_INET6 &&
809 				    (*naf) == AF_INET) {
810 					wanted_af = AF_INET;
811 					goto retry_other_af_srchash;
812 				} else {
813 					 /* no hosts in wanted AF */
814 					reason = PFRES_MAPFAILED;
815 					goto done_pool_mtx;
816 				}
817 			}
818 			pf_addrcpy(naddr, &rpool->counter, wanted_af);
819 		} else {
820 			pf_poolmask(naddr, raddr, rmask,
821 			    (struct pf_addr *)&hash, wanted_af);
822 		}
823 		break;
824 	    }
825 	case PF_POOL_ROUNDROBIN:
826 	    {
827 		struct pf_kpooladdr *acur = rpool->cur;
828 
829 	retry_other_af_rr:
830 		if (prefer_ipv6_nexthop)
831 			wanted_af = rpool->ipv6_nexthop_af;
832 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
833 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
834 			    &rpool->tblidx, &rpool->counter, wanted_af,
835 			    NULL, true))
836 				goto get_addr;
837 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
838 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
839 			    &rpool->tblidx, &rpool->counter, wanted_af,
840 			    pf_islinklocal, true))
841 				goto get_addr;
842 		} else if (rpool->cur->af == wanted_af &&
843 		    pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af))
844 			goto get_addr;
845 		if (prefer_ipv6_nexthop &&
846 		    (*naf) == AF_INET && wanted_af == AF_INET6) {
847 			/* Reset table index when changing wanted AF. */
848 			rpool->tblidx = -1;
849 			rpool->ipv6_nexthop_af = AF_INET;
850 			goto retry_other_af_rr;
851 		}
852 	try_next:
853 		/* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */
854 		rpool->ipv6_nexthop_af = AF_INET6;
855 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
856 			rpool->cur = TAILQ_FIRST(&rpool->list);
857 		else
858 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
859 	try_next_ipv6_nexthop_rr:
860 		/* Reset table index when iterating pools or changing wanted AF. */
861 		rpool->tblidx = -1;
862 		if (prefer_ipv6_nexthop)
863 			wanted_af = rpool->ipv6_nexthop_af;
864 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
865 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
866 			    &rpool->tblidx, &rpool->counter, wanted_af, NULL,
867 			    true))
868 				goto get_addr;
869 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
870 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
871 			    &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal,
872 			    true))
873 				goto get_addr;
874 		} else {
875 			if (rpool->cur->af == wanted_af) {
876 				raddr = &rpool->cur->addr.v.a.addr;
877 				rmask = &rpool->cur->addr.v.a.mask;
878 				pf_addrcpy(&rpool->counter, raddr, wanted_af);
879 				goto get_addr;
880 			}
881 		}
882 		if (prefer_ipv6_nexthop &&
883 		    (*naf) == AF_INET && wanted_af == AF_INET6) {
884 			rpool->ipv6_nexthop_af = AF_INET;
885 			goto try_next_ipv6_nexthop_rr;
886 		}
887 		if (rpool->cur != acur)
888 			goto try_next;
889 		reason = PFRES_MAPFAILED;
890 		goto done_pool_mtx;
891 	get_addr:
892 		pf_addrcpy(naddr, &rpool->counter, wanted_af);
893 		if (init_addr != NULL && PF_AZERO(init_addr, wanted_af))
894 			pf_addrcpy(init_addr, naddr, wanted_af);
895 		pf_addr_inc(&rpool->counter, wanted_af);
896 		break;
897 	    }
898 	}
899 
900 	if (wanted_af == 0) {
901 		reason = PFRES_MAPFAILED;
902 		goto done_pool_mtx;
903 	}
904 
905 	if (nkif)
906 		*nkif = rpool->cur->kif;
907 
908 	(*naf) = wanted_af;
909 
910 done_pool_mtx:
911 	mtx_unlock(&rpool->mtx);
912 
913 	return (reason);
914 }
915 
916 u_short
pf_map_addr_sn(sa_family_t saf,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,sa_family_t * naf,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_kpool * rpool,pf_sn_types_t sn_type)917 pf_map_addr_sn(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
918     struct pf_addr *naddr, sa_family_t *naf, struct pfi_kkif **nkif,
919     struct pf_addr *init_addr, struct pf_kpool *rpool, pf_sn_types_t sn_type)
920 {
921 	struct pf_ksrc_node	*sn = NULL;
922 	struct pf_srchash	*sh = NULL;
923 	u_short			 reason = 0;
924 
925 	/*
926 	 * If this is a sticky-address rule, try to find an existing src_node.
927 	 */
928 	if (rpool->opts & PF_POOL_STICKYADDR &&
929 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
930 		sn = pf_find_src_node(saddr, r, saf, &sh, sn_type, false);
931 
932 	if (sn != NULL) {
933 		PF_SRC_NODE_LOCK_ASSERT(sn);
934 		/*
935 		 * Check if source node's redirection address still exists
936 		 * in pool from which the SN was created. If not, delete it.
937 		 * Similar to pf_kill_srcnodes(). Unlink the source node
938 		 * from tree, unlink it from states, then free it. Do not
939 		 * overlap source node and state locks to avoid LOR.
940 		 */
941 		if (!pf_check_src_node_valid(sn, rpool)) {
942 			pf_unlink_src_node(sn);
943 			PF_SRC_NODE_UNLOCK(sn);
944 			if (V_pf_status.debug >= PF_DEBUG_NOISY) {
945 				printf("%s: stale src tracking (%d) ",
946 				    __func__, sn_type);
947 				pf_print_host(saddr, 0, saf);
948 				printf(" to ");
949 				pf_print_host(&(sn->raddr), 0, sn->raf);
950 				if (nkif)
951 					printf("@%s", sn->rkif->pfik_name);
952 				printf("\n");
953 			}
954 
955 			for (int i = 0; i <= V_pf_hashmask; i++) {
956 				struct pf_idhash *ih = &V_pf_idhash[i];
957 				struct pf_kstate *st;
958 
959 				PF_HASHROW_LOCK(ih);
960 				LIST_FOREACH(st, &ih->states, entry) {
961 					if (st->sns[sn->type] == sn) {
962 						st->sns[sn->type] = NULL;
963 					}
964 				}
965 				PF_HASHROW_UNLOCK(ih);
966 			}
967 			pf_free_src_node(sn);
968 			counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
969 			sn = NULL;
970 			goto map_addr;
971 		}
972 
973 		(*naf) = sn->raf;
974 
975 		/* If the supplied address is the same as the current one we've
976 		 * been asked before, so tell the caller that there's no other
977 		 * address to be had. */
978 
979 		if (PF_AEQ(naddr, &(sn->raddr), *naf)) {
980 			printf("%s: no more addresses\n", __func__);
981 			reason = PFRES_MAPFAILED;
982 			goto done;
983 		}
984 
985 		pf_addrcpy(naddr, &(sn->raddr), *naf);
986 
987 		if (nkif)
988 			*nkif = sn->rkif;
989 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
990 			printf("%s: src tracking maps ", __func__);
991 			pf_print_host(saddr, 0, saf);
992 			printf(" to ");
993 			pf_print_host(naddr, 0, *naf);
994 			if (nkif)
995 				printf("@%s", (*nkif)->pfik_name);
996 			printf("\n");
997 		}
998 		goto done;
999 	}
1000 
1001 map_addr:
1002 	/*
1003 	 * Source node has not been found or is invalid. Find a new address
1004 	 * and store it in variables given by the caller.
1005 	 */
1006 	if ((reason = pf_map_addr(saf, r, saddr, naddr, nkif, naf, init_addr,
1007 	    rpool)) != 0) {
1008 		if (V_pf_status.debug >= PF_DEBUG_MISC)
1009 			printf("%s: pf_map_addr has failed\n", __func__);
1010 		goto done;
1011 	}
1012 
1013 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
1014 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
1015 		printf("%s: selected address ", __func__);
1016 		pf_print_host(naddr, 0, *naf);
1017 		if (nkif)
1018 			printf("@%s", (*nkif)->pfik_name);
1019 		printf("\n");
1020 	}
1021 
1022 done:
1023 	if (sn != NULL)
1024 		PF_SRC_NODE_UNLOCK(sn);
1025 
1026 	return (reason);
1027 }
1028 
1029 u_short
pf_get_translation(struct pf_test_ctx * ctx)1030 pf_get_translation(struct pf_test_ctx *ctx)
1031 {
1032 	struct pf_krule	*r = NULL;
1033 	u_short		 transerror;
1034 
1035 	PF_RULES_RASSERT();
1036 	KASSERT(ctx->sk == NULL, ("*skp not NULL"));
1037 	KASSERT(ctx->nk == NULL, ("*nkp not NULL"));
1038 
1039 	ctx->nr = NULL;
1040 
1041 	if (ctx->pd->dir == PF_OUT) {
1042 		r = pf_match_translation(PF_RULESET_BINAT, ctx);
1043 		if (r == NULL)
1044 			r = pf_match_translation(PF_RULESET_NAT, ctx);
1045 	} else {
1046 		r = pf_match_translation(PF_RULESET_RDR, ctx);
1047 		if (r == NULL)
1048 			r = pf_match_translation(PF_RULESET_BINAT, ctx);
1049 	}
1050 
1051 	if (r == NULL)
1052 		return (PFRES_MAX);
1053 
1054 	switch (r->action) {
1055 	case PF_NONAT:
1056 	case PF_NOBINAT:
1057 	case PF_NORDR:
1058 		return (PFRES_MAX);
1059 	}
1060 
1061 	transerror = pf_get_transaddr(ctx, r, r->action, &(r->rdr));
1062 	if (transerror == PFRES_MATCH)
1063 		ctx->nr = r;
1064 
1065 	return (transerror);
1066 }
1067 
1068 u_short
pf_get_transaddr(struct pf_test_ctx * ctx,struct pf_krule * r,uint8_t nat_action,struct pf_kpool * rpool)1069 pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
1070     uint8_t nat_action, struct pf_kpool *rpool)
1071 {
1072 	struct pf_pdesc	*pd = ctx->pd;
1073 	struct pf_addr	*naddr;
1074 	int		 idx;
1075 	uint16_t	*nportp;
1076 	uint16_t	 low, high;
1077 	u_short		 reason;
1078 
1079 	PF_RULES_RASSERT();
1080 	KASSERT(r != NULL, ("r is NULL"));
1081 	KASSERT(!(r->rule_flag & PFRULE_AFTO), ("AFTO rule"));
1082 
1083 	if (ctx->sk == NULL && ctx->nk == NULL) {
1084 		if (pf_state_key_setup(pd, pd->nsport, pd->ndport, &ctx->sk,
1085 		    &ctx->nk))
1086 			return (PFRES_MEMORY);
1087 	}
1088 
1089 	switch (nat_action) {
1090 	case PF_NAT:
1091 		idx = pd->sidx;
1092 		break;
1093 	case PF_BINAT:
1094 		idx = 1;
1095 		break;
1096 	case PF_RDR:
1097 		idx = pd->didx;
1098 		break;
1099 	}
1100 	naddr = &ctx->nk->addr[idx];
1101 	nportp = &ctx->nk->port[idx];
1102 
1103 	switch (nat_action) {
1104 	case PF_NAT:
1105 		if (pd->proto == IPPROTO_ICMP) {
1106 			low = 1;
1107 			high = 65535;
1108 		} else {
1109 			low  = rpool->proxy_port[0];
1110 			high = rpool->proxy_port[1];
1111 		}
1112 		if (rpool->mape.offset > 0) {
1113 			if (pf_get_mape_sport(pd, r, naddr, nportp,
1114 			    &ctx->udp_mapping, rpool)) {
1115 				DPFPRINTF(PF_DEBUG_MISC,
1116 				    "pf: MAP-E port allocation (%u/%u/%u)"
1117 				    " failed",
1118 				    rpool->mape.offset,
1119 				    rpool->mape.psidlen,
1120 				    rpool->mape.psid);
1121 				reason = PFRES_MAPFAILED;
1122 				goto notrans;
1123 			}
1124 		} else if (pf_get_sport(pd, r, naddr, nportp, low, high,
1125 		    rpool, &ctx->udp_mapping, PF_SN_NAT)) {
1126 			DPFPRINTF(PF_DEBUG_MISC,
1127 			    "pf: NAT proxy port allocation (%u-%u) failed",
1128 			    rpool->proxy_port[0], rpool->proxy_port[1]);
1129 			reason = PFRES_MAPFAILED;
1130 			goto notrans;
1131 		}
1132 		break;
1133 	case PF_BINAT:
1134 		switch (pd->dir) {
1135 		case PF_OUT:
1136 			if (rpool->cur->addr.type == PF_ADDR_DYNIFTL){
1137 				switch (pd->af) {
1138 #ifdef INET
1139 				case AF_INET:
1140 					if (rpool->cur->addr.p.dyn->
1141 					    pfid_acnt4 < 1) {
1142 						reason = PFRES_MAPFAILED;
1143 						goto notrans;
1144 					}
1145 					pf_poolmask(naddr,
1146 					    &rpool->cur->addr.p.dyn->pfid_addr4,
1147 					    &rpool->cur->addr.p.dyn->pfid_mask4,
1148 					    &pd->nsaddr, AF_INET);
1149 					break;
1150 #endif /* INET */
1151 #ifdef INET6
1152 				case AF_INET6:
1153 					if (rpool->cur->addr.p.dyn->
1154 					    pfid_acnt6 < 1) {
1155 						reason = PFRES_MAPFAILED;
1156 						goto notrans;
1157 					}
1158 					pf_poolmask(naddr,
1159 					    &rpool->cur->addr.p.dyn->pfid_addr6,
1160 					    &rpool->cur->addr.p.dyn->pfid_mask6,
1161 					    &pd->nsaddr, AF_INET6);
1162 					break;
1163 #endif /* INET6 */
1164 				}
1165 			} else
1166 				pf_poolmask(naddr,
1167 				    &rpool->cur->addr.v.a.addr,
1168 				    &rpool->cur->addr.v.a.mask, &pd->nsaddr,
1169 				    pd->af);
1170 			break;
1171 		case PF_IN:
1172 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
1173 				switch (pd->af) {
1174 #ifdef INET
1175 				case AF_INET:
1176 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
1177 						reason = PFRES_MAPFAILED;
1178 						goto notrans;
1179 					}
1180 					pf_poolmask(naddr,
1181 					    &r->src.addr.p.dyn->pfid_addr4,
1182 					    &r->src.addr.p.dyn->pfid_mask4,
1183 					    &pd->ndaddr, AF_INET);
1184 					break;
1185 #endif /* INET */
1186 #ifdef INET6
1187 				case AF_INET6:
1188 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
1189 						reason = PFRES_MAPFAILED;
1190 						goto notrans;
1191 					}
1192 					pf_poolmask(naddr,
1193 					    &r->src.addr.p.dyn->pfid_addr6,
1194 					    &r->src.addr.p.dyn->pfid_mask6,
1195 					    &pd->ndaddr, AF_INET6);
1196 					break;
1197 #endif /* INET6 */
1198 				}
1199 			} else
1200 				pf_poolmask(naddr, &r->src.addr.v.a.addr,
1201 				    &r->src.addr.v.a.mask, &pd->ndaddr, pd->af);
1202 			break;
1203 		}
1204 		break;
1205 	case PF_RDR: {
1206 		struct pf_state_key_cmp key;
1207 		int tries;
1208 		uint16_t cut, low, high, nport;
1209 
1210 		reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr,
1211 		    &(pd->naf), NULL, NULL, rpool, PF_SN_NAT);
1212 
1213 		if (reason != 0)
1214 			goto notrans;
1215 		if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
1216 			pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask,
1217 			    &pd->ndaddr, pd->af);
1218 
1219 		/* Do not change SCTP ports. */
1220 		if (pd->proto == IPPROTO_SCTP)
1221 			break;
1222 
1223 		if (rpool->proxy_port[1]) {
1224 			uint32_t	tmp_nport;
1225 			uint16_t	div;
1226 
1227 			div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1;
1228 			div = (div == 0) ? 1 : div;
1229 
1230 			tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) +
1231 			    rpool->proxy_port[0];
1232 
1233 			/* Wrap around if necessary. */
1234 			if (tmp_nport > 65535)
1235 				tmp_nport -= 65535;
1236 			nport = htons((uint16_t)tmp_nport);
1237 		} else if (rpool->proxy_port[0])
1238 			nport = htons(rpool->proxy_port[0]);
1239 		else
1240 			nport = pd->ndport;
1241 
1242 		/*
1243 		 * Update the destination port.
1244 		 */
1245 		*nportp = nport;
1246 
1247 		/*
1248 		 * Do we have a source port conflict in the stack state?  Try to
1249 		 * modulate the source port if so.  Note that this is racy since
1250 		 * the state lookup may not find any matches here but will once
1251 		 * pf_create_state() actually instantiates the state.
1252 		 */
1253 		bzero(&key, sizeof(key));
1254 		key.af = pd->af;
1255 		key.proto = pd->proto;
1256 		key.port[0] = pd->nsport;
1257 		pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af);
1258 		key.port[1] = nport;
1259 		pf_addrcpy(&key.addr[1], naddr, key.af);
1260 
1261 		if (!pf_find_state_all_exists(&key, PF_OUT))
1262 			break;
1263 
1264 		tries = 0;
1265 
1266 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
1267 		high = 65535;
1268 		cut = arc4random() % (1 + high - low) + low;
1269 		for (uint32_t tmp = cut;
1270 		    tmp <= high && tmp <= UINT16_MAX &&
1271 		    tries < V_pf_rdr_srcport_rewrite_tries;
1272 		    tmp++, tries++) {
1273 			key.port[0] = htons(tmp);
1274 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
1275 				/* Update the source port. */
1276 				ctx->nk->port[0] = htons(tmp);
1277 				goto out;
1278 			}
1279 		}
1280 		for (uint32_t tmp = cut - 1;
1281 		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
1282 		    tmp--, tries++) {
1283 			key.port[0] = htons(tmp);
1284 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
1285 				/* Update the source port. */
1286 				ctx->nk->port[0] = htons(tmp);
1287 				goto out;
1288 			}
1289 		}
1290 
1291 		/*
1292 		 * We failed to find a match.  Push on ahead anyway, let
1293 		 * pf_state_insert() be the arbiter of whether the state
1294 		 * conflict is tolerable.  In particular, with TCP connections
1295 		 * the state may be reused if the TCP state is terminal.
1296 		 */
1297 		DPFPRINTF(PF_DEBUG_MISC,
1298 		    "pf: RDR source port allocation failed");
1299 		break;
1300 
1301 out:
1302 		DPFPRINTF(PF_DEBUG_MISC,
1303 		    "pf: RDR source port allocation %u->%u",
1304 		    ntohs(pd->nsport), ntohs(ctx->nk->port[0]));
1305 		break;
1306 	}
1307 	default:
1308 		panic("%s: unknown action %u", __func__, r->action);
1309 	}
1310 
1311 	/* Return success only if translation really happened. */
1312 	if (bcmp(ctx->sk, ctx->nk, sizeof(struct pf_state_key_cmp))) {
1313 		return (PFRES_MATCH);
1314 	}
1315 
1316 	reason = PFRES_MAX;
1317 notrans:
1318 	uma_zfree(V_pf_state_key_z, ctx->nk);
1319 	uma_zfree(V_pf_state_key_z, ctx->sk);
1320 	ctx->sk = ctx->nk = NULL;
1321 
1322 	return (reason);
1323 }
1324 
/*
 * Compute the translated source and destination for an af-to rule and
 * store them in pd->nsaddr / pd->ndaddr (and pd->ndport where applicable).
 *
 * The source address and port come from the rule's nat pool via
 * pf_get_sport(); the function panics if that pool is empty.  For ICMP
 * echo request/reply the type held in pd->ndport is rewritten to the
 * equivalent type of the target address family.  The destination is
 * derived from the rule's rdr pool when one is configured, otherwise from
 * the rule's destination or nat prefix.
 *
 * Returns 0 on success and -1 on failure.  Always returns -1 when the
 * kernel is not built with both INET and INET6.
 */
int
pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
	struct pf_addr	 ndaddr, nsaddr, naddr;
	u_int16_t	 nport = 0;
	int		 prefixlen = 96;

	bzero(&nsaddr, sizeof(nsaddr));
	bzero(&ndaddr, sizeof(ndaddr));
	/*
	 * pf_map_addr_sn() compares naddr with a sticky source node's
	 * address before overwriting it, so it must not be stack garbage.
	 */
	bzero(&naddr, sizeof(naddr));

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s %s, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr");
		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
		printf("\n");
	}

	if (TAILQ_EMPTY(&r->nat.list))
		panic("pf_get_transaddr_af: no nat pool for source address");

	/* get source address and port */
	if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0],
	    r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) {
		DPFPRINTF(PF_DEBUG_MISC,
		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
		    r->nat.proxy_port[0], r->nat.proxy_port[1]);
		return (-1);
	}

	/*
	 * Translate the echo request/reply type between ICMP and ICMPv6.
	 * Both directions convert pd->ndport to host order, compare and
	 * rewrite it, then convert it back.
	 */
	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
		pd->ndport = ntohs(pd->ndport);
		if (pd->ndport == ICMP6_ECHO_REQUEST)
			pd->ndport = ICMP_ECHO;
		else if (pd->ndport == ICMP6_ECHO_REPLY)
			pd->ndport = ICMP_ECHOREPLY;
		pd->ndport = htons(pd->ndport);
	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
		pd->ndport = ntohs(pd->ndport);
		if (pd->ndport == ICMP_ECHO)
			pd->ndport = ICMP6_ECHO_REQUEST;
		else if (pd->ndport == ICMP_ECHOREPLY)
			pd->ndport = ICMP6_ECHO_REPLY;
		pd->ndport = htons(pd->ndport);
	}

	/* get the destination address and port */
	if (! TAILQ_EMPTY(&r->rdr.list)) {
		if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, &(pd->naf),
		    NULL, NULL, &r->rdr, PF_SN_NAT))
			return (-1);
		if (r->rdr.proxy_port[0])
			pd->ndport = htons(r->rdr.proxy_port[0]);

		if (pd->naf == AF_INET) {
			/* The prefix is the IPv4 rdr address */
			prefixlen = in_mask2len(
			    (struct in_addr *)&r->rdr.cur->addr.v.a.mask);
			inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		} else {
			/* The prefix is the IPv6 rdr address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL);
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		}
	} else {
		if (pd->naf == AF_INET) {
			/* The prefix is the IPv6 dst address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->dst.addr.v.a.mask, NULL);
			if (prefixlen < 32)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr,
			    prefixlen);
		} else {
			/*
			 * The prefix is the IPv6 nat address
			 * (that was stored in pd->nsaddr)
			 */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL);
			if (prefixlen > 96)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr,
			    prefixlen);
		}
	}

	pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
	pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s done, prefixlen %d, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    prefixlen);
		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
		printf("\n");
	}

	return (0);
#else
	return (-1);
#endif
}
1436