xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision 65c318630123fcf2b6f491bf4d02a5cad3031d20)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <crypto/siphash/siphash.h>
51 
52 #include <net/if.h>
53 #include <net/if_var.h>
54 #include <net/vnet.h>
55 #include <net/pfvar.h>
56 #include <net/if_pflog.h>
57 
58 #ifdef INET
59 #include <netinet/in_var.h>
60 #endif /* INET */
61 
62 #ifdef INET6
63 #include <netinet6/in6_var.h>
64 #endif /* INET6 */
65 
66 
67 /*
68  * Limit the amount of work we do to find a free source port for redirects that
69  * introduce a state conflict.
70  */
71 #define	V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
72 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
73 
74 static uint64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
75 			    struct pf_poolhashkey *, sa_family_t);
76 struct pf_krule		*pf_match_translation(int, struct pf_test_ctx *);
77 static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *,
78 			    struct pf_krule *);
79 static int		 pf_get_sport(struct pf_pdesc *, struct pf_krule *,
80 			    struct pf_addr *, uint16_t *, uint16_t, uint16_t,
81 			    struct pf_kpool *, struct pf_udp_mapping **,
82 			    pf_sn_types_t);
83 static bool		 pf_islinklocal(const sa_family_t, const struct pf_addr *);
84 
85 static uint64_t
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)86 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
87     struct pf_poolhashkey *key, sa_family_t af)
88 {
89 	SIPHASH_CTX	 ctx;
90 #ifdef INET6
91 	union {
92 		uint64_t hash64;
93 		uint32_t hash32[2];
94 	} h;
95 #endif /* INET6 */
96 	uint64_t	 res = 0;
97 
98 	_Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, "");
99 
100 	switch (af) {
101 #ifdef INET
102 	case AF_INET:
103 		res = SipHash24(&ctx, (const uint8_t *)key,
104 		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
105 		hash->addr32[0] = res;
106 		break;
107 #endif /* INET */
108 #ifdef INET6
109 	case AF_INET6:
110 		res = SipHash24(&ctx, (const uint8_t *)key,
111 		    &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0]));
112 		h.hash64 = res;
113 		hash->addr32[0] = h.hash32[0];
114 		hash->addr32[1] = h.hash32[1];
115 		/*
116 		 * siphash isn't big enough, but flipping it around is
117 		 * good enough here.
118 		 */
119 		hash->addr32[2] = ~h.hash32[1];
120 		hash->addr32[3] = ~h.hash32[0];
121 		break;
122 #endif /* INET6 */
123 	default:
124 		unhandled_af(af);
125 	}
126 	return (res);
127 }
128 
/*
 * Rule-matching helper for loops that walk rules through a cursor named r:
 * when the mismatch condition t holds, r is set to a and the enclosing loop
 * is restarted with continue.  The trailing "else do { } while (0)" makes
 * the macro usable as a single statement terminated by a semicolon.
 */
#define PF_TEST_ATTRIB(t, a)		\
	if (t) {			\
		r = (a);		\
		continue;		\
	} else do {			\
	} while (0)
135 
136 static enum pf_test_status
pf_match_translation_rule(int rs_num,struct pf_test_ctx * ctx,struct pf_kruleset * ruleset)137 pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset)
138 {
139 	struct pf_krule		*r;
140 	struct pf_pdesc		*pd = ctx->pd;
141 	int			 rtableid = -1;
142 
143 	r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
144 	while (r != NULL) {
145 		struct pf_rule_addr	*src = NULL, *dst = NULL;
146 		struct pf_addr_wrap	*xdst = NULL;
147 
148 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
149 			src = &r->dst;
150 			if (r->rdr.cur != NULL)
151 				xdst = &r->rdr.cur->addr;
152 		} else {
153 			src = &r->src;
154 			dst = &r->dst;
155 		}
156 
157 		pf_counter_u64_add(&r->evaluations, 1);
158 		PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot,
159 			r->skip[PF_SKIP_IFP]);
160 		PF_TEST_ATTRIB(r->direction && r->direction != pd->dir,
161 			r->skip[PF_SKIP_DIR]);
162 		PF_TEST_ATTRIB(r->af && r->af != pd->af,
163 			r->skip[PF_SKIP_AF]);
164 		PF_TEST_ATTRIB(r->proto && r->proto != pd->proto,
165 			r->skip[PF_SKIP_PROTO]);
166 		PF_TEST_ATTRIB(PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af,
167 		    src->neg, pd->kif, M_GETFIB(pd->m)),
168 			r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
169 			    PF_SKIP_DST_ADDR]);
170 		PF_TEST_ATTRIB(src->port_op && !pf_match_port(src->port_op,
171 		    src->port[0], src->port[1], pd->nsport),
172 			r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
173 			    PF_SKIP_DST_PORT]);
174 		PF_TEST_ATTRIB(dst != NULL &&
175 		    PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL,
176 		    M_GETFIB(pd->m)),
177 			r->skip[PF_SKIP_DST_ADDR]);
178 		PF_TEST_ATTRIB(xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af,
179 		    0, NULL, M_GETFIB(pd->m)),
180 			TAILQ_NEXT(r, entries));
181 		PF_TEST_ATTRIB(dst != NULL && dst->port_op &&
182 		    !pf_match_port(dst->port_op, dst->port[0],
183 		    dst->port[1], pd->ndport),
184 			r->skip[PF_SKIP_DST_PORT]);
185 		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag,
186 		    pd->pf_mtag ? pd->pf_mtag->tag : 0),
187 			TAILQ_NEXT(r, entries));
188 		PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
189 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
190 		    &pd->hdr.tcp), r->os_fingerprint)),
191 			TAILQ_NEXT(r, entries));
192 		if (r->tag)
193 			ctx->tag = r->tag;
194 		if (r->rtableid >= 0)
195 			rtableid = r->rtableid;
196 		if (r->anchor == NULL) {
197 			if (r->action == PF_NONAT ||
198 			    r->action == PF_NORDR ||
199 			    r->action == PF_NOBINAT) {
200 				*ctx->rm = NULL;
201 			} else {
202 				/*
203 				 * found matching r
204 				 */
205 				ctx->tr = r;
206 				/*
207 				 * anchor, with ruleset, where r belongs to
208 				 */
209 				*ctx->am = ctx->a;
210 				/*
211 				 * ruleset where r belongs to
212 				 */
213 				*ctx->rsm = ruleset;
214 				/*
215 				 * ruleset, where anchor belongs to.
216 				 */
217 				ctx->arsm = ctx->aruleset;
218 			}
219 		} else {
220 			ctx->a = r;			/* remember anchor */
221 			ctx->aruleset = ruleset;	/* and its ruleset */
222 			if (pf_step_into_translation_anchor(rs_num, ctx,
223 			    r) != PF_TEST_OK) {
224 				break;
225 			}
226 		}
227 		r = TAILQ_NEXT(r, entries);
228 	}
229 
230 	if (ctx->tag > 0 && pf_tag_packet(pd, ctx->tag))
231 		return (PF_TEST_FAIL);
232 	if (rtableid >= 0)
233 		M_SETFIB(pd->m, rtableid);
234 
235 	return (PF_TEST_OK);
236 }
237 
238 static enum pf_test_status
pf_step_into_translation_anchor(int rs_num,struct pf_test_ctx * ctx,struct pf_krule * r)239 pf_step_into_translation_anchor(int rs_num, struct pf_test_ctx *ctx, struct pf_krule *r)
240 {
241 	enum pf_test_status	rv;
242 
243 	PF_RULES_RASSERT();
244 
245 	if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
246 		printf("%s: anchor stack overflow on %s\n",
247 		    __func__, r->anchor->name);
248 		return (PF_TEST_FAIL);
249 	}
250 
251 	ctx->depth++;
252 
253 	if (r->anchor_wildcard) {
254 		struct pf_kanchor *child;
255 		rv = PF_TEST_OK;
256 		RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
257 			rv = pf_match_translation_rule(rs_num, ctx, &child->ruleset);
258 			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
259 				/*
260 				 * we either hit a rule qith quick action
261 				 * (more likely), or hit some runtime
262 				 * error (e.g. pool_get() faillure).
263 				 */
264 				break;
265 			}
266 		}
267 	} else {
268 		rv = pf_match_translation_rule(rs_num, ctx, &r->anchor->ruleset);
269 	}
270 
271 	ctx->depth--;
272 
273 	return (rv);
274 }
275 
276 struct pf_krule *
pf_match_translation(int rs_num,struct pf_test_ctx * ctx)277 pf_match_translation(int rs_num, struct pf_test_ctx *ctx)
278 {
279 	enum pf_test_status rv;
280 
281 	MPASS(ctx->depth == 0);
282 	rv = pf_match_translation_rule(rs_num, ctx, &pf_main_ruleset);
283 	MPASS(ctx->depth == 0);
284 	if (rv != PF_TEST_OK)
285 		return (NULL);
286 
287 	return (ctx->tr);
288 }
289 
290 static int
pf_get_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,uint16_t low,uint16_t high,struct pf_kpool * rpool,struct pf_udp_mapping ** udp_mapping,pf_sn_types_t sn_type)291 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr,
292     uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool *rpool,
293     struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type)
294 {
295 	struct pf_state_key_cmp	key;
296 	struct pf_addr		init_addr;
297 	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
298 	int			sidx = pd->sidx;
299 	int			didx = pd->didx;
300 
301 	bzero(&init_addr, sizeof(init_addr));
302 
303 	if (udp_mapping) {
304 		MPASS(*udp_mapping == NULL);
305 	}
306 
307 	/*
308 	 * If we are UDP and have an existing mapping we can get source port
309 	 * from the mapping. In this case we have to look up the src_node as
310 	 * pf_map_addr would.
311 	 */
312 	if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) {
313 		struct pf_udp_endpoint_cmp udp_source;
314 
315 		bzero(&udp_source, sizeof(udp_source));
316 		udp_source.af = pd->af;
317 		pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af);
318 		udp_source.port = pd->nsport;
319 		if (udp_mapping) {
320 			struct pf_ksrc_node	*sn = NULL;
321 			struct pf_srchash	*sh = NULL;
322 			*udp_mapping = pf_udp_mapping_find(&udp_source);
323 			if (*udp_mapping) {
324 				pf_addrcpy(naddr,
325 				    &(*udp_mapping)->endpoints[1].addr,
326 				    pd->af);
327 				*nport = (*udp_mapping)->endpoints[1].port;
328 				/*
329 				 * Try to find a src_node as per pf_map_addr().
330 				 * XXX: Why? This code seems to do nothing.
331 				 */
332 				if (rpool->opts & PF_POOL_STICKYADDR &&
333 				    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
334 					sn = pf_find_src_node(&pd->nsaddr, r,
335 					    pd->af, &sh, sn_type, false);
336 				if (sn != NULL)
337 					PF_SRC_NODE_UNLOCK(sn);
338 				return (0);
339 			} else {
340 				*udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr,
341 				    pd->nsport, &init_addr, 0);
342 				if (*udp_mapping == NULL)
343 					return (1);
344 			}
345 		}
346 	}
347 
348 	if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, &(pd->naf), NULL,
349 	    &init_addr, rpool, sn_type))
350 		goto failed;
351 
352 	if (pd->proto == IPPROTO_ICMP) {
353 		if (pd->ndport == htons(ICMP_ECHO)) {
354 			low = 1;
355 			high = 65535;
356 		} else
357 			return (0);	/* Don't try to modify non-echo ICMP */
358 	}
359 #ifdef INET6
360 	if (pd->proto == IPPROTO_ICMPV6) {
361 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
362 			low = 1;
363 			high = 65535;
364 		} else
365 			return (0);	/* Don't try to modify non-echo ICMP */
366 	}
367 #endif /* INET6 */
368 
369 	bzero(&key, sizeof(key));
370 	key.af = pd->naf;
371 	key.proto = pd->proto;
372 
373 	do {
374 		pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
375 		pf_addrcpy(&key.addr[sidx], naddr, key.af);
376 		key.port[didx] = pd->ndport;
377 
378 		if (udp_mapping && *udp_mapping)
379 			pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr,
380 			    pd->af);
381 
382 		/*
383 		 * port search; start random, step;
384 		 * similar 2 portloop in in_pcbbind
385 		 */
386 		if (pd->proto == IPPROTO_SCTP) {
387 			key.port[sidx] = pd->nsport;
388 			if (!pf_find_state_all_exists(&key, dir)) {
389 				*nport = pd->nsport;
390 				return (0);
391 			} else {
392 				return (1); /* Fail mapping. */
393 			}
394 		} else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
395 		    pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
396 			/*
397 			 * XXX bug: icmp states don't use the id on both sides.
398 			 * (traceroute -I through nat)
399 			 */
400 			key.port[sidx] = pd->nsport;
401 			if (!pf_find_state_all_exists(&key, dir)) {
402 				*nport = pd->nsport;
403 				return (0);
404 			}
405 		} else if (low == high) {
406 			key.port[sidx] = htons(low);
407 			if (!pf_find_state_all_exists(&key, dir)) {
408 				if (udp_mapping && *udp_mapping != NULL) {
409 					(*udp_mapping)->endpoints[1].port = htons(low);
410 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
411 						*nport = htons(low);
412 						return (0);
413 					}
414 				} else {
415 					*nport = htons(low);
416 					return (0);
417 				}
418 			}
419 		} else {
420 			uint32_t tmp;
421 			uint16_t cut;
422 
423 			if (low > high) {
424 				tmp = low;
425 				low = high;
426 				high = tmp;
427 			}
428 			/* low < high */
429 			cut = arc4random() % (1 + high - low) + low;
430 			/* low <= cut <= high */
431 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
432 				if (udp_mapping && *udp_mapping != NULL) {
433 					(*udp_mapping)->endpoints[sidx].port = htons(tmp);
434 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
435 						*nport = htons(tmp);
436 						return (0);
437 					}
438 				} else {
439 					key.port[sidx] = htons(tmp);
440 					if (!pf_find_state_all_exists(&key, dir)) {
441 						*nport = htons(tmp);
442 						return (0);
443 					}
444 				}
445 			}
446 			tmp = cut;
447 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
448 				if (pd->proto == IPPROTO_UDP &&
449 				    (rpool->opts & PF_POOL_ENDPI &&
450 				    udp_mapping != NULL)) {
451 					(*udp_mapping)->endpoints[1].port = htons(tmp);
452 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
453 						*nport = htons(tmp);
454 						return (0);
455 					}
456 				} else {
457 					key.port[sidx] = htons(tmp);
458 					if (!pf_find_state_all_exists(&key, dir)) {
459 						*nport = htons(tmp);
460 						return (0);
461 					}
462 				}
463 			}
464 		}
465 
466 		switch (rpool->opts & PF_POOL_TYPEMASK) {
467 		case PF_POOL_RANDOM:
468 		case PF_POOL_ROUNDROBIN:
469 			/*
470 			 * pick a different source address since we're out
471 			 * of free port choices for the current one.
472 			 */
473 			if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr,
474 			    &(pd->naf), NULL, &init_addr, rpool, sn_type))
475 				return (1);
476 			break;
477 		case PF_POOL_NONE:
478 		case PF_POOL_SRCHASH:
479 		case PF_POOL_BITMASK:
480 		default:
481 			return (1);
482 		}
483 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
484 
485 failed:
486 	if (udp_mapping) {
487 		uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
488 		*udp_mapping = NULL;
489 	}
490 
491 	return (1);					/* none available */
492 }
493 
494 static bool
pf_islinklocal(const sa_family_t af,const struct pf_addr * addr)495 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
496 {
497 	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
498 		return (true);
499 	return (false);
500 }
501 
502 static int
pf_get_mape_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,struct pf_udp_mapping ** udp_mapping,struct pf_kpool * rpool)503 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r,
504     struct pf_addr *naddr, uint16_t *nport, struct pf_udp_mapping **udp_mapping,
505     struct pf_kpool *rpool)
506 {
507 	uint16_t psmask, low, highmask;
508 	uint16_t i, ahigh, cut;
509 	int ashift, psidshift;
510 
511 	ashift = 16 - rpool->mape.offset;
512 	psidshift = ashift - rpool->mape.psidlen;
513 	psmask = rpool->mape.psid & ((1U << rpool->mape.psidlen) - 1);
514 	psmask = psmask << psidshift;
515 	highmask = (1U << psidshift) - 1;
516 
517 	ahigh = (1U << rpool->mape.offset) - 1;
518 	cut = arc4random() & ahigh;
519 	if (cut == 0)
520 		cut = 1;
521 
522 	for (i = cut; i <= ahigh; i++) {
523 		low = (i << ashift) | psmask;
524 		if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask,
525 		    rpool, udp_mapping, PF_SN_NAT))
526 			return (0);
527 	}
528 	for (i = cut - 1; i > 0; i--) {
529 		low = (i << ashift) | psmask;
530 		if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask,
531 		    rpool, udp_mapping, PF_SN_NAT))
532 			return (0);
533 	}
534 	return (1);
535 }
536 
537 u_short
pf_map_addr(sa_family_t saf,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,sa_family_t * naf,struct pf_addr * init_addr,struct pf_kpool * rpool)538 pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
539     struct pf_addr *naddr, struct pfi_kkif **nkif, sa_family_t *naf,
540     struct pf_addr *init_addr, struct pf_kpool *rpool)
541 {
542 	u_short			 reason = PFRES_MATCH;
543 	struct pf_addr		*raddr = NULL, *rmask = NULL;
544 	struct pfr_ktable	*kt;
545 	uint64_t		 hashidx;
546 	int			 cnt;
547 	sa_family_t		 wanted_af;
548 	u_int8_t		 pool_type;
549 	bool			 prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH;
550 
551 	KASSERT(saf != 0, ("%s: saf == 0", __func__));
552 	KASSERT(naf != NULL, ("%s: naf = NULL", __func__));
553 	KASSERT((*naf) != 0, ("%s: *naf = 0", __func__));
554 
555 	/*
556 	 * Given (*naf) is a hint about AF of the forwarded packet.
557 	 * It might be changed if prefer_ipv6_nexthop is enabled and
558 	 * the combination of nexthop AF and packet AF allows for it.
559 	 */
560 	wanted_af = (*naf);
561 
562 	mtx_lock(&rpool->mtx);
563 	/* Find the route using chosen algorithm. Store the found route
564 	   in src_node if it was given or found. */
565 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
566 		reason = PFRES_MAPFAILED;
567 		goto done_pool_mtx;
568 	}
569 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
570 		switch (wanted_af) {
571 #ifdef INET
572 		case AF_INET:
573 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
574 			    !PF_POOL_DYNTYPE(rpool->opts)) {
575 				reason = PFRES_MAPFAILED;
576 				goto done_pool_mtx;
577 			}
578 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
579 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
580 			break;
581 #endif /* INET */
582 #ifdef INET6
583 		case AF_INET6:
584 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
585 			    !PF_POOL_DYNTYPE(rpool->opts)) {
586 				reason = PFRES_MAPFAILED;
587 				goto done_pool_mtx;
588 			}
589 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
590 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
591 			break;
592 #endif /* INET6 */
593 		default:
594 			unhandled_af(wanted_af);
595 		}
596 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
597 		if (!PF_POOL_DYNTYPE(rpool->opts)) {
598 			reason = PFRES_MAPFAILED;
599 			goto done_pool_mtx; /* unsupported */
600 		}
601 	} else {
602 		raddr = &rpool->cur->addr.v.a.addr;
603 		rmask = &rpool->cur->addr.v.a.mask;
604 	}
605 
606 	/*
607 	 * For pools with a single host with the prefer-ipv6-nexthop option
608 	 * we can return pool address of any AF, unless the forwarded packet
609 	 * is IPv6, then we can return only if pool address is IPv6.
610 	 * For non-prefer-ipv6-nexthop we can return pool address only
611 	 * of wanted AF, unless the pool address'es AF is unknown, which
612 	 * happens in case old ioctls have been used to set up the pool.
613 	 *
614 	 * Round-robin pools have their own logic for retrying next addresses.
615 	 */
616 	pool_type = rpool->opts & PF_POOL_TYPEMASK;
617 	if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK ||
618 	    ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) &&
619 	    rpool->cur->addr.type != PF_ADDR_TABLE &&
620 	    rpool->cur->addr.type != PF_ADDR_DYNIFTL)) {
621 		if (prefer_ipv6_nexthop) {
622 			if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) {
623 				reason = PFRES_MAPFAILED;
624 				goto done_pool_mtx;
625 			}
626 			wanted_af = rpool->cur->af;
627 		} else {
628 			if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) {
629 				reason = PFRES_MAPFAILED;
630 				goto done_pool_mtx;
631 			}
632 		}
633 	}
634 
635 	switch (pool_type) {
636 	case PF_POOL_NONE:
637 		pf_addrcpy(naddr, raddr, wanted_af);
638 		break;
639 	case PF_POOL_BITMASK:
640 		pf_poolmask(naddr, raddr, rmask, saddr, wanted_af);
641 		break;
642 	case PF_POOL_RANDOM:
643 		if (rpool->cur->addr.type == PF_ADDR_TABLE ||
644 		    rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
645 			if (rpool->cur->addr.type == PF_ADDR_TABLE)
646 				kt = rpool->cur->addr.p.tbl;
647 			else
648 				kt = rpool->cur->addr.p.dyn->pfid_kt;
649 			kt = pfr_ktable_select_active(kt);
650 			if (kt == NULL) {
651 				reason = PFRES_MAPFAILED;
652 				goto done_pool_mtx; /* unsupported */
653 			}
654 			cnt = kt->pfrkt_cnt;
655 			if (cnt == 0)
656 				rpool->tblidx = 0;
657 			else
658 				rpool->tblidx = (int)arc4random_uniform(cnt);
659 			memset(&rpool->counter, 0, sizeof(rpool->counter));
660 			if (prefer_ipv6_nexthop)
661 				wanted_af = AF_INET6;
662 		retry_other_af_random:
663 			if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
664 			    wanted_af, pf_islinklocal, false)) {
665 				/* Retry with IPv4 nexthop for IPv4 traffic */
666 				if (prefer_ipv6_nexthop &&
667 				    wanted_af == AF_INET6 &&
668 				    (*naf) == AF_INET) {
669 					wanted_af = AF_INET;
670 					goto retry_other_af_random;
671 				} else {
672 					 /* no hosts in wanted AF */
673 					reason = PFRES_MAPFAILED;
674 					goto done_pool_mtx;
675 				}
676 			}
677 			pf_addrcpy(naddr, &rpool->counter, wanted_af);
678 		} else if (init_addr != NULL && PF_AZERO(init_addr,
679 		    wanted_af)) {
680 			switch (wanted_af) {
681 #ifdef INET
682 			case AF_INET:
683 				rpool->counter.addr32[0] = arc4random();
684 				break;
685 #endif /* INET */
686 #ifdef INET6
687 			case AF_INET6:
688 				if (rmask->addr32[3] != 0xffffffff)
689 					rpool->counter.addr32[3] =
690 					    arc4random();
691 				else
692 					break;
693 				if (rmask->addr32[2] != 0xffffffff)
694 					rpool->counter.addr32[2] =
695 					    arc4random();
696 				else
697 					break;
698 				if (rmask->addr32[1] != 0xffffffff)
699 					rpool->counter.addr32[1] =
700 					    arc4random();
701 				else
702 					break;
703 				if (rmask->addr32[0] != 0xffffffff)
704 					rpool->counter.addr32[0] =
705 					    arc4random();
706 				break;
707 #endif /* INET6 */
708 			}
709 			pf_poolmask(naddr, raddr, rmask, &rpool->counter,
710 			    wanted_af);
711 			pf_addrcpy(init_addr, naddr, wanted_af);
712 
713 		} else {
714 			pf_addr_inc(&rpool->counter, wanted_af);
715 			pf_poolmask(naddr, raddr, rmask, &rpool->counter,
716 			    wanted_af);
717 		}
718 		break;
719 	case PF_POOL_SRCHASH:
720 	    {
721 		unsigned char hash[16];
722 
723 		hashidx =
724 		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key,
725 		    wanted_af);
726 		if (rpool->cur->addr.type == PF_ADDR_TABLE ||
727 		    rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
728 			if (rpool->cur->addr.type == PF_ADDR_TABLE)
729 				kt = rpool->cur->addr.p.tbl;
730 			else
731 				kt = rpool->cur->addr.p.dyn->pfid_kt;
732 			kt = pfr_ktable_select_active(kt);
733 			if (kt == NULL) {
734 				reason = PFRES_MAPFAILED;
735 				goto done_pool_mtx; /* unsupported */
736 			}
737 			cnt = kt->pfrkt_cnt;
738 			if (cnt == 0)
739 				rpool->tblidx = 0;
740 			else
741 				rpool->tblidx = (int)(hashidx % cnt);
742 			memset(&rpool->counter, 0, sizeof(rpool->counter));
743 			if (prefer_ipv6_nexthop)
744 				wanted_af = AF_INET6;
745 		retry_other_af_srchash:
746 			if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
747 			    wanted_af, pf_islinklocal, false)) {
748 				/* Retry with IPv4 nexthop for IPv4 traffic */
749 				if (prefer_ipv6_nexthop &&
750 				    wanted_af == AF_INET6 &&
751 				    (*naf) == AF_INET) {
752 					wanted_af = AF_INET;
753 					goto retry_other_af_srchash;
754 				} else {
755 					 /* no hosts in wanted AF */
756 					reason = PFRES_MAPFAILED;
757 					goto done_pool_mtx;
758 				}
759 			}
760 			pf_addrcpy(naddr, &rpool->counter, wanted_af);
761 		} else {
762 			pf_poolmask(naddr, raddr, rmask,
763 			    (struct pf_addr *)&hash, wanted_af);
764 		}
765 		break;
766 	    }
767 	case PF_POOL_ROUNDROBIN:
768 	    {
769 		struct pf_kpooladdr *acur = rpool->cur;
770 
771 	retry_other_af_rr:
772 		if (prefer_ipv6_nexthop)
773 			wanted_af = rpool->ipv6_nexthop_af;
774 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
775 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
776 			    &rpool->tblidx, &rpool->counter, wanted_af,
777 			    NULL, true))
778 				goto get_addr;
779 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
780 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
781 			    &rpool->tblidx, &rpool->counter, wanted_af,
782 			    pf_islinklocal, true))
783 				goto get_addr;
784 		} else if (rpool->cur->af == wanted_af &&
785 		    pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af))
786 			goto get_addr;
787 		if (prefer_ipv6_nexthop &&
788 		    (*naf) == AF_INET && wanted_af == AF_INET6) {
789 			/* Reset table index when changing wanted AF. */
790 			rpool->tblidx = -1;
791 			rpool->ipv6_nexthop_af = AF_INET;
792 			goto retry_other_af_rr;
793 		}
794 	try_next:
795 		/* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */
796 		rpool->ipv6_nexthop_af = AF_INET6;
797 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
798 			rpool->cur = TAILQ_FIRST(&rpool->list);
799 		else
800 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
801 	try_next_ipv6_nexthop_rr:
802 		/* Reset table index when iterating pools or changing wanted AF. */
803 		rpool->tblidx = -1;
804 		if (prefer_ipv6_nexthop)
805 			wanted_af = rpool->ipv6_nexthop_af;
806 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
807 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
808 			    &rpool->tblidx, &rpool->counter, wanted_af, NULL,
809 			    true))
810 				goto get_addr;
811 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
812 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
813 			    &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal,
814 			    true))
815 				goto get_addr;
816 		} else {
817 			if (rpool->cur->af == wanted_af) {
818 				raddr = &rpool->cur->addr.v.a.addr;
819 				rmask = &rpool->cur->addr.v.a.mask;
820 				pf_addrcpy(&rpool->counter, raddr, wanted_af);
821 				goto get_addr;
822 			}
823 		}
824 		if (prefer_ipv6_nexthop &&
825 		    (*naf) == AF_INET && wanted_af == AF_INET6) {
826 			rpool->ipv6_nexthop_af = AF_INET;
827 			goto try_next_ipv6_nexthop_rr;
828 		}
829 		if (rpool->cur != acur)
830 			goto try_next;
831 		reason = PFRES_MAPFAILED;
832 		goto done_pool_mtx;
833 	get_addr:
834 		pf_addrcpy(naddr, &rpool->counter, wanted_af);
835 		if (init_addr != NULL && PF_AZERO(init_addr, wanted_af))
836 			pf_addrcpy(init_addr, naddr, wanted_af);
837 		pf_addr_inc(&rpool->counter, wanted_af);
838 		break;
839 	    }
840 	}
841 
842 	if (wanted_af == 0) {
843 		reason = PFRES_MAPFAILED;
844 		goto done_pool_mtx;
845 	}
846 
847 	if (nkif)
848 		*nkif = rpool->cur->kif;
849 
850 	(*naf) = wanted_af;
851 
852 done_pool_mtx:
853 	mtx_unlock(&rpool->mtx);
854 
855 	return (reason);
856 }
857 
858 u_short
pf_map_addr_sn(sa_family_t saf,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,sa_family_t * naf,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_kpool * rpool,pf_sn_types_t sn_type)859 pf_map_addr_sn(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
860     struct pf_addr *naddr, sa_family_t *naf, struct pfi_kkif **nkif,
861     struct pf_addr *init_addr, struct pf_kpool *rpool, pf_sn_types_t sn_type)
862 {
863 	struct pf_ksrc_node	*sn = NULL;
864 	struct pf_srchash	*sh = NULL;
865 	u_short			 reason = 0;
866 
867 	/*
868 	 * If this is a sticky-address rule, try to find an existing src_node.
869 	 */
870 	if (rpool->opts & PF_POOL_STICKYADDR &&
871 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
872 		sn = pf_find_src_node(saddr, r, saf, &sh, sn_type, false);
873 
874 	if (sn != NULL) {
875 		PF_SRC_NODE_LOCK_ASSERT(sn);
876 		(*naf) = sn->raf;
877 
878 		/* If the supplied address is the same as the current one we've
879 		 * been asked before, so tell the caller that there's no other
880 		 * address to be had. */
881 
882 		if (PF_AEQ(naddr, &(sn->raddr), *naf)) {
883 			printf("%s: no more addresses\n", __func__);
884 			reason = PFRES_MAPFAILED;
885 			goto done;
886 		}
887 
888 		pf_addrcpy(naddr, &(sn->raddr), *naf);
889 
890 		if (nkif)
891 			*nkif = sn->rkif;
892 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
893 			printf("%s: src tracking maps ", __func__);
894 			pf_print_host(saddr, 0, saf);
895 			printf(" to ");
896 			pf_print_host(naddr, 0, *naf);
897 			if (nkif)
898 				printf("@%s", (*nkif)->pfik_name);
899 			printf("\n");
900 		}
901 		goto done;
902 	}
903 
904 	/*
905 	 * Source node has not been found. Find a new address and store it
906 	 * in variables given by the caller.
907 	 */
908 	if ((reason = pf_map_addr(saf, r, saddr, naddr, nkif, naf, init_addr,
909 	    rpool)) != 0) {
910 		if (V_pf_status.debug >= PF_DEBUG_MISC)
911 			printf("%s: pf_map_addr has failed\n", __func__);
912 		goto done;
913 	}
914 
915 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
916 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
917 		printf("%s: selected address ", __func__);
918 		pf_print_host(naddr, 0, *naf);
919 		if (nkif)
920 			printf("@%s", (*nkif)->pfik_name);
921 		printf("\n");
922 	}
923 
924 done:
925 	if (sn != NULL)
926 		PF_SRC_NODE_UNLOCK(sn);
927 
928 	return (reason);
929 }
930 
931 u_short
pf_get_translation(struct pf_test_ctx * ctx)932 pf_get_translation(struct pf_test_ctx *ctx)
933 {
934 	struct pf_krule	*r = NULL;
935 	u_short		 transerror;
936 
937 	PF_RULES_RASSERT();
938 	KASSERT(ctx->sk == NULL, ("*skp not NULL"));
939 	KASSERT(ctx->nk == NULL, ("*nkp not NULL"));
940 
941 	ctx->nr = NULL;
942 
943 	if (ctx->pd->dir == PF_OUT) {
944 		r = pf_match_translation(PF_RULESET_BINAT, ctx);
945 		if (r == NULL)
946 			r = pf_match_translation(PF_RULESET_NAT, ctx);
947 	} else {
948 		r = pf_match_translation(PF_RULESET_RDR, ctx);
949 		if (r == NULL)
950 			r = pf_match_translation(PF_RULESET_BINAT, ctx);
951 	}
952 
953 	if (r == NULL)
954 		return (PFRES_MAX);
955 
956 	switch (r->action) {
957 	case PF_NONAT:
958 	case PF_NOBINAT:
959 	case PF_NORDR:
960 		return (PFRES_MAX);
961 	}
962 
963 	transerror = pf_get_transaddr(ctx, r, r->action, &(r->rdr));
964 	if (transerror == PFRES_MATCH)
965 		ctx->nr = r;
966 
967 	return (transerror);
968 }
969 
970 u_short
pf_get_transaddr(struct pf_test_ctx * ctx,struct pf_krule * r,uint8_t nat_action,struct pf_kpool * rpool)971 pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
972     uint8_t nat_action, struct pf_kpool *rpool)
973 {
974 	struct pf_pdesc	*pd = ctx->pd;
975 	struct pf_addr	*naddr;
976 	uint16_t	*nportp;
977 	uint16_t	 low, high;
978 	u_short		 reason;
979 
980 	PF_RULES_RASSERT();
981 	KASSERT(r != NULL, ("r is NULL"));
982 	KASSERT(!(r->rule_flag & PFRULE_AFTO), ("AFTO rule"));
983 
984 	if (ctx->sk == NULL && ctx->nk == NULL) {
985 		if (pf_state_key_setup(pd, pd->nsport, pd->ndport, &ctx->sk,
986 		    &ctx->nk))
987 			return (PFRES_MEMORY);
988 	}
989 
990 	naddr = &ctx->nk->addr[1];
991 	nportp = &ctx->nk->port[1];
992 
993 	switch (nat_action) {
994 	case PF_NAT:
995 		if (pd->proto == IPPROTO_ICMP) {
996 			low = 1;
997 			high = 65535;
998 		} else {
999 			low  = rpool->proxy_port[0];
1000 			high = rpool->proxy_port[1];
1001 		}
1002 		if (rpool->mape.offset > 0) {
1003 			if (pf_get_mape_sport(pd, r, naddr, nportp,
1004 			    &ctx->udp_mapping, rpool)) {
1005 				DPFPRINTF(PF_DEBUG_MISC,
1006 				    "pf: MAP-E port allocation (%u/%u/%u)"
1007 				    " failed",
1008 				    rpool->mape.offset,
1009 				    rpool->mape.psidlen,
1010 				    rpool->mape.psid);
1011 				reason = PFRES_MAPFAILED;
1012 				goto notrans;
1013 			}
1014 		} else if (pf_get_sport(pd, r, naddr, nportp, low, high,
1015 		    rpool, &ctx->udp_mapping, PF_SN_NAT)) {
1016 			DPFPRINTF(PF_DEBUG_MISC,
1017 			    "pf: NAT proxy port allocation (%u-%u) failed",
1018 			    rpool->proxy_port[0], rpool->proxy_port[1]);
1019 			reason = PFRES_MAPFAILED;
1020 			goto notrans;
1021 		}
1022 		break;
1023 	case PF_BINAT:
1024 		switch (pd->dir) {
1025 		case PF_OUT:
1026 			if (rpool->cur->addr.type == PF_ADDR_DYNIFTL){
1027 				switch (pd->af) {
1028 #ifdef INET
1029 				case AF_INET:
1030 					if (rpool->cur->addr.p.dyn->
1031 					    pfid_acnt4 < 1) {
1032 						reason = PFRES_MAPFAILED;
1033 						goto notrans;
1034 					}
1035 					pf_poolmask(naddr,
1036 					    &rpool->cur->addr.p.dyn->pfid_addr4,
1037 					    &rpool->cur->addr.p.dyn->pfid_mask4,
1038 					    &pd->nsaddr, AF_INET);
1039 					break;
1040 #endif /* INET */
1041 #ifdef INET6
1042 				case AF_INET6:
1043 					if (rpool->cur->addr.p.dyn->
1044 					    pfid_acnt6 < 1) {
1045 						reason = PFRES_MAPFAILED;
1046 						goto notrans;
1047 					}
1048 					pf_poolmask(naddr,
1049 					    &rpool->cur->addr.p.dyn->pfid_addr6,
1050 					    &rpool->cur->addr.p.dyn->pfid_mask6,
1051 					    &pd->nsaddr, AF_INET6);
1052 					break;
1053 #endif /* INET6 */
1054 				}
1055 			} else
1056 				pf_poolmask(naddr,
1057 				    &rpool->cur->addr.v.a.addr,
1058 				    &rpool->cur->addr.v.a.mask, &pd->nsaddr,
1059 				    pd->af);
1060 			break;
1061 		case PF_IN:
1062 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
1063 				switch (pd->af) {
1064 #ifdef INET
1065 				case AF_INET:
1066 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
1067 						reason = PFRES_MAPFAILED;
1068 						goto notrans;
1069 					}
1070 					pf_poolmask(naddr,
1071 					    &r->src.addr.p.dyn->pfid_addr4,
1072 					    &r->src.addr.p.dyn->pfid_mask4,
1073 					    &pd->ndaddr, AF_INET);
1074 					break;
1075 #endif /* INET */
1076 #ifdef INET6
1077 				case AF_INET6:
1078 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
1079 						reason = PFRES_MAPFAILED;
1080 						goto notrans;
1081 					}
1082 					pf_poolmask(naddr,
1083 					    &r->src.addr.p.dyn->pfid_addr6,
1084 					    &r->src.addr.p.dyn->pfid_mask6,
1085 					    &pd->ndaddr, AF_INET6);
1086 					break;
1087 #endif /* INET6 */
1088 				}
1089 			} else
1090 				pf_poolmask(naddr, &r->src.addr.v.a.addr,
1091 				    &r->src.addr.v.a.mask, &pd->ndaddr, pd->af);
1092 			break;
1093 		}
1094 		break;
1095 	case PF_RDR: {
1096 		struct pf_state_key_cmp key;
1097 		int tries;
1098 		uint16_t cut, low, high, nport;
1099 
1100 		reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr,
1101 		    &(pd->naf), NULL, NULL, rpool, PF_SN_NAT);
1102 
1103 		if (reason != 0)
1104 			goto notrans;
1105 		if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
1106 			pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask,
1107 			    &pd->ndaddr, pd->af);
1108 
1109 		/* Do not change SCTP ports. */
1110 		if (pd->proto == IPPROTO_SCTP)
1111 			break;
1112 
1113 		if (rpool->proxy_port[1]) {
1114 			uint32_t	tmp_nport;
1115 			uint16_t	div;
1116 
1117 			div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1;
1118 			div = (div == 0) ? 1 : div;
1119 
1120 			tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) +
1121 			    rpool->proxy_port[0];
1122 
1123 			/* Wrap around if necessary. */
1124 			if (tmp_nport > 65535)
1125 				tmp_nport -= 65535;
1126 			nport = htons((uint16_t)tmp_nport);
1127 		} else if (rpool->proxy_port[0])
1128 			nport = htons(rpool->proxy_port[0]);
1129 		else
1130 			nport = pd->ndport;
1131 
1132 		/*
1133 		 * Update the destination port.
1134 		 */
1135 		*nportp = nport;
1136 
1137 		/*
1138 		 * Do we have a source port conflict in the stack state?  Try to
1139 		 * modulate the source port if so.  Note that this is racy since
1140 		 * the state lookup may not find any matches here but will once
1141 		 * pf_create_state() actually instantiates the state.
1142 		 */
1143 		bzero(&key, sizeof(key));
1144 		key.af = pd->af;
1145 		key.proto = pd->proto;
1146 		key.port[0] = pd->nsport;
1147 		pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af);
1148 		key.port[1] = nport;
1149 		pf_addrcpy(&key.addr[1], naddr, key.af);
1150 
1151 		if (!pf_find_state_all_exists(&key, PF_OUT))
1152 			break;
1153 
1154 		tries = 0;
1155 
1156 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
1157 		high = 65535;
1158 		cut = arc4random() % (1 + high - low) + low;
1159 		for (uint32_t tmp = cut;
1160 		    tmp <= high && tmp <= UINT16_MAX &&
1161 		    tries < V_pf_rdr_srcport_rewrite_tries;
1162 		    tmp++, tries++) {
1163 			key.port[0] = htons(tmp);
1164 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
1165 				/* Update the source port. */
1166 				ctx->nk->port[0] = htons(tmp);
1167 				goto out;
1168 			}
1169 		}
1170 		for (uint32_t tmp = cut - 1;
1171 		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
1172 		    tmp--, tries++) {
1173 			key.port[0] = htons(tmp);
1174 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
1175 				/* Update the source port. */
1176 				ctx->nk->port[0] = htons(tmp);
1177 				goto out;
1178 			}
1179 		}
1180 
1181 		/*
1182 		 * We failed to find a match.  Push on ahead anyway, let
1183 		 * pf_state_insert() be the arbiter of whether the state
1184 		 * conflict is tolerable.  In particular, with TCP connections
1185 		 * the state may be reused if the TCP state is terminal.
1186 		 */
1187 		DPFPRINTF(PF_DEBUG_MISC,
1188 		    "pf: RDR source port allocation failed");
1189 		break;
1190 
1191 out:
1192 		DPFPRINTF(PF_DEBUG_MISC,
1193 		    "pf: RDR source port allocation %u->%u",
1194 		    ntohs(pd->nsport), ntohs(ctx->nk->port[0]));
1195 		break;
1196 	}
1197 	default:
1198 		panic("%s: unknown action %u", __func__, r->action);
1199 	}
1200 
1201 	/* Return success only if translation really happened. */
1202 	if (bcmp(ctx->sk, ctx->nk, sizeof(struct pf_state_key_cmp))) {
1203 		return (PFRES_MATCH);
1204 	}
1205 
1206 	reason = PFRES_MAX;
1207 notrans:
1208 	uma_zfree(V_pf_state_key_z, ctx->nk);
1209 	uma_zfree(V_pf_state_key_z, ctx->sk);
1210 	ctx->sk = ctx->nk = NULL;
1211 
1212 	return (reason);
1213 }
1214 
/*
 * Compute the translated source and destination for an af-to rule and
 * store them in pd->nsaddr / pd->ndaddr (and, where applicable,
 * pd->ndport), expressed in the target address family pd->naf.
 *
 * The source address is obtained from the rule's nat pool through
 * pf_get_sport(); the rule must have a non-empty nat pool or the function
 * panics.  The destination comes from the rule's rdr pool when one is
 * configured, and is otherwise derived from the original destination using
 * a prefix length taken from the rule.
 *
 * For ICMP <-> ICMPv6 translation the echo request/reply type held in
 * pd->ndport is rewritten to the equivalent type of the target protocol.
 *
 * Returns 0 on success and -1 if port allocation or address mapping
 * fails.  Kernels built without both INET and INET6 always return -1.
 */
int
pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
	struct pf_addr	 ndaddr, nsaddr, naddr;
	u_int16_t	 nport = 0;
	int		 prefixlen = 96;

	bzero(&nsaddr, sizeof(nsaddr));
	bzero(&ndaddr, sizeof(ndaddr));

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s %s, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr");
		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
		printf("\n");
	}

	if (TAILQ_EMPTY(&r->nat.list))
		panic("pf_get_transaddr_af: no nat pool for source address");

	/* get source address and port */
	if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0],
	    r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) {
		DPFPRINTF(PF_DEBUG_MISC,
		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
		    r->nat.proxy_port[0], r->nat.proxy_port[1]);
		return (-1);
	}

	/*
	 * Map the echo type kept in pd->ndport to the target protocol.
	 * The value is converted to host order for the comparison and
	 * back again afterwards; both directions must operate on the same
	 * field.
	 */
	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
		pd->ndport = ntohs(pd->ndport);
		if (pd->ndport == ICMP6_ECHO_REQUEST)
			pd->ndport = ICMP_ECHO;
		else if (pd->ndport == ICMP6_ECHO_REPLY)
			pd->ndport = ICMP_ECHOREPLY;
		pd->ndport = htons(pd->ndport);
	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
		pd->ndport = ntohs(pd->ndport);
		if (pd->ndport == ICMP_ECHO)
			pd->ndport = ICMP6_ECHO_REQUEST;
		else if (pd->ndport == ICMP_ECHOREPLY)
			pd->ndport = ICMP6_ECHO_REPLY;
		pd->ndport = htons(pd->ndport);
	}

	/* get the destination address and port */
	if (! TAILQ_EMPTY(&r->rdr.list)) {
		if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, &(pd->naf),
		    NULL, NULL, &r->rdr, PF_SN_NAT))
			return (-1);
		if (r->rdr.proxy_port[0])
			pd->ndport = htons(r->rdr.proxy_port[0]);

		if (pd->naf == AF_INET) {
			/* The prefix is the IPv4 rdr address */
			prefixlen = in_mask2len(
			    (struct in_addr *)&r->rdr.cur->addr.v.a.mask);
			inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		} else {
			/* The prefix is the IPv6 rdr address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL);
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		}
	} else {
		if (pd->naf == AF_INET) {
			/* The prefix is the IPv6 dst address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->dst.addr.v.a.mask, NULL);
			/* Fall back to /96 when the rule's prefix is < 32. */
			if (prefixlen < 32)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr,
			    prefixlen);
		} else {
			/*
			 * The prefix is the IPv6 nat address
			 * (the one pf_get_sport() placed in nsaddr above)
			 */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL);
			/* Never use a prefix longer than /96. */
			if (prefixlen > 96)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr,
			    prefixlen);
		}
	}

	/* Publish the translated addresses in the new address family. */
	pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
	pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s done, prefixlen %d, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    prefixlen);
		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
		printf("\n");
	}

	return (0);
#else
	return (-1);
#endif
}
1326