xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision ae1a0648b05acf798816e7b83b3c10856de5c8e5)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/vnet.h>
52 #include <net/pfvar.h>
53 #include <net/if_pflog.h>
54 
55 /*
56  * Limit the amount of work we do to find a free source port for redirects that
57  * introduce a state conflict.
58  */
59 #define	V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
60 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
61 
/*
 * Print a debug message when the configured pf debug level is at least n.
 * x is a complete, parenthesised printf() argument list, e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("value %d\n", v));
 * The do/while (0) wrapper makes the macro a single statement so that it
 * cannot capture an "else" that follows it in the caller.
 */
#define DPFPRINTF(n, x)					\
	do {						\
		if (V_pf_status.debug >= (n))		\
			printf x;			\
	} while (0)
63 
64 static void		 pf_hash(struct pf_addr *, struct pf_addr *,
65 			    struct pf_poolhashkey *, sa_family_t);
66 static struct pf_krule	*pf_match_translation(struct pf_pdesc *, struct mbuf *,
67 			    int, struct pfi_kkif *,
68 			    struct pf_addr *, u_int16_t, struct pf_addr *,
69 			    uint16_t, int, struct pf_kanchor_stackframe *);
70 static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *,
71     struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
72     uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **,
73     struct pf_udp_mapping **);
74 
/*
 * Scramble three words in place using nine subtract/xor/shift rounds.
 * All three arguments must be modifiable lvalues; pf_hash() passes
 * u_int32_t variables.  Each argument is evaluated several times.
 */
#define	mix(a, b, c)						\
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
87 
88 /*
89  * hash function based on bridge_hash in if_bridge.c
90  */
91 static void
92 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
93     struct pf_poolhashkey *key, sa_family_t af)
94 {
95 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
96 
97 	switch (af) {
98 #ifdef INET
99 	case AF_INET:
100 		a += inaddr->addr32[0];
101 		b += key->key32[1];
102 		mix(a, b, c);
103 		hash->addr32[0] = c + key->key32[2];
104 		break;
105 #endif /* INET */
106 #ifdef INET6
107 	case AF_INET6:
108 		a += inaddr->addr32[0];
109 		b += inaddr->addr32[2];
110 		mix(a, b, c);
111 		hash->addr32[0] = c;
112 		a += inaddr->addr32[1];
113 		b += inaddr->addr32[3];
114 		c += key->key32[1];
115 		mix(a, b, c);
116 		hash->addr32[1] = c;
117 		a += inaddr->addr32[2];
118 		b += inaddr->addr32[1];
119 		c += key->key32[2];
120 		mix(a, b, c);
121 		hash->addr32[2] = c;
122 		a += inaddr->addr32[3];
123 		b += inaddr->addr32[0];
124 		c += key->key32[3];
125 		mix(a, b, c);
126 		hash->addr32[3] = c;
127 		break;
128 #endif /* INET6 */
129 	}
130 }
131 
132 static struct pf_krule *
133 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
134     struct pfi_kkif *kif, struct pf_addr *saddr, u_int16_t sport,
135     struct pf_addr *daddr, uint16_t dport, int rs_num,
136     struct pf_kanchor_stackframe *anchor_stack)
137 {
138 	struct pf_krule		*r, *rm = NULL;
139 	struct pf_kruleset	*ruleset = NULL;
140 	int			 tag = -1;
141 	int			 rtableid = -1;
142 	int			 asd = 0;
143 
144 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
145 	while (r != NULL) {
146 		struct pf_rule_addr	*src = NULL, *dst = NULL;
147 		struct pf_addr_wrap	*xdst = NULL;
148 
149 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
150 			src = &r->dst;
151 			if (r->rpool.cur != NULL)
152 				xdst = &r->rpool.cur->addr;
153 		} else {
154 			src = &r->src;
155 			dst = &r->dst;
156 		}
157 
158 		pf_counter_u64_add(&r->evaluations, 1);
159 		if (pfi_kkif_match(r->kif, kif) == r->ifnot)
160 			r = r->skip[PF_SKIP_IFP].ptr;
161 		else if (r->direction && r->direction != pd->dir)
162 			r = r->skip[PF_SKIP_DIR].ptr;
163 		else if (r->af && r->af != pd->af)
164 			r = r->skip[PF_SKIP_AF].ptr;
165 		else if (r->proto && r->proto != pd->proto)
166 			r = r->skip[PF_SKIP_PROTO].ptr;
167 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
168 		    src->neg, kif, M_GETFIB(m)))
169 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
170 			    PF_SKIP_DST_ADDR].ptr;
171 		else if (src->port_op && !pf_match_port(src->port_op,
172 		    src->port[0], src->port[1], sport))
173 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
174 			    PF_SKIP_DST_PORT].ptr;
175 		else if (dst != NULL &&
176 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
177 		    M_GETFIB(m)))
178 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
179 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
180 		    0, NULL, M_GETFIB(m)))
181 			r = TAILQ_NEXT(r, entries);
182 		else if (dst != NULL && dst->port_op &&
183 		    !pf_match_port(dst->port_op, dst->port[0],
184 		    dst->port[1], dport))
185 			r = r->skip[PF_SKIP_DST_PORT].ptr;
186 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
187 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
188 			r = TAILQ_NEXT(r, entries);
189 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
190 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
191 		    off, &pd->hdr.tcp), r->os_fingerprint)))
192 			r = TAILQ_NEXT(r, entries);
193 		else {
194 			if (r->tag)
195 				tag = r->tag;
196 			if (r->rtableid >= 0)
197 				rtableid = r->rtableid;
198 			if (r->anchor == NULL) {
199 				rm = r;
200 				if (rm->action == PF_NONAT ||
201 				    rm->action == PF_NORDR ||
202 				    rm->action == PF_NOBINAT) {
203 					rm = NULL;
204 				}
205 				break;
206 			} else
207 				pf_step_into_anchor(anchor_stack, &asd,
208 				    &ruleset, rs_num, &r, NULL, NULL);
209 		}
210 		if (r == NULL)
211 			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
212 			    rs_num, &r, NULL, NULL);
213 	}
214 
215 	if (tag > 0 && pf_tag_packet(m, pd, tag))
216 		return (NULL);
217 	if (rtableid >= 0)
218 		M_SETFIB(m, rtableid);
219 
220 	return (rm);
221 }
222 
223 static int
224 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
225     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
226     uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
227     uint16_t high, struct pf_ksrc_node **sn,
228     struct pf_udp_mapping **udp_mapping)
229 {
230 	struct pf_state_key_cmp	key;
231 	struct pf_addr		init_addr;
232 	struct pf_srchash	*sh = NULL;
233 
234 	bzero(&init_addr, sizeof(init_addr));
235 
236 	MPASS(*udp_mapping == NULL);
237 
238 	/*
239 	 * If we are UDP and have an existing mapping we can get source port
240 	 * from the mapping. In this case we have to look up the src_node as
241 	 * pf_map_addr would.
242 	 */
243 	if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
244 		struct pf_udp_endpoint_cmp udp_source;
245 
246 		bzero(&udp_source, sizeof(udp_source));
247 		udp_source.af = af;
248 		PF_ACPY(&udp_source.addr, saddr, af);
249 		udp_source.port = sport;
250 		*udp_mapping = pf_udp_mapping_find(&udp_source);
251 		if (*udp_mapping) {
252 			PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
253 			*nport = (*udp_mapping)->endpoints[1].port;
254 			/* Try to find a src_node as per pf_map_addr(). */
255 			if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
256 			    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
257 				*sn = pf_find_src_node(saddr, r, af, &sh, 0);
258 			return (0);
259 		} else {
260 			*udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
261 			if (*udp_mapping == NULL)
262 				return (1);
263 		}
264 	}
265 
266 	if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
267 		goto failed;
268 
269 	if (proto == IPPROTO_ICMP) {
270 		if (*nport == htons(ICMP_ECHO)) {
271 			low = 1;
272 			high = 65535;
273 		} else
274 			return (0);	/* Don't try to modify non-echo ICMP */
275 	}
276 #ifdef INET6
277 	if (proto == IPPROTO_ICMPV6) {
278 		if (*nport == htons(ICMP6_ECHO_REQUEST)) {
279 			low = 1;
280 			high = 65535;
281 		} else
282 			return (0);	/* Don't try to modify non-echo ICMP */
283 	}
284 #endif /* INET6 */
285 
286 	bzero(&key, sizeof(key));
287 	key.af = af;
288 	key.proto = proto;
289 	key.port[0] = dport;
290 	PF_ACPY(&key.addr[0], daddr, key.af);
291 
292 	do {
293 		PF_ACPY(&key.addr[1], naddr, key.af);
294 		if (*udp_mapping)
295 			PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af);
296 
297 		/*
298 		 * port search; start random, step;
299 		 * similar 2 portloop in in_pcbbind
300 		 */
301 		if (proto == IPPROTO_SCTP) {
302 			key.port[1] = sport;
303 			if (!pf_find_state_all_exists(&key, PF_IN)) {
304 				*nport = sport;
305 				return (0);
306 			} else {
307 				return (1); /* Fail mapping. */
308 			}
309 		} else if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
310 		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
311 			/*
312 			 * XXX bug: icmp states don't use the id on both sides.
313 			 * (traceroute -I through nat)
314 			 */
315 			key.port[1] = sport;
316 			if (!pf_find_state_all_exists(&key, PF_IN)) {
317 				*nport = sport;
318 				return (0);
319 			}
320 		} else if (low == high) {
321 			key.port[1] = htons(low);
322 			if (!pf_find_state_all_exists(&key, PF_IN)) {
323 				if (*udp_mapping != NULL) {
324 					(*udp_mapping)->endpoints[1].port = htons(low);
325 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
326 						*nport = htons(low);
327 						return (0);
328 					}
329 				} else {
330 					*nport = htons(low);
331 					return (0);
332 				}
333 			}
334 		} else {
335 			uint32_t tmp;
336 			uint16_t cut;
337 
338 			if (low > high) {
339 				tmp = low;
340 				low = high;
341 				high = tmp;
342 			}
343 			/* low < high */
344 			cut = arc4random() % (1 + high - low) + low;
345 			/* low <= cut <= high */
346 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
347 				if (*udp_mapping != NULL) {
348 					(*udp_mapping)->endpoints[1].port = htons(tmp);
349 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
350 						*nport = htons(tmp);
351 						return (0);
352 					}
353 				} else {
354 					key.port[1] = htons(tmp);
355 					if (!pf_find_state_all_exists(&key, PF_IN)) {
356 						*nport = htons(tmp);
357 						return (0);
358 					}
359 				}
360 			}
361 			tmp = cut;
362 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
363 				if (proto == IPPROTO_UDP &&
364 				    (r->rpool.opts & PF_POOL_ENDPI)) {
365 					(*udp_mapping)->endpoints[1].port = htons(tmp);
366 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
367 						*nport = htons(tmp);
368 						return (0);
369 					}
370 				} else {
371 					key.port[1] = htons(tmp);
372 					if (!pf_find_state_all_exists(&key, PF_IN)) {
373 						*nport = htons(tmp);
374 						return (0);
375 					}
376 				}
377 			}
378 		}
379 
380 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
381 		case PF_POOL_RANDOM:
382 		case PF_POOL_ROUNDROBIN:
383 			/*
384 			 * pick a different source address since we're out
385 			 * of free port choices for the current one.
386 			 */
387 			if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
388 				return (1);
389 			break;
390 		case PF_POOL_NONE:
391 		case PF_POOL_SRCHASH:
392 		case PF_POOL_BITMASK:
393 		default:
394 			return (1);
395 		}
396 	} while (! PF_AEQ(&init_addr, naddr, af) );
397 
398 failed:
399 	uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
400 	*udp_mapping = NULL;
401 	return (1);					/* none available */
402 }
403 
404 static int
405 pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
406     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
407     uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
408     struct pf_ksrc_node **sn, struct pf_udp_mapping **udp_mapping)
409 {
410 	uint16_t psmask, low, highmask;
411 	uint16_t i, ahigh, cut;
412 	int ashift, psidshift;
413 
414 	ashift = 16 - r->rpool.mape.offset;
415 	psidshift = ashift - r->rpool.mape.psidlen;
416 	psmask = r->rpool.mape.psid & ((1U << r->rpool.mape.psidlen) - 1);
417 	psmask = psmask << psidshift;
418 	highmask = (1U << psidshift) - 1;
419 
420 	ahigh = (1U << r->rpool.mape.offset) - 1;
421 	cut = arc4random() & ahigh;
422 	if (cut == 0)
423 		cut = 1;
424 
425 	for (i = cut; i <= ahigh; i++) {
426 		low = (i << ashift) | psmask;
427 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
428 		    naddr, nport, low, low | highmask, sn, udp_mapping))
429 			return (0);
430 	}
431 	for (i = cut - 1; i > 0; i--) {
432 		low = (i << ashift) | psmask;
433 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
434 		    naddr, nport, low, low | highmask, sn, udp_mapping))
435 			return (0);
436 	}
437 	return (1);
438 }
439 
440 u_short
441 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
442     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
443     struct pf_ksrc_node **sn)
444 {
445 	u_short			 reason = PFRES_MATCH;
446 	struct pf_kpool		*rpool = &r->rpool;
447 	struct pf_addr		*raddr = NULL, *rmask = NULL;
448 	struct pf_srchash	*sh = NULL;
449 
450 	/* Try to find a src_node if none was given and this
451 	   is a sticky-address rule. */
452 	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
453 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
454 		*sn = pf_find_src_node(saddr, r, af, &sh, false);
455 
456 	/* If a src_node was found or explicitly given and it has a non-zero
457 	   route address, use this address. A zeroed address is found if the
458 	   src node was created just a moment ago in pf_create_state and it
459 	   needs to be filled in with routing decision calculated here. */
460 	if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
461 		/* If the supplied address is the same as the current one we've
462 		 * been asked before, so tell the caller that there's no other
463 		 * address to be had. */
464 		if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
465 			reason = PFRES_MAPFAILED;
466 			goto done;
467 		}
468 
469 		PF_ACPY(naddr, &(*sn)->raddr, af);
470 		if (nkif)
471 			*nkif = (*sn)->rkif;
472 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
473 			printf("pf_map_addr: src tracking maps ");
474 			pf_print_host(saddr, 0, af);
475 			printf(" to ");
476 			pf_print_host(naddr, 0, af);
477 			if (nkif)
478 				printf("@%s", (*nkif)->pfik_name);
479 			printf("\n");
480 		}
481 		goto done;
482 	}
483 
484 	mtx_lock(&rpool->mtx);
485 	/* Find the route using chosen algorithm. Store the found route
486 	   in src_node if it was given or found. */
487 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
488 		reason = PFRES_MAPFAILED;
489 		goto done_pool_mtx;
490 	}
491 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
492 		switch (af) {
493 #ifdef INET
494 		case AF_INET:
495 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
496 			    (rpool->opts & PF_POOL_TYPEMASK) !=
497 			    PF_POOL_ROUNDROBIN) {
498 				reason = PFRES_MAPFAILED;
499 				goto done_pool_mtx;
500 			}
501 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
502 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
503 			break;
504 #endif /* INET */
505 #ifdef INET6
506 		case AF_INET6:
507 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
508 			    (rpool->opts & PF_POOL_TYPEMASK) !=
509 			    PF_POOL_ROUNDROBIN) {
510 				reason = PFRES_MAPFAILED;
511 				goto done_pool_mtx;
512 			}
513 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
514 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
515 			break;
516 #endif /* INET6 */
517 		}
518 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
519 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
520 			reason = PFRES_MAPFAILED;
521 			goto done_pool_mtx; /* unsupported */
522 		}
523 	} else {
524 		raddr = &rpool->cur->addr.v.a.addr;
525 		rmask = &rpool->cur->addr.v.a.mask;
526 	}
527 
528 	switch (rpool->opts & PF_POOL_TYPEMASK) {
529 	case PF_POOL_NONE:
530 		PF_ACPY(naddr, raddr, af);
531 		break;
532 	case PF_POOL_BITMASK:
533 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
534 		break;
535 	case PF_POOL_RANDOM:
536 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
537 			switch (af) {
538 #ifdef INET
539 			case AF_INET:
540 				rpool->counter.addr32[0] = htonl(arc4random());
541 				break;
542 #endif /* INET */
543 #ifdef INET6
544 			case AF_INET6:
545 				if (rmask->addr32[3] != 0xffffffff)
546 					rpool->counter.addr32[3] =
547 					    htonl(arc4random());
548 				else
549 					break;
550 				if (rmask->addr32[2] != 0xffffffff)
551 					rpool->counter.addr32[2] =
552 					    htonl(arc4random());
553 				else
554 					break;
555 				if (rmask->addr32[1] != 0xffffffff)
556 					rpool->counter.addr32[1] =
557 					    htonl(arc4random());
558 				else
559 					break;
560 				if (rmask->addr32[0] != 0xffffffff)
561 					rpool->counter.addr32[0] =
562 					    htonl(arc4random());
563 				break;
564 #endif /* INET6 */
565 			}
566 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
567 			PF_ACPY(init_addr, naddr, af);
568 
569 		} else {
570 			PF_AINC(&rpool->counter, af);
571 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
572 		}
573 		break;
574 	case PF_POOL_SRCHASH:
575 	    {
576 		unsigned char hash[16];
577 
578 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
579 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
580 		break;
581 	    }
582 	case PF_POOL_ROUNDROBIN:
583 	    {
584 		struct pf_kpooladdr *acur = rpool->cur;
585 
586 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
587 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
588 			    &rpool->tblidx, &rpool->counter, af))
589 				goto get_addr;
590 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
591 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
592 			    &rpool->tblidx, &rpool->counter, af))
593 				goto get_addr;
594 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
595 			goto get_addr;
596 
597 	try_next:
598 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
599 			rpool->cur = TAILQ_FIRST(&rpool->list);
600 		else
601 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
602 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
603 			rpool->tblidx = -1;
604 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
605 			    &rpool->tblidx, &rpool->counter, af)) {
606 				/* table contains no address of type 'af' */
607 				if (rpool->cur != acur)
608 					goto try_next;
609 				reason = PFRES_MAPFAILED;
610 				goto done_pool_mtx;
611 			}
612 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
613 			rpool->tblidx = -1;
614 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
615 			    &rpool->tblidx, &rpool->counter, af)) {
616 				/* table contains no address of type 'af' */
617 				if (rpool->cur != acur)
618 					goto try_next;
619 				reason = PFRES_MAPFAILED;
620 				goto done_pool_mtx;
621 			}
622 		} else {
623 			raddr = &rpool->cur->addr.v.a.addr;
624 			rmask = &rpool->cur->addr.v.a.mask;
625 			PF_ACPY(&rpool->counter, raddr, af);
626 		}
627 
628 	get_addr:
629 		PF_ACPY(naddr, &rpool->counter, af);
630 		if (init_addr != NULL && PF_AZERO(init_addr, af))
631 			PF_ACPY(init_addr, naddr, af);
632 		PF_AINC(&rpool->counter, af);
633 		break;
634 	    }
635 	}
636 
637 	if (nkif)
638 		*nkif = rpool->cur->kif;
639 
640 	if (*sn != NULL) {
641 		PF_ACPY(&(*sn)->raddr, naddr, af);
642 		if (nkif)
643 			(*sn)->rkif = *nkif;
644 	}
645 
646 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
647 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
648 		printf("pf_map_addr: selected address ");
649 		pf_print_host(naddr, 0, af);
650 		if (nkif)
651 			printf("@%s", (*nkif)->pfik_name);
652 		printf("\n");
653 	}
654 
655 done_pool_mtx:
656 	mtx_unlock(&rpool->mtx);
657 
658 done:
659 	if (reason) {
660 		counter_u64_add(V_pf_status.counters[reason], 1);
661 	}
662 
663 	return (reason);
664 }
665 
666 u_short
667 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
668     struct pfi_kkif *kif, struct pf_ksrc_node **sn,
669     struct pf_state_key **skp, struct pf_state_key **nkp,
670     struct pf_addr *saddr, struct pf_addr *daddr,
671     uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack,
672     struct pf_krule **rp,
673     struct pf_udp_mapping **udp_mapping)
674 {
675 	struct pf_krule	*r = NULL;
676 	struct pf_addr	*naddr;
677 	uint16_t	*nportp;
678 	uint16_t	 low, high;
679 	u_short		 reason;
680 
681 	PF_RULES_RASSERT();
682 	KASSERT(*skp == NULL, ("*skp not NULL"));
683 	KASSERT(*nkp == NULL, ("*nkp not NULL"));
684 
685 	*rp = NULL;
686 
687 	if (pd->dir == PF_OUT) {
688 		r = pf_match_translation(pd, m, off, kif, saddr,
689 		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
690 		if (r == NULL)
691 			r = pf_match_translation(pd, m, off, kif,
692 			    saddr, sport, daddr, dport, PF_RULESET_NAT,
693 			    anchor_stack);
694 	} else {
695 		r = pf_match_translation(pd, m, off, kif, saddr,
696 		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
697 		if (r == NULL)
698 			r = pf_match_translation(pd, m, off, kif,
699 			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
700 			    anchor_stack);
701 	}
702 
703 	if (r == NULL)
704 		return (PFRES_MAX);
705 
706 	switch (r->action) {
707 	case PF_NONAT:
708 	case PF_NOBINAT:
709 	case PF_NORDR:
710 		return (PFRES_MAX);
711 	}
712 
713 	*skp = pf_state_key_setup(pd, m, off, saddr, daddr, sport, dport);
714 	if (*skp == NULL)
715 		return (PFRES_MEMORY);
716 	*nkp = pf_state_key_clone(*skp);
717 	if (*nkp == NULL) {
718 		uma_zfree(V_pf_state_key_z, *skp);
719 		*skp = NULL;
720 		return (PFRES_MEMORY);
721 	}
722 
723 	naddr = &(*nkp)->addr[1];
724 	nportp = &(*nkp)->port[1];
725 
726 	switch (r->action) {
727 	case PF_NAT:
728 		if (pd->proto == IPPROTO_ICMP) {
729 			low = 1;
730 			high = 65535;
731 		} else {
732 			low  = r->rpool.proxy_port[0];
733 			high = r->rpool.proxy_port[1];
734 		}
735 		if (r->rpool.mape.offset > 0) {
736 			if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
737 			    sport, daddr, dport, naddr, nportp, sn, udp_mapping)) {
738 				DPFPRINTF(PF_DEBUG_MISC,
739 				    ("pf: MAP-E port allocation (%u/%u/%u)"
740 				    " failed\n",
741 				    r->rpool.mape.offset,
742 				    r->rpool.mape.psidlen,
743 				    r->rpool.mape.psid));
744 				reason = PFRES_MAPFAILED;
745 				goto notrans;
746 			}
747 		} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
748 		    daddr, dport, naddr, nportp, low, high, sn, udp_mapping)) {
749 			DPFPRINTF(PF_DEBUG_MISC,
750 			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
751 			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
752 			reason = PFRES_MAPFAILED;
753 			goto notrans;
754 		}
755 		break;
756 	case PF_BINAT:
757 		switch (pd->dir) {
758 		case PF_OUT:
759 			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
760 				switch (pd->af) {
761 #ifdef INET
762 				case AF_INET:
763 					if (r->rpool.cur->addr.p.dyn->
764 					    pfid_acnt4 < 1) {
765 						reason = PFRES_MAPFAILED;
766 						goto notrans;
767 					}
768 					PF_POOLMASK(naddr,
769 					    &r->rpool.cur->addr.p.dyn->
770 					    pfid_addr4,
771 					    &r->rpool.cur->addr.p.dyn->
772 					    pfid_mask4, saddr, AF_INET);
773 					break;
774 #endif /* INET */
775 #ifdef INET6
776 				case AF_INET6:
777 					if (r->rpool.cur->addr.p.dyn->
778 					    pfid_acnt6 < 1) {
779 						reason = PFRES_MAPFAILED;
780 						goto notrans;
781 					}
782 					PF_POOLMASK(naddr,
783 					    &r->rpool.cur->addr.p.dyn->
784 					    pfid_addr6,
785 					    &r->rpool.cur->addr.p.dyn->
786 					    pfid_mask6, saddr, AF_INET6);
787 					break;
788 #endif /* INET6 */
789 				}
790 			} else
791 				PF_POOLMASK(naddr,
792 				    &r->rpool.cur->addr.v.a.addr,
793 				    &r->rpool.cur->addr.v.a.mask, saddr,
794 				    pd->af);
795 			break;
796 		case PF_IN:
797 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
798 				switch (pd->af) {
799 #ifdef INET
800 				case AF_INET:
801 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
802 						reason = PFRES_MAPFAILED;
803 						goto notrans;
804 					}
805 					PF_POOLMASK(naddr,
806 					    &r->src.addr.p.dyn->pfid_addr4,
807 					    &r->src.addr.p.dyn->pfid_mask4,
808 					    daddr, AF_INET);
809 					break;
810 #endif /* INET */
811 #ifdef INET6
812 				case AF_INET6:
813 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
814 						reason = PFRES_MAPFAILED;
815 						goto notrans;
816 					}
817 					PF_POOLMASK(naddr,
818 					    &r->src.addr.p.dyn->pfid_addr6,
819 					    &r->src.addr.p.dyn->pfid_mask6,
820 					    daddr, AF_INET6);
821 					break;
822 #endif /* INET6 */
823 				}
824 			} else
825 				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
826 				    &r->src.addr.v.a.mask, daddr, pd->af);
827 			break;
828 		}
829 		break;
830 	case PF_RDR: {
831 		struct pf_state_key_cmp key;
832 		int tries;
833 		uint16_t cut, low, high, nport;
834 
835 		reason = pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn);
836 		if (reason != 0)
837 			goto notrans;
838 		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
839 			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
840 			    daddr, pd->af);
841 
842 		/* Do not change SCTP ports. */
843 		if (pd->proto == IPPROTO_SCTP)
844 			break;
845 
846 		if (r->rpool.proxy_port[1]) {
847 			uint32_t	tmp_nport;
848 
849 			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
850 			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
851 			    1)) + r->rpool.proxy_port[0];
852 
853 			/* Wrap around if necessary. */
854 			if (tmp_nport > 65535)
855 				tmp_nport -= 65535;
856 			nport = htons((uint16_t)tmp_nport);
857 		} else if (r->rpool.proxy_port[0])
858 			nport = htons(r->rpool.proxy_port[0]);
859 		else
860 			nport = dport;
861 
862 		/*
863 		 * Update the destination port.
864 		 */
865 		*nportp = nport;
866 
867 		/*
868 		 * Do we have a source port conflict in the stack state?  Try to
869 		 * modulate the source port if so.  Note that this is racy since
870 		 * the state lookup may not find any matches here but will once
871 		 * pf_create_state() actually instantiates the state.
872 		 */
873 		bzero(&key, sizeof(key));
874 		key.af = pd->af;
875 		key.proto = pd->proto;
876 		key.port[0] = sport;
877 		PF_ACPY(&key.addr[0], saddr, key.af);
878 		key.port[1] = nport;
879 		PF_ACPY(&key.addr[1], naddr, key.af);
880 
881 		if (!pf_find_state_all_exists(&key, PF_OUT))
882 			break;
883 
884 		tries = 0;
885 
886 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
887 		high = 65535;
888 		cut = arc4random() % (1 + high - low) + low;
889 		for (uint32_t tmp = cut;
890 		    tmp <= high && tmp <= UINT16_MAX &&
891 		    tries < V_pf_rdr_srcport_rewrite_tries;
892 		    tmp++, tries++) {
893 			key.port[0] = htons(tmp);
894 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
895 				/* Update the source port. */
896 				(*nkp)->port[0] = htons(tmp);
897 				goto out;
898 			}
899 		}
900 		for (uint32_t tmp = cut - 1;
901 		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
902 		    tmp--, tries++) {
903 			key.port[0] = htons(tmp);
904 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
905 				/* Update the source port. */
906 				(*nkp)->port[0] = htons(tmp);
907 				goto out;
908 			}
909 		}
910 
911 		/*
912 		 * We failed to find a match.  Push on ahead anyway, let
913 		 * pf_state_insert() be the arbiter of whether the state
914 		 * conflict is tolerable.  In particular, with TCP connections
915 		 * the state may be reused if the TCP state is terminal.
916 		 */
917 		DPFPRINTF(PF_DEBUG_MISC,
918 		    ("pf: RDR source port allocation failed\n"));
919 		break;
920 
921 out:
922 		DPFPRINTF(PF_DEBUG_MISC,
923 		    ("pf: RDR source port allocation %u->%u\n",
924 		    ntohs(sport), ntohs((*nkp)->port[0])));
925 		break;
926 	}
927 	default:
928 		panic("%s: unknown action %u", __func__, r->action);
929 	}
930 
931 	/* Return success only if translation really happened. */
932 	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
933 		*rp = r;
934 		return (PFRES_MATCH);
935 	}
936 
937 	reason = PFRES_MAX;
938 notrans:
939 	uma_zfree(V_pf_state_key_z, *nkp);
940 	uma_zfree(V_pf_state_key_z, *skp);
941 	*skp = *nkp = NULL;
942 	*sn = NULL;
943 
944 	return (reason);
945 }
946