xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision 3f0efe05432b1633991114ca4ca330102a561959)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/vnet.h>
52 #include <net/pfvar.h>
53 #include <net/if_pflog.h>
54 
/*
 * Debug printf, emitted only when the configured pf debug level is at least
 * 'n'.  'x' is a complete, parenthesized printf argument list, e.g.
 * DPFPRINTF(PF_DEBUG_MISC, ("fmt %u\n", val)).
 *
 * The body is wrapped in do/while (0) so the expansion is one statement and
 * cannot capture an 'else' that follows the macro at the call site.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (V_pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
56 
57 static void		 pf_hash(struct pf_addr *, struct pf_addr *,
58 			    struct pf_poolhashkey *, sa_family_t);
59 static struct pf_krule	*pf_match_translation(struct pf_pdesc *, struct mbuf *,
60 			    int, struct pfi_kkif *,
61 			    struct pf_addr *, u_int16_t, struct pf_addr *,
62 			    uint16_t, int, struct pf_kanchor_stackframe *);
63 static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *,
64     struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
65     uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **,
66     struct pf_udp_mapping **);
67 
/*
 * Three-word integer mixing step used by pf_hash().
 *
 * a, b and c are updated in place, so each argument must be a modifiable
 * lvalue; every argument is evaluated several times and must therefore be
 * free of side effects.
 */
#define mix(a,b,c) \
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
80 
81 /*
82  * hash function based on bridge_hash in if_bridge.c
83  */
84 static void
85 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
86     struct pf_poolhashkey *key, sa_family_t af)
87 {
88 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
89 
90 	switch (af) {
91 #ifdef INET
92 	case AF_INET:
93 		a += inaddr->addr32[0];
94 		b += key->key32[1];
95 		mix(a, b, c);
96 		hash->addr32[0] = c + key->key32[2];
97 		break;
98 #endif /* INET */
99 #ifdef INET6
100 	case AF_INET6:
101 		a += inaddr->addr32[0];
102 		b += inaddr->addr32[2];
103 		mix(a, b, c);
104 		hash->addr32[0] = c;
105 		a += inaddr->addr32[1];
106 		b += inaddr->addr32[3];
107 		c += key->key32[1];
108 		mix(a, b, c);
109 		hash->addr32[1] = c;
110 		a += inaddr->addr32[2];
111 		b += inaddr->addr32[1];
112 		c += key->key32[2];
113 		mix(a, b, c);
114 		hash->addr32[2] = c;
115 		a += inaddr->addr32[3];
116 		b += inaddr->addr32[0];
117 		c += key->key32[3];
118 		mix(a, b, c);
119 		hash->addr32[3] = c;
120 		break;
121 #endif /* INET6 */
122 	}
123 }
124 
125 static struct pf_krule *
126 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
127     struct pfi_kkif *kif, struct pf_addr *saddr, u_int16_t sport,
128     struct pf_addr *daddr, uint16_t dport, int rs_num,
129     struct pf_kanchor_stackframe *anchor_stack)
130 {
131 	struct pf_krule		*r, *rm = NULL;
132 	struct pf_kruleset	*ruleset = NULL;
133 	int			 tag = -1;
134 	int			 rtableid = -1;
135 	int			 asd = 0;
136 
137 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
138 	while (r != NULL) {
139 		struct pf_rule_addr	*src = NULL, *dst = NULL;
140 		struct pf_addr_wrap	*xdst = NULL;
141 
142 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
143 			src = &r->dst;
144 			if (r->rpool.cur != NULL)
145 				xdst = &r->rpool.cur->addr;
146 		} else {
147 			src = &r->src;
148 			dst = &r->dst;
149 		}
150 
151 		pf_counter_u64_add(&r->evaluations, 1);
152 		if (pfi_kkif_match(r->kif, kif) == r->ifnot)
153 			r = r->skip[PF_SKIP_IFP].ptr;
154 		else if (r->direction && r->direction != pd->dir)
155 			r = r->skip[PF_SKIP_DIR].ptr;
156 		else if (r->af && r->af != pd->af)
157 			r = r->skip[PF_SKIP_AF].ptr;
158 		else if (r->proto && r->proto != pd->proto)
159 			r = r->skip[PF_SKIP_PROTO].ptr;
160 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
161 		    src->neg, kif, M_GETFIB(m)))
162 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
163 			    PF_SKIP_DST_ADDR].ptr;
164 		else if (src->port_op && !pf_match_port(src->port_op,
165 		    src->port[0], src->port[1], sport))
166 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
167 			    PF_SKIP_DST_PORT].ptr;
168 		else if (dst != NULL &&
169 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
170 		    M_GETFIB(m)))
171 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
172 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
173 		    0, NULL, M_GETFIB(m)))
174 			r = TAILQ_NEXT(r, entries);
175 		else if (dst != NULL && dst->port_op &&
176 		    !pf_match_port(dst->port_op, dst->port[0],
177 		    dst->port[1], dport))
178 			r = r->skip[PF_SKIP_DST_PORT].ptr;
179 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
180 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
181 			r = TAILQ_NEXT(r, entries);
182 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
183 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
184 		    off, &pd->hdr.tcp), r->os_fingerprint)))
185 			r = TAILQ_NEXT(r, entries);
186 		else {
187 			if (r->tag)
188 				tag = r->tag;
189 			if (r->rtableid >= 0)
190 				rtableid = r->rtableid;
191 			if (r->anchor == NULL) {
192 				rm = r;
193 				if (rm->action == PF_NONAT ||
194 				    rm->action == PF_NORDR ||
195 				    rm->action == PF_NOBINAT) {
196 					rm = NULL;
197 				}
198 				break;
199 			} else
200 				pf_step_into_anchor(anchor_stack, &asd,
201 				    &ruleset, rs_num, &r, NULL, NULL);
202 		}
203 		if (r == NULL)
204 			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
205 			    rs_num, &r, NULL, NULL);
206 	}
207 
208 	if (tag > 0 && pf_tag_packet(m, pd, tag))
209 		return (NULL);
210 	if (rtableid >= 0)
211 		M_SETFIB(m, rtableid);
212 
213 	return (rm);
214 }
215 
216 static int
217 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
218     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
219     uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
220     uint16_t high, struct pf_ksrc_node **sn,
221     struct pf_udp_mapping **udp_mapping)
222 {
223 	struct pf_state_key_cmp	key;
224 	struct pf_addr		init_addr;
225 	struct pf_srchash	*sh = NULL;
226 
227 	bzero(&init_addr, sizeof(init_addr));
228 
229 	MPASS(*udp_mapping == NULL);
230 
231 	/*
232 	 * If we are UDP and have an existing mapping we can get source port
233 	 * from the mapping. In this case we have to look up the src_node as
234 	 * pf_map_addr would.
235 	 */
236 	if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
237 		struct pf_udp_endpoint_cmp udp_source;
238 
239 		bzero(&udp_source, sizeof(udp_source));
240 		udp_source.af = af;
241 		PF_ACPY(&udp_source.addr, saddr, af);
242 		udp_source.port = sport;
243 		*udp_mapping = pf_udp_mapping_find(&udp_source);
244 		if (*udp_mapping) {
245 			PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
246 			*nport = (*udp_mapping)->endpoints[1].port;
247 			/* Try to find a src_node as per pf_map_addr(). */
248 			if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
249 			    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
250 				*sn = pf_find_src_node(saddr, r, af, &sh, 0);
251 			return (0);
252 		} else {
253 			*udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
254 			if (*udp_mapping == NULL)
255 				return (1);
256 		}
257 	}
258 
259 	if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
260 		goto failed;
261 
262 	if (proto == IPPROTO_ICMP) {
263 		if (*nport == htons(ICMP_ECHO)) {
264 			low = 1;
265 			high = 65535;
266 		} else
267 			return (0);	/* Don't try to modify non-echo ICMP */
268 	}
269 #ifdef INET6
270 	if (proto == IPPROTO_ICMPV6) {
271 		if (*nport == htons(ICMP6_ECHO_REQUEST)) {
272 			low = 1;
273 			high = 65535;
274 		} else
275 			return (0);	/* Don't try to modify non-echo ICMP */
276 	}
277 #endif /* INET6 */
278 
279 	bzero(&key, sizeof(key));
280 	key.af = af;
281 	key.proto = proto;
282 	key.port[0] = dport;
283 	PF_ACPY(&key.addr[0], daddr, key.af);
284 
285 	do {
286 		PF_ACPY(&key.addr[1], naddr, key.af);
287 		if (*udp_mapping)
288 			PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af);
289 
290 		/*
291 		 * port search; start random, step;
292 		 * similar 2 portloop in in_pcbbind
293 		 */
294 		if (proto == IPPROTO_SCTP) {
295 			key.port[1] = sport;
296 			if (!pf_find_state_all_exists(&key, PF_IN)) {
297 				*nport = sport;
298 				return (0);
299 			} else {
300 				return (1); /* Fail mapping. */
301 			}
302 		} else if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
303 		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
304 			/*
305 			 * XXX bug: icmp states don't use the id on both sides.
306 			 * (traceroute -I through nat)
307 			 */
308 			key.port[1] = sport;
309 			if (!pf_find_state_all_exists(&key, PF_IN)) {
310 				*nport = sport;
311 				return (0);
312 			}
313 		} else if (low == high) {
314 			key.port[1] = htons(low);
315 			if (!pf_find_state_all_exists(&key, PF_IN)) {
316 				if (*udp_mapping != NULL) {
317 					(*udp_mapping)->endpoints[1].port = htons(low);
318 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
319 						*nport = htons(low);
320 						return (0);
321 					}
322 				} else {
323 					*nport = htons(low);
324 					return (0);
325 				}
326 			}
327 		} else {
328 			uint32_t tmp;
329 			uint16_t cut;
330 
331 			if (low > high) {
332 				tmp = low;
333 				low = high;
334 				high = tmp;
335 			}
336 			/* low < high */
337 			cut = arc4random() % (1 + high - low) + low;
338 			/* low <= cut <= high */
339 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
340 				if (*udp_mapping != NULL) {
341 					(*udp_mapping)->endpoints[1].port = htons(tmp);
342 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
343 						*nport = htons(tmp);
344 						return (0);
345 					}
346 				} else {
347 					key.port[1] = htons(tmp);
348 					if (!pf_find_state_all_exists(&key, PF_IN)) {
349 						*nport = htons(tmp);
350 						return (0);
351 					}
352 				}
353 			}
354 			tmp = cut;
355 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
356 				if (proto == IPPROTO_UDP &&
357 				    (r->rpool.opts & PF_POOL_ENDPI)) {
358 					(*udp_mapping)->endpoints[1].port = htons(tmp);
359 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
360 						*nport = htons(tmp);
361 						return (0);
362 					}
363 				} else {
364 					key.port[1] = htons(tmp);
365 					if (!pf_find_state_all_exists(&key, PF_IN)) {
366 						*nport = htons(tmp);
367 						return (0);
368 					}
369 				}
370 			}
371 		}
372 
373 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
374 		case PF_POOL_RANDOM:
375 		case PF_POOL_ROUNDROBIN:
376 			/*
377 			 * pick a different source address since we're out
378 			 * of free port choices for the current one.
379 			 */
380 			if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
381 				return (1);
382 			break;
383 		case PF_POOL_NONE:
384 		case PF_POOL_SRCHASH:
385 		case PF_POOL_BITMASK:
386 		default:
387 			return (1);
388 		}
389 	} while (! PF_AEQ(&init_addr, naddr, af) );
390 
391 failed:
392 	uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
393 	*udp_mapping = NULL;
394 	return (1);					/* none available */
395 }
396 
397 static int
398 pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
399     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
400     uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
401     struct pf_ksrc_node **sn, struct pf_udp_mapping **udp_mapping)
402 {
403 	uint16_t psmask, low, highmask;
404 	uint16_t i, ahigh, cut;
405 	int ashift, psidshift;
406 
407 	ashift = 16 - r->rpool.mape.offset;
408 	psidshift = ashift - r->rpool.mape.psidlen;
409 	psmask = r->rpool.mape.psid & ((1U << r->rpool.mape.psidlen) - 1);
410 	psmask = psmask << psidshift;
411 	highmask = (1U << psidshift) - 1;
412 
413 	ahigh = (1U << r->rpool.mape.offset) - 1;
414 	cut = arc4random() & ahigh;
415 	if (cut == 0)
416 		cut = 1;
417 
418 	for (i = cut; i <= ahigh; i++) {
419 		low = (i << ashift) | psmask;
420 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
421 		    naddr, nport, low, low | highmask, sn, udp_mapping))
422 			return (0);
423 	}
424 	for (i = cut - 1; i > 0; i--) {
425 		low = (i << ashift) | psmask;
426 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
427 		    naddr, nport, low, low | highmask, sn, udp_mapping))
428 			return (0);
429 	}
430 	return (1);
431 }
432 
433 u_short
434 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
435     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
436     struct pf_ksrc_node **sn)
437 {
438 	u_short			 reason = 0;
439 	struct pf_kpool		*rpool = &r->rpool;
440 	struct pf_addr		*raddr = NULL, *rmask = NULL;
441 	struct pf_srchash	*sh = NULL;
442 
443 	/* Try to find a src_node if none was given and this
444 	   is a sticky-address rule. */
445 	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
446 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
447 		*sn = pf_find_src_node(saddr, r, af, &sh, false);
448 
449 	/* If a src_node was found or explicitly given and it has a non-zero
450 	   route address, use this address. A zeroed address is found if the
451 	   src node was created just a moment ago in pf_create_state and it
452 	   needs to be filled in with routing decision calculated here. */
453 	if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
454 		/* If the supplied address is the same as the current one we've
455 		 * been asked before, so tell the caller that there's no other
456 		 * address to be had. */
457 		if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
458 			reason = PFRES_MAPFAILED;
459 			goto done;
460 		}
461 
462 		PF_ACPY(naddr, &(*sn)->raddr, af);
463 		if (nkif)
464 			*nkif = (*sn)->rkif;
465 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
466 			printf("pf_map_addr: src tracking maps ");
467 			pf_print_host(saddr, 0, af);
468 			printf(" to ");
469 			pf_print_host(naddr, 0, af);
470 			if (nkif)
471 				printf("@%s", (*nkif)->pfik_name);
472 			printf("\n");
473 		}
474 		goto done;
475 	}
476 
477 	mtx_lock(&rpool->mtx);
478 	/* Find the route using chosen algorithm. Store the found route
479 	   in src_node if it was given or found. */
480 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
481 		reason = PFRES_MAPFAILED;
482 		goto done_pool_mtx;
483 	}
484 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
485 		switch (af) {
486 #ifdef INET
487 		case AF_INET:
488 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
489 			    (rpool->opts & PF_POOL_TYPEMASK) !=
490 			    PF_POOL_ROUNDROBIN) {
491 				reason = PFRES_MAPFAILED;
492 				goto done_pool_mtx;
493 			}
494 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
495 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
496 			break;
497 #endif /* INET */
498 #ifdef INET6
499 		case AF_INET6:
500 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
501 			    (rpool->opts & PF_POOL_TYPEMASK) !=
502 			    PF_POOL_ROUNDROBIN) {
503 				reason = PFRES_MAPFAILED;
504 				goto done_pool_mtx;
505 			}
506 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
507 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
508 			break;
509 #endif /* INET6 */
510 		}
511 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
512 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
513 			reason = PFRES_MAPFAILED;
514 			goto done_pool_mtx; /* unsupported */
515 		}
516 	} else {
517 		raddr = &rpool->cur->addr.v.a.addr;
518 		rmask = &rpool->cur->addr.v.a.mask;
519 	}
520 
521 	switch (rpool->opts & PF_POOL_TYPEMASK) {
522 	case PF_POOL_NONE:
523 		PF_ACPY(naddr, raddr, af);
524 		break;
525 	case PF_POOL_BITMASK:
526 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
527 		break;
528 	case PF_POOL_RANDOM:
529 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
530 			switch (af) {
531 #ifdef INET
532 			case AF_INET:
533 				rpool->counter.addr32[0] = htonl(arc4random());
534 				break;
535 #endif /* INET */
536 #ifdef INET6
537 			case AF_INET6:
538 				if (rmask->addr32[3] != 0xffffffff)
539 					rpool->counter.addr32[3] =
540 					    htonl(arc4random());
541 				else
542 					break;
543 				if (rmask->addr32[2] != 0xffffffff)
544 					rpool->counter.addr32[2] =
545 					    htonl(arc4random());
546 				else
547 					break;
548 				if (rmask->addr32[1] != 0xffffffff)
549 					rpool->counter.addr32[1] =
550 					    htonl(arc4random());
551 				else
552 					break;
553 				if (rmask->addr32[0] != 0xffffffff)
554 					rpool->counter.addr32[0] =
555 					    htonl(arc4random());
556 				break;
557 #endif /* INET6 */
558 			}
559 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
560 			PF_ACPY(init_addr, naddr, af);
561 
562 		} else {
563 			PF_AINC(&rpool->counter, af);
564 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
565 		}
566 		break;
567 	case PF_POOL_SRCHASH:
568 	    {
569 		unsigned char hash[16];
570 
571 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
572 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
573 		break;
574 	    }
575 	case PF_POOL_ROUNDROBIN:
576 	    {
577 		struct pf_kpooladdr *acur = rpool->cur;
578 
579 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
580 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
581 			    &rpool->tblidx, &rpool->counter, af))
582 				goto get_addr;
583 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
584 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
585 			    &rpool->tblidx, &rpool->counter, af))
586 				goto get_addr;
587 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
588 			goto get_addr;
589 
590 	try_next:
591 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
592 			rpool->cur = TAILQ_FIRST(&rpool->list);
593 		else
594 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
595 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
596 			rpool->tblidx = -1;
597 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
598 			    &rpool->tblidx, &rpool->counter, af)) {
599 				/* table contains no address of type 'af' */
600 				if (rpool->cur != acur)
601 					goto try_next;
602 				reason = PFRES_MAPFAILED;
603 				goto done_pool_mtx;
604 			}
605 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
606 			rpool->tblidx = -1;
607 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
608 			    &rpool->tblidx, &rpool->counter, af)) {
609 				/* table contains no address of type 'af' */
610 				if (rpool->cur != acur)
611 					goto try_next;
612 				reason = PFRES_MAPFAILED;
613 				goto done_pool_mtx;
614 			}
615 		} else {
616 			raddr = &rpool->cur->addr.v.a.addr;
617 			rmask = &rpool->cur->addr.v.a.mask;
618 			PF_ACPY(&rpool->counter, raddr, af);
619 		}
620 
621 	get_addr:
622 		PF_ACPY(naddr, &rpool->counter, af);
623 		if (init_addr != NULL && PF_AZERO(init_addr, af))
624 			PF_ACPY(init_addr, naddr, af);
625 		PF_AINC(&rpool->counter, af);
626 		break;
627 	    }
628 	}
629 
630 	if (nkif)
631 		*nkif = rpool->cur->kif;
632 
633 	if (*sn != NULL) {
634 		PF_ACPY(&(*sn)->raddr, naddr, af);
635 		if (nkif)
636 			(*sn)->rkif = *nkif;
637 	}
638 
639 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
640 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
641 		printf("pf_map_addr: selected address ");
642 		pf_print_host(naddr, 0, af);
643 		if (nkif)
644 			printf("@%s", (*nkif)->pfik_name);
645 		printf("\n");
646 	}
647 
648 done_pool_mtx:
649 	mtx_unlock(&rpool->mtx);
650 
651 done:
652 	if (reason) {
653 		counter_u64_add(V_pf_status.counters[reason], 1);
654 	}
655 
656 	return (reason);
657 }
658 
659 u_short
660 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
661     struct pfi_kkif *kif, struct pf_ksrc_node **sn,
662     struct pf_state_key **skp, struct pf_state_key **nkp,
663     struct pf_addr *saddr, struct pf_addr *daddr,
664     uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack,
665     struct pf_krule **rp,
666     struct pf_udp_mapping **udp_mapping)
667 {
668 	struct pf_krule	*r = NULL;
669 	struct pf_addr	*naddr;
670 	uint16_t	*nportp;
671 	uint16_t	 low, high;
672 	u_short		 reason;
673 
674 	PF_RULES_RASSERT();
675 	KASSERT(*skp == NULL, ("*skp not NULL"));
676 	KASSERT(*nkp == NULL, ("*nkp not NULL"));
677 
678 	*rp = NULL;
679 
680 	if (pd->dir == PF_OUT) {
681 		r = pf_match_translation(pd, m, off, kif, saddr,
682 		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
683 		if (r == NULL)
684 			r = pf_match_translation(pd, m, off, kif,
685 			    saddr, sport, daddr, dport, PF_RULESET_NAT,
686 			    anchor_stack);
687 	} else {
688 		r = pf_match_translation(pd, m, off, kif, saddr,
689 		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
690 		if (r == NULL)
691 			r = pf_match_translation(pd, m, off, kif,
692 			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
693 			    anchor_stack);
694 	}
695 
696 	if (r == NULL)
697 		return (PFRES_MAX);
698 
699 	switch (r->action) {
700 	case PF_NONAT:
701 	case PF_NOBINAT:
702 	case PF_NORDR:
703 		return (PFRES_MAX);
704 	}
705 
706 	*skp = pf_state_key_setup(pd, m, off, saddr, daddr, sport, dport);
707 	if (*skp == NULL)
708 		return (PFRES_MEMORY);
709 	*nkp = pf_state_key_clone(*skp);
710 	if (*nkp == NULL) {
711 		uma_zfree(V_pf_state_key_z, *skp);
712 		*skp = NULL;
713 		return (PFRES_MEMORY);
714 	}
715 
716 	naddr = &(*nkp)->addr[1];
717 	nportp = &(*nkp)->port[1];
718 
719 	switch (r->action) {
720 	case PF_NAT:
721 		if (pd->proto == IPPROTO_ICMP) {
722 			low = 1;
723 			high = 65535;
724 		} else {
725 			low  = r->rpool.proxy_port[0];
726 			high = r->rpool.proxy_port[1];
727 		}
728 		if (r->rpool.mape.offset > 0) {
729 			if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
730 			    sport, daddr, dport, naddr, nportp, sn, udp_mapping)) {
731 				DPFPRINTF(PF_DEBUG_MISC,
732 				    ("pf: MAP-E port allocation (%u/%u/%u)"
733 				    " failed\n",
734 				    r->rpool.mape.offset,
735 				    r->rpool.mape.psidlen,
736 				    r->rpool.mape.psid));
737 				reason = PFRES_MAPFAILED;
738 				goto notrans;
739 			}
740 		} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
741 		    daddr, dport, naddr, nportp, low, high, sn, udp_mapping)) {
742 			DPFPRINTF(PF_DEBUG_MISC,
743 			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
744 			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
745 			reason = PFRES_MAPFAILED;
746 			goto notrans;
747 		}
748 		break;
749 	case PF_BINAT:
750 		switch (pd->dir) {
751 		case PF_OUT:
752 			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
753 				switch (pd->af) {
754 #ifdef INET
755 				case AF_INET:
756 					if (r->rpool.cur->addr.p.dyn->
757 					    pfid_acnt4 < 1) {
758 						reason = PFRES_MAPFAILED;
759 						goto notrans;
760 					}
761 					PF_POOLMASK(naddr,
762 					    &r->rpool.cur->addr.p.dyn->
763 					    pfid_addr4,
764 					    &r->rpool.cur->addr.p.dyn->
765 					    pfid_mask4, saddr, AF_INET);
766 					break;
767 #endif /* INET */
768 #ifdef INET6
769 				case AF_INET6:
770 					if (r->rpool.cur->addr.p.dyn->
771 					    pfid_acnt6 < 1) {
772 						reason = PFRES_MAPFAILED;
773 						goto notrans;
774 					}
775 					PF_POOLMASK(naddr,
776 					    &r->rpool.cur->addr.p.dyn->
777 					    pfid_addr6,
778 					    &r->rpool.cur->addr.p.dyn->
779 					    pfid_mask6, saddr, AF_INET6);
780 					break;
781 #endif /* INET6 */
782 				}
783 			} else
784 				PF_POOLMASK(naddr,
785 				    &r->rpool.cur->addr.v.a.addr,
786 				    &r->rpool.cur->addr.v.a.mask, saddr,
787 				    pd->af);
788 			break;
789 		case PF_IN:
790 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
791 				switch (pd->af) {
792 #ifdef INET
793 				case AF_INET:
794 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
795 						reason = PFRES_MAPFAILED;
796 						goto notrans;
797 					}
798 					PF_POOLMASK(naddr,
799 					    &r->src.addr.p.dyn->pfid_addr4,
800 					    &r->src.addr.p.dyn->pfid_mask4,
801 					    daddr, AF_INET);
802 					break;
803 #endif /* INET */
804 #ifdef INET6
805 				case AF_INET6:
806 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
807 						reason = PFRES_MAPFAILED;
808 						goto notrans;
809 					}
810 					PF_POOLMASK(naddr,
811 					    &r->src.addr.p.dyn->pfid_addr6,
812 					    &r->src.addr.p.dyn->pfid_mask6,
813 					    daddr, AF_INET6);
814 					break;
815 #endif /* INET6 */
816 				}
817 			} else
818 				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
819 				    &r->src.addr.v.a.mask, daddr, pd->af);
820 			break;
821 		}
822 		break;
823 	case PF_RDR: {
824 		struct pf_state_key_cmp key;
825 		uint16_t cut, low, high, nport;
826 
827 		reason = pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn);
828 		if (reason != 0)
829 			goto notrans;
830 		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
831 			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
832 			    daddr, pd->af);
833 
834 		/* Do not change SCTP ports. */
835 		if (pd->proto == IPPROTO_SCTP)
836 			break;
837 
838 		if (r->rpool.proxy_port[1]) {
839 			uint32_t	tmp_nport;
840 
841 			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
842 			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
843 			    1)) + r->rpool.proxy_port[0];
844 
845 			/* Wrap around if necessary. */
846 			if (tmp_nport > 65535)
847 				tmp_nport -= 65535;
848 			nport = htons((uint16_t)tmp_nport);
849 		} else if (r->rpool.proxy_port[0])
850 			nport = htons(r->rpool.proxy_port[0]);
851 		else
852 			nport = dport;
853 
854 		/*
855 		 * Update the destination port.
856 		 */
857 		*nportp = nport;
858 
859 		/*
860 		 * Do we have a source port conflict in the stack state?  Try to
861 		 * modulate the source port if so.  Note that this is racy since
862 		 * the state lookup may not find any matches here but will once
863 		 * pf_create_state() actually instantiates the state.
864 		 */
865 		bzero(&key, sizeof(key));
866 		key.af = pd->af;
867 		key.proto = pd->proto;
868 		key.port[0] = sport;
869 		PF_ACPY(&key.addr[0], saddr, key.af);
870 		key.port[1] = nport;
871 		PF_ACPY(&key.addr[1], naddr, key.af);
872 
873 		if (!pf_find_state_all_exists(&key, PF_OUT))
874 			break;
875 
876 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
877 		high = 65535;
878 		cut = arc4random() % (1 + high - low) + low;
879 		for (uint32_t tmp = cut;
880 		    tmp <= high && tmp <= UINT16_MAX; tmp++) {
881 			key.port[0] = htons(tmp);
882 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
883 				/* Update the source port. */
884 				(*nkp)->port[0] = htons(tmp);
885 				goto out;
886 			}
887 		}
888 		for (uint32_t tmp = cut - 1; tmp >= low; tmp--) {
889 			key.port[0] = htons(tmp);
890 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
891 				/* Update the source port. */
892 				(*nkp)->port[0] = htons(tmp);
893 				goto out;
894 			}
895 		}
896 
897 		DPFPRINTF(PF_DEBUG_MISC,
898 		    ("pf: RDR source port allocation failed\n"));
899 		reason = PFRES_MAPFAILED;
900 		goto notrans;
901 
902 out:
903 		DPFPRINTF(PF_DEBUG_MISC,
904 		    ("pf: RDR source port allocation %u->%u\n",
905 		    ntohs(sport), ntohs((*nkp)->port[0])));
906 		break;
907 	}
908 	default:
909 		panic("%s: unknown action %u", __func__, r->action);
910 	}
911 
912 	/* Return success only if translation really happened. */
913 	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
914 		*rp = r;
915 		return (PFRES_MATCH);
916 	}
917 
918 	reason = PFRES_MAX;
919 notrans:
920 	uma_zfree(V_pf_state_key_z, *nkp);
921 	uma_zfree(V_pf_state_key_z, *skp);
922 	*skp = *nkp = NULL;
923 	*sn = NULL;
924 
925 	return (reason);
926 }
927