xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision 1b9cfd6a625dc82611846cb9a53c1886f7af3758)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/vnet.h>
52 #include <net/pfvar.h>
53 #include <net/if_pflog.h>
54 
/*
 * Debug printf.  `x' is a complete, parenthesised printf argument list; it
 * is printed only when the configured pf debug level (V_pf_status.debug) is
 * at least `n'.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: with a bare `if', an `else' following the macro call
 * would silently attach to the macro's hidden `if'.
 */
#define DPFPRINTF(n, x)					\
	do {						\
		if (V_pf_status.debug >= (n))		\
			printf x;			\
	} while (0)
56 
57 static void		 pf_hash(struct pf_addr *, struct pf_addr *,
58 			    struct pf_poolhashkey *, sa_family_t);
59 static struct pf_krule	*pf_match_translation(struct pf_pdesc *, struct mbuf *,
60 			    int, struct pfi_kkif *,
61 			    struct pf_addr *, u_int16_t, struct pf_addr *,
62 			    uint16_t, int, struct pf_kanchor_stackframe *);
63 static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *,
64     struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
65     uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **);
66 
/*
 * Mixing step used by pf_hash(): nine subtract / xor-shift rounds that stir
 * the three integer lvalues a, b and c into one another, in place.
 * Every argument is evaluated many times, so only plain variables (no
 * expressions with side effects) may be passed.
 */
#define mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
79 
80 /*
81  * hash function based on bridge_hash in if_bridge.c
82  */
83 static void
84 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
85     struct pf_poolhashkey *key, sa_family_t af)
86 {
87 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
88 
89 	switch (af) {
90 #ifdef INET
91 	case AF_INET:
92 		a += inaddr->addr32[0];
93 		b += key->key32[1];
94 		mix(a, b, c);
95 		hash->addr32[0] = c + key->key32[2];
96 		break;
97 #endif /* INET */
98 #ifdef INET6
99 	case AF_INET6:
100 		a += inaddr->addr32[0];
101 		b += inaddr->addr32[2];
102 		mix(a, b, c);
103 		hash->addr32[0] = c;
104 		a += inaddr->addr32[1];
105 		b += inaddr->addr32[3];
106 		c += key->key32[1];
107 		mix(a, b, c);
108 		hash->addr32[1] = c;
109 		a += inaddr->addr32[2];
110 		b += inaddr->addr32[1];
111 		c += key->key32[2];
112 		mix(a, b, c);
113 		hash->addr32[2] = c;
114 		a += inaddr->addr32[3];
115 		b += inaddr->addr32[0];
116 		c += key->key32[3];
117 		mix(a, b, c);
118 		hash->addr32[3] = c;
119 		break;
120 #endif /* INET6 */
121 	}
122 }
123 
124 static struct pf_krule *
125 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
126     struct pfi_kkif *kif, struct pf_addr *saddr, u_int16_t sport,
127     struct pf_addr *daddr, uint16_t dport, int rs_num,
128     struct pf_kanchor_stackframe *anchor_stack)
129 {
130 	struct pf_krule		*r, *rm = NULL;
131 	struct pf_kruleset	*ruleset = NULL;
132 	int			 tag = -1;
133 	int			 rtableid = -1;
134 	int			 asd = 0;
135 
136 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
137 	while (r != NULL) {
138 		struct pf_rule_addr	*src = NULL, *dst = NULL;
139 		struct pf_addr_wrap	*xdst = NULL;
140 
141 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
142 			src = &r->dst;
143 			if (r->rpool.cur != NULL)
144 				xdst = &r->rpool.cur->addr;
145 		} else {
146 			src = &r->src;
147 			dst = &r->dst;
148 		}
149 
150 		pf_counter_u64_add(&r->evaluations, 1);
151 		if (pfi_kkif_match(r->kif, kif) == r->ifnot)
152 			r = r->skip[PF_SKIP_IFP].ptr;
153 		else if (r->direction && r->direction != pd->dir)
154 			r = r->skip[PF_SKIP_DIR].ptr;
155 		else if (r->af && r->af != pd->af)
156 			r = r->skip[PF_SKIP_AF].ptr;
157 		else if (r->proto && r->proto != pd->proto)
158 			r = r->skip[PF_SKIP_PROTO].ptr;
159 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
160 		    src->neg, kif, M_GETFIB(m)))
161 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
162 			    PF_SKIP_DST_ADDR].ptr;
163 		else if (src->port_op && !pf_match_port(src->port_op,
164 		    src->port[0], src->port[1], sport))
165 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
166 			    PF_SKIP_DST_PORT].ptr;
167 		else if (dst != NULL &&
168 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
169 		    M_GETFIB(m)))
170 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
171 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
172 		    0, NULL, M_GETFIB(m)))
173 			r = TAILQ_NEXT(r, entries);
174 		else if (dst != NULL && dst->port_op &&
175 		    !pf_match_port(dst->port_op, dst->port[0],
176 		    dst->port[1], dport))
177 			r = r->skip[PF_SKIP_DST_PORT].ptr;
178 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
179 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
180 			r = TAILQ_NEXT(r, entries);
181 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
182 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
183 		    off, &pd->hdr.tcp), r->os_fingerprint)))
184 			r = TAILQ_NEXT(r, entries);
185 		else {
186 			if (r->tag)
187 				tag = r->tag;
188 			if (r->rtableid >= 0)
189 				rtableid = r->rtableid;
190 			if (r->anchor == NULL) {
191 				rm = r;
192 				if (rm->action == PF_NONAT ||
193 				    rm->action == PF_NORDR ||
194 				    rm->action == PF_NOBINAT) {
195 					rm = NULL;
196 				}
197 				break;
198 			} else
199 				pf_step_into_anchor(anchor_stack, &asd,
200 				    &ruleset, rs_num, &r, NULL, NULL);
201 		}
202 		if (r == NULL)
203 			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
204 			    rs_num, &r, NULL, NULL);
205 	}
206 
207 	if (tag > 0 && pf_tag_packet(m, pd, tag))
208 		return (NULL);
209 	if (rtableid >= 0)
210 		M_SETFIB(m, rtableid);
211 
212 	return (rm);
213 }
214 
215 static int
216 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
217     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
218     uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
219     uint16_t high, struct pf_ksrc_node **sn)
220 {
221 	struct pf_state_key_cmp	key;
222 	struct pf_addr		init_addr;
223 
224 	bzero(&init_addr, sizeof(init_addr));
225 	if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
226 		return (1);
227 
228 	if (proto == IPPROTO_ICMP) {
229 		if (*nport == htons(ICMP_ECHO)) {
230 			low = 1;
231 			high = 65535;
232 		} else
233 			return (0);	/* Don't try to modify non-echo ICMP */
234 	}
235 #ifdef INET6
236 	if (proto == IPPROTO_ICMPV6) {
237 		if (*nport == htons(ICMP6_ECHO_REQUEST)) {
238 			low = 1;
239 			high = 65535;
240 		} else
241 			return (0);	/* Don't try to modify non-echo ICMP */
242 	}
243 #endif /* INET6 */
244 
245 	bzero(&key, sizeof(key));
246 	key.af = af;
247 	key.proto = proto;
248 	key.port[0] = dport;
249 	PF_ACPY(&key.addr[0], daddr, key.af);
250 
251 	do {
252 		PF_ACPY(&key.addr[1], naddr, key.af);
253 
254 		/*
255 		 * port search; start random, step;
256 		 * similar 2 portloop in in_pcbbind
257 		 */
258 		if (proto == IPPROTO_SCTP) {
259 			key.port[1] = sport;
260 			if (!pf_find_state_all_exists(&key, PF_IN)) {
261 				*nport = sport;
262 				return (0);
263 			} else {
264 				return (1); /* Fail mapping. */
265 			}
266 		} else if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
267 		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
268 			/*
269 			 * XXX bug: icmp states don't use the id on both sides.
270 			 * (traceroute -I through nat)
271 			 */
272 			key.port[1] = sport;
273 			if (!pf_find_state_all_exists(&key, PF_IN)) {
274 				*nport = sport;
275 				return (0);
276 			}
277 		} else if (low == high) {
278 			key.port[1] = htons(low);
279 			if (!pf_find_state_all_exists(&key, PF_IN)) {
280 				*nport = htons(low);
281 				return (0);
282 			}
283 		} else {
284 			uint32_t tmp;
285 			uint16_t cut;
286 
287 			if (low > high) {
288 				tmp = low;
289 				low = high;
290 				high = tmp;
291 			}
292 			/* low < high */
293 			cut = arc4random() % (1 + high - low) + low;
294 			/* low <= cut <= high */
295 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
296 				key.port[1] = htons(tmp);
297 				if (!pf_find_state_all_exists(&key, PF_IN)) {
298 					*nport = htons(tmp);
299 					return (0);
300 				}
301 			}
302 			tmp = cut;
303 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
304 				key.port[1] = htons(tmp);
305 				if (!pf_find_state_all_exists(&key, PF_IN)) {
306 					*nport = htons(tmp);
307 					return (0);
308 				}
309 			}
310 		}
311 
312 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
313 		case PF_POOL_RANDOM:
314 		case PF_POOL_ROUNDROBIN:
315 			/*
316 			 * pick a different source address since we're out
317 			 * of free port choices for the current one.
318 			 */
319 			if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
320 				return (1);
321 			break;
322 		case PF_POOL_NONE:
323 		case PF_POOL_SRCHASH:
324 		case PF_POOL_BITMASK:
325 		default:
326 			return (1);
327 		}
328 	} while (! PF_AEQ(&init_addr, naddr, af) );
329 	return (1);					/* none available */
330 }
331 
332 static int
333 pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
334     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
335     uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
336     struct pf_ksrc_node **sn)
337 {
338 	uint16_t psmask, low, highmask;
339 	uint16_t i, ahigh, cut;
340 	int ashift, psidshift;
341 
342 	ashift = 16 - r->rpool.mape.offset;
343 	psidshift = ashift - r->rpool.mape.psidlen;
344 	psmask = r->rpool.mape.psid & ((1U << r->rpool.mape.psidlen) - 1);
345 	psmask = psmask << psidshift;
346 	highmask = (1U << psidshift) - 1;
347 
348 	ahigh = (1U << r->rpool.mape.offset) - 1;
349 	cut = arc4random() & ahigh;
350 	if (cut == 0)
351 		cut = 1;
352 
353 	for (i = cut; i <= ahigh; i++) {
354 		low = (i << ashift) | psmask;
355 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
356 		    naddr, nport, low, low | highmask, sn))
357 			return (0);
358 	}
359 	for (i = cut - 1; i > 0; i--) {
360 		low = (i << ashift) | psmask;
361 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
362 		    naddr, nport, low, low | highmask, sn))
363 			return (0);
364 	}
365 	return (1);
366 }
367 
368 u_short
369 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
370     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
371     struct pf_ksrc_node **sn)
372 {
373 	u_short			 reason = 0;
374 	struct pf_kpool		*rpool = &r->rpool;
375 	struct pf_addr		*raddr = NULL, *rmask = NULL;
376 	struct pf_srchash	*sh = NULL;
377 
378 	/* Try to find a src_node if none was given and this
379 	   is a sticky-address rule. */
380 	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
381 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
382 		*sn = pf_find_src_node(saddr, r, af, &sh, false);
383 
384 	/* If a src_node was found or explicitly given and it has a non-zero
385 	   route address, use this address. A zeroed address is found if the
386 	   src node was created just a moment ago in pf_create_state and it
387 	   needs to be filled in with routing decision calculated here. */
388 	if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
389 		/* If the supplied address is the same as the current one we've
390 		 * been asked before, so tell the caller that there's no other
391 		 * address to be had. */
392 		if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
393 			reason = PFRES_MAPFAILED;
394 			goto done;
395 		}
396 
397 		PF_ACPY(naddr, &(*sn)->raddr, af);
398 		if (nkif)
399 			*nkif = (*sn)->rkif;
400 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
401 			printf("pf_map_addr: src tracking maps ");
402 			pf_print_host(saddr, 0, af);
403 			printf(" to ");
404 			pf_print_host(naddr, 0, af);
405 			if (nkif)
406 				printf("@%s", (*nkif)->pfik_name);
407 			printf("\n");
408 		}
409 		goto done;
410 	}
411 
412 	mtx_lock(&rpool->mtx);
413 	/* Find the route using chosen algorithm. Store the found route
414 	   in src_node if it was given or found. */
415 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
416 		reason = PFRES_MAPFAILED;
417 		goto done_pool_mtx;
418 	}
419 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
420 		switch (af) {
421 #ifdef INET
422 		case AF_INET:
423 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
424 			    (rpool->opts & PF_POOL_TYPEMASK) !=
425 			    PF_POOL_ROUNDROBIN) {
426 				reason = PFRES_MAPFAILED;
427 				goto done_pool_mtx;
428 			}
429 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
430 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
431 			break;
432 #endif /* INET */
433 #ifdef INET6
434 		case AF_INET6:
435 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
436 			    (rpool->opts & PF_POOL_TYPEMASK) !=
437 			    PF_POOL_ROUNDROBIN) {
438 				reason = PFRES_MAPFAILED;
439 				goto done_pool_mtx;
440 			}
441 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
442 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
443 			break;
444 #endif /* INET6 */
445 		}
446 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
447 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
448 			reason = PFRES_MAPFAILED;
449 			goto done_pool_mtx; /* unsupported */
450 		}
451 	} else {
452 		raddr = &rpool->cur->addr.v.a.addr;
453 		rmask = &rpool->cur->addr.v.a.mask;
454 	}
455 
456 	switch (rpool->opts & PF_POOL_TYPEMASK) {
457 	case PF_POOL_NONE:
458 		PF_ACPY(naddr, raddr, af);
459 		break;
460 	case PF_POOL_BITMASK:
461 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
462 		break;
463 	case PF_POOL_RANDOM:
464 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
465 			switch (af) {
466 #ifdef INET
467 			case AF_INET:
468 				rpool->counter.addr32[0] = htonl(arc4random());
469 				break;
470 #endif /* INET */
471 #ifdef INET6
472 			case AF_INET6:
473 				if (rmask->addr32[3] != 0xffffffff)
474 					rpool->counter.addr32[3] =
475 					    htonl(arc4random());
476 				else
477 					break;
478 				if (rmask->addr32[2] != 0xffffffff)
479 					rpool->counter.addr32[2] =
480 					    htonl(arc4random());
481 				else
482 					break;
483 				if (rmask->addr32[1] != 0xffffffff)
484 					rpool->counter.addr32[1] =
485 					    htonl(arc4random());
486 				else
487 					break;
488 				if (rmask->addr32[0] != 0xffffffff)
489 					rpool->counter.addr32[0] =
490 					    htonl(arc4random());
491 				break;
492 #endif /* INET6 */
493 			}
494 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
495 			PF_ACPY(init_addr, naddr, af);
496 
497 		} else {
498 			PF_AINC(&rpool->counter, af);
499 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
500 		}
501 		break;
502 	case PF_POOL_SRCHASH:
503 	    {
504 		unsigned char hash[16];
505 
506 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
507 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
508 		break;
509 	    }
510 	case PF_POOL_ROUNDROBIN:
511 	    {
512 		struct pf_kpooladdr *acur = rpool->cur;
513 
514 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
515 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
516 			    &rpool->tblidx, &rpool->counter, af))
517 				goto get_addr;
518 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
519 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
520 			    &rpool->tblidx, &rpool->counter, af))
521 				goto get_addr;
522 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
523 			goto get_addr;
524 
525 	try_next:
526 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
527 			rpool->cur = TAILQ_FIRST(&rpool->list);
528 		else
529 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
530 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
531 			rpool->tblidx = -1;
532 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
533 			    &rpool->tblidx, &rpool->counter, af)) {
534 				/* table contains no address of type 'af' */
535 				if (rpool->cur != acur)
536 					goto try_next;
537 				reason = PFRES_MAPFAILED;
538 				goto done_pool_mtx;
539 			}
540 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
541 			rpool->tblidx = -1;
542 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
543 			    &rpool->tblidx, &rpool->counter, af)) {
544 				/* table contains no address of type 'af' */
545 				if (rpool->cur != acur)
546 					goto try_next;
547 				reason = PFRES_MAPFAILED;
548 				goto done_pool_mtx;
549 			}
550 		} else {
551 			raddr = &rpool->cur->addr.v.a.addr;
552 			rmask = &rpool->cur->addr.v.a.mask;
553 			PF_ACPY(&rpool->counter, raddr, af);
554 		}
555 
556 	get_addr:
557 		PF_ACPY(naddr, &rpool->counter, af);
558 		if (init_addr != NULL && PF_AZERO(init_addr, af))
559 			PF_ACPY(init_addr, naddr, af);
560 		PF_AINC(&rpool->counter, af);
561 		break;
562 	    }
563 	}
564 
565 	if (nkif)
566 		*nkif = rpool->cur->kif;
567 
568 	if (*sn != NULL) {
569 		PF_ACPY(&(*sn)->raddr, naddr, af);
570 		if (nkif)
571 			(*sn)->rkif = *nkif;
572 	}
573 
574 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
575 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
576 		printf("pf_map_addr: selected address ");
577 		pf_print_host(naddr, 0, af);
578 		if (nkif)
579 			printf("@%s", (*nkif)->pfik_name);
580 		printf("\n");
581 	}
582 
583 done_pool_mtx:
584 	mtx_unlock(&rpool->mtx);
585 
586 done:
587 	if (reason) {
588 		counter_u64_add(V_pf_status.counters[reason], 1);
589 	}
590 
591 	return (reason);
592 }
593 
594 u_short
595 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
596     struct pfi_kkif *kif, struct pf_ksrc_node **sn,
597     struct pf_state_key **skp, struct pf_state_key **nkp,
598     struct pf_addr *saddr, struct pf_addr *daddr,
599     uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack,
600     struct pf_krule **rp)
601 {
602 	struct pf_krule	*r = NULL;
603 	struct pf_addr	*naddr;
604 	uint16_t	*nportp;
605 	uint16_t	 low, high;
606 	u_short		 reason;
607 
608 	PF_RULES_RASSERT();
609 	KASSERT(*skp == NULL, ("*skp not NULL"));
610 	KASSERT(*nkp == NULL, ("*nkp not NULL"));
611 
612 	*rp = NULL;
613 
614 	if (pd->dir == PF_OUT) {
615 		r = pf_match_translation(pd, m, off, kif, saddr,
616 		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
617 		if (r == NULL)
618 			r = pf_match_translation(pd, m, off, kif,
619 			    saddr, sport, daddr, dport, PF_RULESET_NAT,
620 			    anchor_stack);
621 	} else {
622 		r = pf_match_translation(pd, m, off, kif, saddr,
623 		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
624 		if (r == NULL)
625 			r = pf_match_translation(pd, m, off, kif,
626 			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
627 			    anchor_stack);
628 	}
629 
630 	if (r == NULL)
631 		return (PFRES_MAX);
632 
633 	switch (r->action) {
634 	case PF_NONAT:
635 	case PF_NOBINAT:
636 	case PF_NORDR:
637 		return (PFRES_MAX);
638 	}
639 
640 	*skp = pf_state_key_setup(pd, m, off, saddr, daddr, sport, dport);
641 	if (*skp == NULL)
642 		return (PFRES_MEMORY);
643 	*nkp = pf_state_key_clone(*skp);
644 	if (*nkp == NULL) {
645 		uma_zfree(V_pf_state_key_z, *skp);
646 		*skp = NULL;
647 		return (PFRES_MEMORY);
648 	}
649 
650 	naddr = &(*nkp)->addr[1];
651 	nportp = &(*nkp)->port[1];
652 
653 	switch (r->action) {
654 	case PF_NAT:
655 		if (pd->proto == IPPROTO_ICMP) {
656 			low = 1;
657 			high = 65535;
658 		} else {
659 			low  = r->rpool.proxy_port[0];
660 			high = r->rpool.proxy_port[1];
661 		}
662 		if (r->rpool.mape.offset > 0) {
663 			if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
664 			    sport, daddr, dport, naddr, nportp, sn)) {
665 				DPFPRINTF(PF_DEBUG_MISC,
666 				    ("pf: MAP-E port allocation (%u/%u/%u)"
667 				    " failed\n",
668 				    r->rpool.mape.offset,
669 				    r->rpool.mape.psidlen,
670 				    r->rpool.mape.psid));
671 				reason = PFRES_MAPFAILED;
672 				goto notrans;
673 			}
674 		} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
675 		    daddr, dport, naddr, nportp, low, high, sn)) {
676 			DPFPRINTF(PF_DEBUG_MISC,
677 			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
678 			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
679 			reason = PFRES_MAPFAILED;
680 			goto notrans;
681 		}
682 		break;
683 	case PF_BINAT:
684 		switch (pd->dir) {
685 		case PF_OUT:
686 			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
687 				switch (pd->af) {
688 #ifdef INET
689 				case AF_INET:
690 					if (r->rpool.cur->addr.p.dyn->
691 					    pfid_acnt4 < 1) {
692 						reason = PFRES_MAPFAILED;
693 						goto notrans;
694 					}
695 					PF_POOLMASK(naddr,
696 					    &r->rpool.cur->addr.p.dyn->
697 					    pfid_addr4,
698 					    &r->rpool.cur->addr.p.dyn->
699 					    pfid_mask4, saddr, AF_INET);
700 					break;
701 #endif /* INET */
702 #ifdef INET6
703 				case AF_INET6:
704 					if (r->rpool.cur->addr.p.dyn->
705 					    pfid_acnt6 < 1) {
706 						reason = PFRES_MAPFAILED;
707 						goto notrans;
708 					}
709 					PF_POOLMASK(naddr,
710 					    &r->rpool.cur->addr.p.dyn->
711 					    pfid_addr6,
712 					    &r->rpool.cur->addr.p.dyn->
713 					    pfid_mask6, saddr, AF_INET6);
714 					break;
715 #endif /* INET6 */
716 				}
717 			} else
718 				PF_POOLMASK(naddr,
719 				    &r->rpool.cur->addr.v.a.addr,
720 				    &r->rpool.cur->addr.v.a.mask, saddr,
721 				    pd->af);
722 			break;
723 		case PF_IN:
724 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
725 				switch (pd->af) {
726 #ifdef INET
727 				case AF_INET:
728 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
729 						reason = PFRES_MAPFAILED;
730 						goto notrans;
731 					}
732 					PF_POOLMASK(naddr,
733 					    &r->src.addr.p.dyn->pfid_addr4,
734 					    &r->src.addr.p.dyn->pfid_mask4,
735 					    daddr, AF_INET);
736 					break;
737 #endif /* INET */
738 #ifdef INET6
739 				case AF_INET6:
740 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
741 						reason = PFRES_MAPFAILED;
742 						goto notrans;
743 					}
744 					PF_POOLMASK(naddr,
745 					    &r->src.addr.p.dyn->pfid_addr6,
746 					    &r->src.addr.p.dyn->pfid_mask6,
747 					    daddr, AF_INET6);
748 					break;
749 #endif /* INET6 */
750 				}
751 			} else
752 				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
753 				    &r->src.addr.v.a.mask, daddr, pd->af);
754 			break;
755 		}
756 		break;
757 	case PF_RDR: {
758 		struct pf_state_key_cmp key;
759 		uint16_t cut, low, high, nport;
760 
761 		reason = pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn);
762 		if (reason != 0)
763 			goto notrans;
764 		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
765 			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
766 			    daddr, pd->af);
767 
768 		/* Do not change SCTP ports. */
769 		if (pd->proto == IPPROTO_SCTP)
770 			break;
771 
772 		if (r->rpool.proxy_port[1]) {
773 			uint32_t	tmp_nport;
774 
775 			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
776 			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
777 			    1)) + r->rpool.proxy_port[0];
778 
779 			/* Wrap around if necessary. */
780 			if (tmp_nport > 65535)
781 				tmp_nport -= 65535;
782 			nport = htons((uint16_t)tmp_nport);
783 		} else if (r->rpool.proxy_port[0])
784 			nport = htons(r->rpool.proxy_port[0]);
785 		else
786 			nport = dport;
787 
788 		/*
789 		 * Update the destination port.
790 		 */
791 		*nportp = nport;
792 
793 		/*
794 		 * Do we have a source port conflict in the stack state?  Try to
795 		 * modulate the source port if so.  Note that this is racy since
796 		 * the state lookup may not find any matches here but will once
797 		 * pf_create_state() actually instantiates the state.
798 		 */
799 		bzero(&key, sizeof(key));
800 		key.af = pd->af;
801 		key.proto = pd->proto;
802 		key.port[0] = sport;
803 		PF_ACPY(&key.addr[0], saddr, key.af);
804 		key.port[1] = nport;
805 		PF_ACPY(&key.addr[1], naddr, key.af);
806 
807 		if (!pf_find_state_all_exists(&key, PF_OUT))
808 			break;
809 
810 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
811 		high = 65535;
812 		cut = arc4random() % (1 + high - low) + low;
813 		for (uint32_t tmp = cut;
814 		    tmp <= high && tmp <= UINT16_MAX; tmp++) {
815 			key.port[0] = htons(tmp);
816 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
817 				/* Update the source port. */
818 				(*nkp)->port[0] = htons(tmp);
819 				goto out;
820 			}
821 		}
822 		for (uint32_t tmp = cut - 1; tmp >= low; tmp--) {
823 			key.port[0] = htons(tmp);
824 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
825 				/* Update the source port. */
826 				(*nkp)->port[0] = htons(tmp);
827 				goto out;
828 			}
829 		}
830 
831 		DPFPRINTF(PF_DEBUG_MISC,
832 		    ("pf: RDR source port allocation failed\n"));
833 		reason = PFRES_MAPFAILED;
834 		goto notrans;
835 
836 out:
837 		DPFPRINTF(PF_DEBUG_MISC,
838 		    ("pf: RDR source port allocation %u->%u\n",
839 		    ntohs(sport), ntohs((*nkp)->port[0])));
840 		break;
841 	}
842 	default:
843 		panic("%s: unknown action %u", __func__, r->action);
844 	}
845 
846 	/* Return success only if translation really happened. */
847 	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
848 		*rp = r;
849 		return (PFRES_MATCH);
850 	}
851 
852 	reason = PFRES_MAX;
853 notrans:
854 	uma_zfree(V_pf_state_key_z, *nkp);
855 	uma_zfree(V_pf_state_key_z, *skp);
856 	*skp = *nkp = NULL;
857 	*sn = NULL;
858 
859 	return (reason);
860 }
861