xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision 96190b4fef3b4a0cc3ca0606b0c4e3e69a5e6717)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/vnet.h>
52 #include <net/pfvar.h>
53 #include <net/if_pflog.h>
54 
/*
 * Limit the amount of work we do to find a free source port for redirects that
 * introduce a state conflict.  This per-vnet value bounds the total number of
 * candidate ports probed by the source port rewrite loops of the PF_RDR case
 * in pf_get_translation().
 */
#define	V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
61 
/*
 * Print a debug message when the configured pf debug level is at least (n).
 * (x) is a complete, parenthesized printf() argument list, e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: value %u\n", val));
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can never capture an "else" that follows it.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (V_pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
63 
/* Forward declarations of the file-local helpers defined below. */

/* Hash an address with a pool key (used for PF_POOL_SRCHASH). */
static void		 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
/* Find the first matching rule of one translation ruleset. */
static struct pf_krule	*pf_match_translation(struct pf_pdesc *,
			    struct pf_addr *, u_int16_t,
			    struct pf_addr *, uint16_t, int,
			    struct pf_kanchor_stackframe *);
/* Pick a translated source address/port; returns 0 on success, 1 on failure. */
static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *,
    struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
    uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **,
    struct pf_udp_mapping **);
/* True only for AF_INET6 link-local addresses. */
static bool		 pf_islinklocal(const sa_family_t, const struct pf_addr *);
75 
/*
 * Three-word mixing step used by pf_hash().  All three arguments are
 * updated in place, so each must be a modifiable integer lvalue.
 */
#define mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
88 
89 /*
90  * hash function based on bridge_hash in if_bridge.c
91  */
92 static void
93 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
94     struct pf_poolhashkey *key, sa_family_t af)
95 {
96 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
97 
98 	switch (af) {
99 #ifdef INET
100 	case AF_INET:
101 		a += inaddr->addr32[0];
102 		b += key->key32[1];
103 		mix(a, b, c);
104 		hash->addr32[0] = c + key->key32[2];
105 		break;
106 #endif /* INET */
107 #ifdef INET6
108 	case AF_INET6:
109 		a += inaddr->addr32[0];
110 		b += inaddr->addr32[2];
111 		mix(a, b, c);
112 		hash->addr32[0] = c;
113 		a += inaddr->addr32[1];
114 		b += inaddr->addr32[3];
115 		c += key->key32[1];
116 		mix(a, b, c);
117 		hash->addr32[1] = c;
118 		a += inaddr->addr32[2];
119 		b += inaddr->addr32[1];
120 		c += key->key32[2];
121 		mix(a, b, c);
122 		hash->addr32[2] = c;
123 		a += inaddr->addr32[3];
124 		b += inaddr->addr32[0];
125 		c += key->key32[3];
126 		mix(a, b, c);
127 		hash->addr32[3] = c;
128 		break;
129 #endif /* INET6 */
130 	}
131 }
132 
133 static struct pf_krule *
134 pf_match_translation(struct pf_pdesc *pd,
135     struct pf_addr *saddr, u_int16_t sport,
136     struct pf_addr *daddr, uint16_t dport, int rs_num,
137     struct pf_kanchor_stackframe *anchor_stack)
138 {
139 	struct pf_krule		*r, *rm = NULL;
140 	struct pf_kruleset	*ruleset = NULL;
141 	int			 tag = -1;
142 	int			 rtableid = -1;
143 	int			 asd = 0;
144 
145 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
146 	while (r != NULL) {
147 		struct pf_rule_addr	*src = NULL, *dst = NULL;
148 		struct pf_addr_wrap	*xdst = NULL;
149 
150 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
151 			src = &r->dst;
152 			if (r->rpool.cur != NULL)
153 				xdst = &r->rpool.cur->addr;
154 		} else {
155 			src = &r->src;
156 			dst = &r->dst;
157 		}
158 
159 		pf_counter_u64_add(&r->evaluations, 1);
160 		if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
161 			r = r->skip[PF_SKIP_IFP];
162 		else if (r->direction && r->direction != pd->dir)
163 			r = r->skip[PF_SKIP_DIR];
164 		else if (r->af && r->af != pd->af)
165 			r = r->skip[PF_SKIP_AF];
166 		else if (r->proto && r->proto != pd->proto)
167 			r = r->skip[PF_SKIP_PROTO];
168 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
169 		    src->neg, pd->kif, M_GETFIB(pd->m)))
170 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
171 			    PF_SKIP_DST_ADDR];
172 		else if (src->port_op && !pf_match_port(src->port_op,
173 		    src->port[0], src->port[1], sport))
174 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
175 			    PF_SKIP_DST_PORT];
176 		else if (dst != NULL &&
177 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
178 		    M_GETFIB(pd->m)))
179 			r = r->skip[PF_SKIP_DST_ADDR];
180 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
181 		    0, NULL, M_GETFIB(pd->m)))
182 			r = TAILQ_NEXT(r, entries);
183 		else if (dst != NULL && dst->port_op &&
184 		    !pf_match_port(dst->port_op, dst->port[0],
185 		    dst->port[1], dport))
186 			r = r->skip[PF_SKIP_DST_PORT];
187 		else if (r->match_tag && !pf_match_tag(pd->m, r, &tag,
188 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
189 			r = TAILQ_NEXT(r, entries);
190 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
191 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
192 		    &pd->hdr.tcp), r->os_fingerprint)))
193 			r = TAILQ_NEXT(r, entries);
194 		else {
195 			if (r->tag)
196 				tag = r->tag;
197 			if (r->rtableid >= 0)
198 				rtableid = r->rtableid;
199 			if (r->anchor == NULL) {
200 				rm = r;
201 				if (rm->action == PF_NONAT ||
202 				    rm->action == PF_NORDR ||
203 				    rm->action == PF_NOBINAT) {
204 					rm = NULL;
205 				}
206 				break;
207 			} else
208 				pf_step_into_anchor(anchor_stack, &asd,
209 				    &ruleset, rs_num, &r, NULL, NULL);
210 		}
211 		if (r == NULL)
212 			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
213 			    rs_num, &r, NULL, NULL);
214 	}
215 
216 	if (tag > 0 && pf_tag_packet(pd, tag))
217 		return (NULL);
218 	if (rtableid >= 0)
219 		M_SETFIB(pd->m, rtableid);
220 
221 	return (rm);
222 }
223 
224 static int
225 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
226     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
227     uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
228     uint16_t high, struct pf_ksrc_node **sn,
229     struct pf_udp_mapping **udp_mapping)
230 {
231 	struct pf_state_key_cmp	key;
232 	struct pf_addr		init_addr;
233 	struct pf_srchash	*sh = NULL;
234 
235 	bzero(&init_addr, sizeof(init_addr));
236 
237 	MPASS(*udp_mapping == NULL);
238 
239 	/*
240 	 * If we are UDP and have an existing mapping we can get source port
241 	 * from the mapping. In this case we have to look up the src_node as
242 	 * pf_map_addr would.
243 	 */
244 	if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
245 		struct pf_udp_endpoint_cmp udp_source;
246 
247 		bzero(&udp_source, sizeof(udp_source));
248 		udp_source.af = af;
249 		PF_ACPY(&udp_source.addr, saddr, af);
250 		udp_source.port = sport;
251 		*udp_mapping = pf_udp_mapping_find(&udp_source);
252 		if (*udp_mapping) {
253 			PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
254 			*nport = (*udp_mapping)->endpoints[1].port;
255 			/* Try to find a src_node as per pf_map_addr(). */
256 			if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
257 			    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
258 				*sn = pf_find_src_node(saddr, r, af, &sh, 0);
259 			return (0);
260 		} else {
261 			*udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
262 			if (*udp_mapping == NULL)
263 				return (1);
264 		}
265 	}
266 
267 	if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn))
268 		goto failed;
269 
270 	if (proto == IPPROTO_ICMP) {
271 		if (*nport == htons(ICMP_ECHO)) {
272 			low = 1;
273 			high = 65535;
274 		} else
275 			return (0);	/* Don't try to modify non-echo ICMP */
276 	}
277 #ifdef INET6
278 	if (proto == IPPROTO_ICMPV6) {
279 		if (*nport == htons(ICMP6_ECHO_REQUEST)) {
280 			low = 1;
281 			high = 65535;
282 		} else
283 			return (0);	/* Don't try to modify non-echo ICMP */
284 	}
285 #endif /* INET6 */
286 
287 	bzero(&key, sizeof(key));
288 	key.af = af;
289 	key.proto = proto;
290 	key.port[0] = dport;
291 	PF_ACPY(&key.addr[0], daddr, key.af);
292 
293 	do {
294 		PF_ACPY(&key.addr[1], naddr, key.af);
295 		if (*udp_mapping)
296 			PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af);
297 
298 		/*
299 		 * port search; start random, step;
300 		 * similar 2 portloop in in_pcbbind
301 		 */
302 		if (proto == IPPROTO_SCTP) {
303 			key.port[1] = sport;
304 			if (!pf_find_state_all_exists(&key, PF_IN)) {
305 				*nport = sport;
306 				return (0);
307 			} else {
308 				return (1); /* Fail mapping. */
309 			}
310 		} else if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
311 		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
312 			/*
313 			 * XXX bug: icmp states don't use the id on both sides.
314 			 * (traceroute -I through nat)
315 			 */
316 			key.port[1] = sport;
317 			if (!pf_find_state_all_exists(&key, PF_IN)) {
318 				*nport = sport;
319 				return (0);
320 			}
321 		} else if (low == high) {
322 			key.port[1] = htons(low);
323 			if (!pf_find_state_all_exists(&key, PF_IN)) {
324 				if (*udp_mapping != NULL) {
325 					(*udp_mapping)->endpoints[1].port = htons(low);
326 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
327 						*nport = htons(low);
328 						return (0);
329 					}
330 				} else {
331 					*nport = htons(low);
332 					return (0);
333 				}
334 			}
335 		} else {
336 			uint32_t tmp;
337 			uint16_t cut;
338 
339 			if (low > high) {
340 				tmp = low;
341 				low = high;
342 				high = tmp;
343 			}
344 			/* low < high */
345 			cut = arc4random() % (1 + high - low) + low;
346 			/* low <= cut <= high */
347 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
348 				if (*udp_mapping != NULL) {
349 					(*udp_mapping)->endpoints[1].port = htons(tmp);
350 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
351 						*nport = htons(tmp);
352 						return (0);
353 					}
354 				} else {
355 					key.port[1] = htons(tmp);
356 					if (!pf_find_state_all_exists(&key, PF_IN)) {
357 						*nport = htons(tmp);
358 						return (0);
359 					}
360 				}
361 			}
362 			tmp = cut;
363 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
364 				if (proto == IPPROTO_UDP &&
365 				    (r->rpool.opts & PF_POOL_ENDPI)) {
366 					(*udp_mapping)->endpoints[1].port = htons(tmp);
367 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
368 						*nport = htons(tmp);
369 						return (0);
370 					}
371 				} else {
372 					key.port[1] = htons(tmp);
373 					if (!pf_find_state_all_exists(&key, PF_IN)) {
374 						*nport = htons(tmp);
375 						return (0);
376 					}
377 				}
378 			}
379 		}
380 
381 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
382 		case PF_POOL_RANDOM:
383 		case PF_POOL_ROUNDROBIN:
384 			/*
385 			 * pick a different source address since we're out
386 			 * of free port choices for the current one.
387 			 */
388 			if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn))
389 				return (1);
390 			break;
391 		case PF_POOL_NONE:
392 		case PF_POOL_SRCHASH:
393 		case PF_POOL_BITMASK:
394 		default:
395 			return (1);
396 		}
397 	} while (! PF_AEQ(&init_addr, naddr, af) );
398 
399 failed:
400 	uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
401 	*udp_mapping = NULL;
402 	return (1);					/* none available */
403 }
404 
405 static bool
406 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
407 {
408 	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
409 		return (true);
410 	return (false);
411 }
412 
413 static int
414 pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
415     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
416     uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
417     struct pf_ksrc_node **sn, struct pf_udp_mapping **udp_mapping)
418 {
419 	uint16_t psmask, low, highmask;
420 	uint16_t i, ahigh, cut;
421 	int ashift, psidshift;
422 
423 	ashift = 16 - r->rpool.mape.offset;
424 	psidshift = ashift - r->rpool.mape.psidlen;
425 	psmask = r->rpool.mape.psid & ((1U << r->rpool.mape.psidlen) - 1);
426 	psmask = psmask << psidshift;
427 	highmask = (1U << psidshift) - 1;
428 
429 	ahigh = (1U << r->rpool.mape.offset) - 1;
430 	cut = arc4random() & ahigh;
431 	if (cut == 0)
432 		cut = 1;
433 
434 	for (i = cut; i <= ahigh; i++) {
435 		low = (i << ashift) | psmask;
436 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
437 		    naddr, nport, low, low | highmask, sn, udp_mapping))
438 			return (0);
439 	}
440 	for (i = cut - 1; i > 0; i--) {
441 		low = (i << ashift) | psmask;
442 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
443 		    naddr, nport, low, low | highmask, sn, udp_mapping))
444 			return (0);
445 	}
446 	return (1);
447 }
448 
449 u_short
450 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
451     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr)
452 {
453 	u_short			 reason = PFRES_MATCH;
454 	struct pf_kpool		*rpool = &r->rpool;
455 	struct pf_addr		*raddr = NULL, *rmask = NULL;
456 
457 	mtx_lock(&rpool->mtx);
458 	/* Find the route using chosen algorithm. Store the found route
459 	   in src_node if it was given or found. */
460 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
461 		reason = PFRES_MAPFAILED;
462 		goto done_pool_mtx;
463 	}
464 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
465 		switch (af) {
466 #ifdef INET
467 		case AF_INET:
468 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
469 			    (rpool->opts & PF_POOL_TYPEMASK) !=
470 			    PF_POOL_ROUNDROBIN) {
471 				reason = PFRES_MAPFAILED;
472 				goto done_pool_mtx;
473 			}
474 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
475 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
476 			break;
477 #endif /* INET */
478 #ifdef INET6
479 		case AF_INET6:
480 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
481 			    (rpool->opts & PF_POOL_TYPEMASK) !=
482 			    PF_POOL_ROUNDROBIN) {
483 				reason = PFRES_MAPFAILED;
484 				goto done_pool_mtx;
485 			}
486 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
487 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
488 			break;
489 #endif /* INET6 */
490 		}
491 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
492 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
493 			reason = PFRES_MAPFAILED;
494 			goto done_pool_mtx; /* unsupported */
495 		}
496 	} else {
497 		raddr = &rpool->cur->addr.v.a.addr;
498 		rmask = &rpool->cur->addr.v.a.mask;
499 	}
500 
501 	switch (rpool->opts & PF_POOL_TYPEMASK) {
502 	case PF_POOL_NONE:
503 		PF_ACPY(naddr, raddr, af);
504 		break;
505 	case PF_POOL_BITMASK:
506 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
507 		break;
508 	case PF_POOL_RANDOM:
509 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
510 			switch (af) {
511 #ifdef INET
512 			case AF_INET:
513 				rpool->counter.addr32[0] = htonl(arc4random());
514 				break;
515 #endif /* INET */
516 #ifdef INET6
517 			case AF_INET6:
518 				if (rmask->addr32[3] != 0xffffffff)
519 					rpool->counter.addr32[3] =
520 					    htonl(arc4random());
521 				else
522 					break;
523 				if (rmask->addr32[2] != 0xffffffff)
524 					rpool->counter.addr32[2] =
525 					    htonl(arc4random());
526 				else
527 					break;
528 				if (rmask->addr32[1] != 0xffffffff)
529 					rpool->counter.addr32[1] =
530 					    htonl(arc4random());
531 				else
532 					break;
533 				if (rmask->addr32[0] != 0xffffffff)
534 					rpool->counter.addr32[0] =
535 					    htonl(arc4random());
536 				break;
537 #endif /* INET6 */
538 			}
539 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
540 			PF_ACPY(init_addr, naddr, af);
541 
542 		} else {
543 			PF_AINC(&rpool->counter, af);
544 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
545 		}
546 		break;
547 	case PF_POOL_SRCHASH:
548 	    {
549 		unsigned char hash[16];
550 
551 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
552 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
553 		break;
554 	    }
555 	case PF_POOL_ROUNDROBIN:
556 	    {
557 		struct pf_kpooladdr *acur = rpool->cur;
558 
559 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
560 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
561 			    &rpool->tblidx, &rpool->counter, af, NULL))
562 				goto get_addr;
563 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
564 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
565 			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal))
566 				goto get_addr;
567 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
568 			goto get_addr;
569 
570 	try_next:
571 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
572 			rpool->cur = TAILQ_FIRST(&rpool->list);
573 		else
574 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
575 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
576 			rpool->tblidx = -1;
577 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
578 			    &rpool->tblidx, &rpool->counter, af, NULL)) {
579 				/* table contains no address of type 'af' */
580 				if (rpool->cur != acur)
581 					goto try_next;
582 				reason = PFRES_MAPFAILED;
583 				goto done_pool_mtx;
584 			}
585 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
586 			rpool->tblidx = -1;
587 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
588 			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) {
589 				/* table contains no address of type 'af' */
590 				if (rpool->cur != acur)
591 					goto try_next;
592 				reason = PFRES_MAPFAILED;
593 				goto done_pool_mtx;
594 			}
595 		} else {
596 			raddr = &rpool->cur->addr.v.a.addr;
597 			rmask = &rpool->cur->addr.v.a.mask;
598 			PF_ACPY(&rpool->counter, raddr, af);
599 		}
600 
601 	get_addr:
602 		PF_ACPY(naddr, &rpool->counter, af);
603 		if (init_addr != NULL && PF_AZERO(init_addr, af))
604 			PF_ACPY(init_addr, naddr, af);
605 		PF_AINC(&rpool->counter, af);
606 		break;
607 	    }
608 	}
609 
610 	if (nkif)
611 		*nkif = rpool->cur->kif;
612 
613 done_pool_mtx:
614 	mtx_unlock(&rpool->mtx);
615 
616 	if (reason) {
617 		counter_u64_add(V_pf_status.counters[reason], 1);
618 	}
619 
620 	return (reason);
621 }
622 
623 u_short
624 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
625     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
626     struct pf_ksrc_node **sn)
627 {
628 	u_short			 reason = 0;
629 	struct pf_kpool		*rpool = &r->rpool;
630 	struct pf_srchash	*sh = NULL;
631 
632 	/* Try to find a src_node if none was given and this
633 	   is a sticky-address rule. */
634 	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
635 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
636 		*sn = pf_find_src_node(saddr, r, af, &sh, false);
637 
638 	/* If a src_node was found or explicitly given and it has a non-zero
639 	   route address, use this address. A zeroed address is found if the
640 	   src node was created just a moment ago in pf_create_state and it
641 	   needs to be filled in with routing decision calculated here. */
642 	if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
643 		/* If the supplied address is the same as the current one we've
644 		 * been asked before, so tell the caller that there's no other
645 		 * address to be had. */
646 		if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
647 			reason = PFRES_MAPFAILED;
648 			goto done;
649 		}
650 
651 		PF_ACPY(naddr, &(*sn)->raddr, af);
652 		if (nkif)
653 			*nkif = (*sn)->rkif;
654 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
655 			printf("pf_map_addr: src tracking maps ");
656 			pf_print_host(saddr, 0, af);
657 			printf(" to ");
658 			pf_print_host(naddr, 0, af);
659 			if (nkif)
660 				printf("@%s", (*nkif)->pfik_name);
661 			printf("\n");
662 		}
663 		goto done;
664 	}
665 
666 	/*
667 	 * Source node has not been found. Find a new address and store it
668 	 * in variables given by the caller.
669 	 */
670 	if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr) != 0) {
671 		/* pf_map_addr() sets reason counters on its own */
672 		goto done;
673 	}
674 
675 	if (*sn != NULL) {
676 		PF_ACPY(&(*sn)->raddr, naddr, af);
677 		if (nkif)
678 			(*sn)->rkif = *nkif;
679 	}
680 
681 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
682 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
683 		printf("pf_map_addr: selected address ");
684 		pf_print_host(naddr, 0, af);
685 		if (nkif)
686 			printf("@%s", (*nkif)->pfik_name);
687 		printf("\n");
688 	}
689 
690 done:
691 	if (reason) {
692 		counter_u64_add(V_pf_status.counters[reason], 1);
693 	}
694 
695 	return (reason);
696 }
697 
698 u_short
699 pf_get_translation(struct pf_pdesc *pd, int off,
700     struct pf_ksrc_node **sn, struct pf_state_key **skp,
701     struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr,
702     uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack,
703     struct pf_krule **rp,
704     struct pf_udp_mapping **udp_mapping)
705 {
706 	struct pf_krule	*r = NULL;
707 	struct pf_addr	*naddr;
708 	uint16_t	*nportp;
709 	uint16_t	 low, high;
710 	u_short		 reason;
711 
712 	PF_RULES_RASSERT();
713 	KASSERT(*skp == NULL, ("*skp not NULL"));
714 	KASSERT(*nkp == NULL, ("*nkp not NULL"));
715 
716 	*rp = NULL;
717 
718 	if (pd->dir == PF_OUT) {
719 		r = pf_match_translation(pd, saddr,
720 		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
721 		if (r == NULL)
722 			r = pf_match_translation(pd,
723 			    saddr, sport, daddr, dport, PF_RULESET_NAT,
724 			    anchor_stack);
725 	} else {
726 		r = pf_match_translation(pd, saddr,
727 		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
728 		if (r == NULL)
729 			r = pf_match_translation(pd,
730 			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
731 			    anchor_stack);
732 	}
733 
734 	if (r == NULL)
735 		return (PFRES_MAX);
736 
737 	switch (r->action) {
738 	case PF_NONAT:
739 	case PF_NOBINAT:
740 	case PF_NORDR:
741 		return (PFRES_MAX);
742 	}
743 
744 	*skp = pf_state_key_setup(pd, saddr, daddr, sport, dport);
745 	if (*skp == NULL)
746 		return (PFRES_MEMORY);
747 	*nkp = pf_state_key_clone(*skp);
748 	if (*nkp == NULL) {
749 		uma_zfree(V_pf_state_key_z, *skp);
750 		*skp = NULL;
751 		return (PFRES_MEMORY);
752 	}
753 
754 	naddr = &(*nkp)->addr[1];
755 	nportp = &(*nkp)->port[1];
756 
757 	switch (r->action) {
758 	case PF_NAT:
759 		if (pd->proto == IPPROTO_ICMP) {
760 			low = 1;
761 			high = 65535;
762 		} else {
763 			low  = r->rpool.proxy_port[0];
764 			high = r->rpool.proxy_port[1];
765 		}
766 		if (r->rpool.mape.offset > 0) {
767 			if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
768 			    sport, daddr, dport, naddr, nportp, sn, udp_mapping)) {
769 				DPFPRINTF(PF_DEBUG_MISC,
770 				    ("pf: MAP-E port allocation (%u/%u/%u)"
771 				    " failed\n",
772 				    r->rpool.mape.offset,
773 				    r->rpool.mape.psidlen,
774 				    r->rpool.mape.psid));
775 				reason = PFRES_MAPFAILED;
776 				goto notrans;
777 			}
778 		} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
779 		    daddr, dport, naddr, nportp, low, high, sn, udp_mapping)) {
780 			DPFPRINTF(PF_DEBUG_MISC,
781 			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
782 			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
783 			reason = PFRES_MAPFAILED;
784 			goto notrans;
785 		}
786 		break;
787 	case PF_BINAT:
788 		switch (pd->dir) {
789 		case PF_OUT:
790 			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
791 				switch (pd->af) {
792 #ifdef INET
793 				case AF_INET:
794 					if (r->rpool.cur->addr.p.dyn->
795 					    pfid_acnt4 < 1) {
796 						reason = PFRES_MAPFAILED;
797 						goto notrans;
798 					}
799 					PF_POOLMASK(naddr,
800 					    &r->rpool.cur->addr.p.dyn->
801 					    pfid_addr4,
802 					    &r->rpool.cur->addr.p.dyn->
803 					    pfid_mask4, saddr, AF_INET);
804 					break;
805 #endif /* INET */
806 #ifdef INET6
807 				case AF_INET6:
808 					if (r->rpool.cur->addr.p.dyn->
809 					    pfid_acnt6 < 1) {
810 						reason = PFRES_MAPFAILED;
811 						goto notrans;
812 					}
813 					PF_POOLMASK(naddr,
814 					    &r->rpool.cur->addr.p.dyn->
815 					    pfid_addr6,
816 					    &r->rpool.cur->addr.p.dyn->
817 					    pfid_mask6, saddr, AF_INET6);
818 					break;
819 #endif /* INET6 */
820 				}
821 			} else
822 				PF_POOLMASK(naddr,
823 				    &r->rpool.cur->addr.v.a.addr,
824 				    &r->rpool.cur->addr.v.a.mask, saddr,
825 				    pd->af);
826 			break;
827 		case PF_IN:
828 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
829 				switch (pd->af) {
830 #ifdef INET
831 				case AF_INET:
832 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
833 						reason = PFRES_MAPFAILED;
834 						goto notrans;
835 					}
836 					PF_POOLMASK(naddr,
837 					    &r->src.addr.p.dyn->pfid_addr4,
838 					    &r->src.addr.p.dyn->pfid_mask4,
839 					    daddr, AF_INET);
840 					break;
841 #endif /* INET */
842 #ifdef INET6
843 				case AF_INET6:
844 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
845 						reason = PFRES_MAPFAILED;
846 						goto notrans;
847 					}
848 					PF_POOLMASK(naddr,
849 					    &r->src.addr.p.dyn->pfid_addr6,
850 					    &r->src.addr.p.dyn->pfid_mask6,
851 					    daddr, AF_INET6);
852 					break;
853 #endif /* INET6 */
854 				}
855 			} else
856 				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
857 				    &r->src.addr.v.a.mask, daddr, pd->af);
858 			break;
859 		}
860 		break;
861 	case PF_RDR: {
862 		struct pf_state_key_cmp key;
863 		int tries;
864 		uint16_t cut, low, high, nport;
865 
866 		reason = pf_map_addr_sn(pd->af, r, saddr, naddr, NULL, NULL, sn);
867 		if (reason != 0)
868 			goto notrans;
869 		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
870 			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
871 			    daddr, pd->af);
872 
873 		/* Do not change SCTP ports. */
874 		if (pd->proto == IPPROTO_SCTP)
875 			break;
876 
877 		if (r->rpool.proxy_port[1]) {
878 			uint32_t	tmp_nport;
879 
880 			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
881 			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
882 			    1)) + r->rpool.proxy_port[0];
883 
884 			/* Wrap around if necessary. */
885 			if (tmp_nport > 65535)
886 				tmp_nport -= 65535;
887 			nport = htons((uint16_t)tmp_nport);
888 		} else if (r->rpool.proxy_port[0])
889 			nport = htons(r->rpool.proxy_port[0]);
890 		else
891 			nport = dport;
892 
893 		/*
894 		 * Update the destination port.
895 		 */
896 		*nportp = nport;
897 
898 		/*
899 		 * Do we have a source port conflict in the stack state?  Try to
900 		 * modulate the source port if so.  Note that this is racy since
901 		 * the state lookup may not find any matches here but will once
902 		 * pf_create_state() actually instantiates the state.
903 		 */
904 		bzero(&key, sizeof(key));
905 		key.af = pd->af;
906 		key.proto = pd->proto;
907 		key.port[0] = sport;
908 		PF_ACPY(&key.addr[0], saddr, key.af);
909 		key.port[1] = nport;
910 		PF_ACPY(&key.addr[1], naddr, key.af);
911 
912 		if (!pf_find_state_all_exists(&key, PF_OUT))
913 			break;
914 
915 		tries = 0;
916 
917 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
918 		high = 65535;
919 		cut = arc4random() % (1 + high - low) + low;
920 		for (uint32_t tmp = cut;
921 		    tmp <= high && tmp <= UINT16_MAX &&
922 		    tries < V_pf_rdr_srcport_rewrite_tries;
923 		    tmp++, tries++) {
924 			key.port[0] = htons(tmp);
925 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
926 				/* Update the source port. */
927 				(*nkp)->port[0] = htons(tmp);
928 				goto out;
929 			}
930 		}
931 		for (uint32_t tmp = cut - 1;
932 		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
933 		    tmp--, tries++) {
934 			key.port[0] = htons(tmp);
935 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
936 				/* Update the source port. */
937 				(*nkp)->port[0] = htons(tmp);
938 				goto out;
939 			}
940 		}
941 
942 		/*
943 		 * We failed to find a match.  Push on ahead anyway, let
944 		 * pf_state_insert() be the arbiter of whether the state
945 		 * conflict is tolerable.  In particular, with TCP connections
946 		 * the state may be reused if the TCP state is terminal.
947 		 */
948 		DPFPRINTF(PF_DEBUG_MISC,
949 		    ("pf: RDR source port allocation failed\n"));
950 		break;
951 
952 out:
953 		DPFPRINTF(PF_DEBUG_MISC,
954 		    ("pf: RDR source port allocation %u->%u\n",
955 		    ntohs(sport), ntohs((*nkp)->port[0])));
956 		break;
957 	}
958 	default:
959 		panic("%s: unknown action %u", __func__, r->action);
960 	}
961 
962 	/* Return success only if translation really happened. */
963 	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
964 		*rp = r;
965 		return (PFRES_MATCH);
966 	}
967 
968 	reason = PFRES_MAX;
969 notrans:
970 	uma_zfree(V_pf_state_key_z, *nkp);
971 	uma_zfree(V_pf_state_key_z, *skp);
972 	*skp = *nkp = NULL;
973 	*sn = NULL;
974 
975 	return (reason);
976 }
977