xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision 9996c680d12a6245d808bc8def1d2411e736efd6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/vnet.h>
52 #include <net/pfvar.h>
53 #include <net/if_pflog.h>
54 
55 /*
56  * Limit the amount of work we do to find a free source port for redirects that
57  * introduce a state conflict.
 *
 * pf_get_translation() counts every candidate source port it probes for an
 * rdr rule and gives up once this many have been tried.  The value is
 * per-vnet and defaults to 16.
58  */
59 #define	V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
60 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
61 
/*
 * Print a debug message when the configured pf debug level is at least n.
 * x is a complete, parenthesized printf argument list, e.g.
 * DPFPRINTF(PF_DEBUG_MISC, ("pf: %d\n", v));
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and cannot swallow an "else" that follows an unbraced use.
 */
#define DPFPRINTF(n, x)	do {						\
	if (V_pf_status.debug >= (n))					\
		printf x;						\
} while (0)
63 
64 static void		 pf_hash(struct pf_addr *, struct pf_addr *,
65 			    struct pf_poolhashkey *, sa_family_t);
66 static struct pf_krule	*pf_match_translation(struct pf_pdesc *,
67 			    struct pf_addr *, u_int16_t,
68 			    struct pf_addr *, uint16_t, int,
69 			    struct pf_kanchor_stackframe *);
70 static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *,
71     struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
72     uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **, struct pf_srchash**,
73     struct pf_udp_mapping **);
74 static bool		 pf_islinklocal(const sa_family_t, const struct pf_addr *);
75 
/*
 * Mixing step used by pf_hash(): scrambles the three 32-bit accumulators
 * a, b and c in place through nine subtract/xor/shift rounds.
 *
 * The arguments are expanded unparenthesized and evaluated many times, so
 * pass plain variables only (no expressions, no side effects).
 */
76 #define mix(a,b,c) \
77 	do {					\
78 		a -= b; a -= c; a ^= (c >> 13);	\
79 		b -= c; b -= a; b ^= (a << 8);	\
80 		c -= a; c -= b; c ^= (b >> 13);	\
81 		a -= b; a -= c; a ^= (c >> 12);	\
82 		b -= c; b -= a; b ^= (a << 16);	\
83 		c -= a; c -= b; c ^= (b >> 5);	\
84 		a -= b; a -= c; a ^= (c >> 3);	\
85 		b -= c; b -= a; b ^= (a << 10);	\
86 		c -= a; c -= b; c ^= (b >> 15);	\
87 	} while (0)
88 
89 /*
90  * hash function based on bridge_hash in if_bridge.c
91  */
92 static void
93 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
94     struct pf_poolhashkey *key, sa_family_t af)
95 {
96 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
97 
98 	switch (af) {
99 #ifdef INET
100 	case AF_INET:
101 		a += inaddr->addr32[0];
102 		b += key->key32[1];
103 		mix(a, b, c);
104 		hash->addr32[0] = c + key->key32[2];
105 		break;
106 #endif /* INET */
107 #ifdef INET6
108 	case AF_INET6:
109 		a += inaddr->addr32[0];
110 		b += inaddr->addr32[2];
111 		mix(a, b, c);
112 		hash->addr32[0] = c;
113 		a += inaddr->addr32[1];
114 		b += inaddr->addr32[3];
115 		c += key->key32[1];
116 		mix(a, b, c);
117 		hash->addr32[1] = c;
118 		a += inaddr->addr32[2];
119 		b += inaddr->addr32[1];
120 		c += key->key32[2];
121 		mix(a, b, c);
122 		hash->addr32[2] = c;
123 		a += inaddr->addr32[3];
124 		b += inaddr->addr32[0];
125 		c += key->key32[3];
126 		mix(a, b, c);
127 		hash->addr32[3] = c;
128 		break;
129 #endif /* INET6 */
130 	}
131 }
132 
/*
 * Find the translation rule of ruleset rs_num that applies to the packet
 * described by pd.
 *
 * The active rules of pf_main_ruleset are walked from the first one; each
 * failed criterion advances either through the rule's skip step for that
 * criterion or to the next rule.  Anchor rules are entered with
 * pf_step_into_anchor() and left with pf_step_out_of_anchor(), using
 * anchor_stack as the stack.  The walk stops at the first matching rule
 * that is not an anchor.
 *
 * For a binat rule evaluated on an inbound packet the roles are reversed:
 * the rule's dst is compared against the packet source (saddr/sport) and
 * the pool's current address, if any, against daddr.
 *
 * saddr/sport and daddr/dport are the packet's source and destination.
 *
 * Returns the matching rule, or NULL when nothing matched, when the match
 * is a PF_NONAT/PF_NORDR/PF_NOBINAT rule, or when pf_tag_packet() reports
 * failure.  Side effects: bumps the evaluations counter of every rule
 * visited, may tag the packet and may set the mbuf FIB from the last
 * rtableid seen on a matching rule or anchor.
 */
133 static struct pf_krule *
134 pf_match_translation(struct pf_pdesc *pd,
135     struct pf_addr *saddr, u_int16_t sport,
136     struct pf_addr *daddr, uint16_t dport, int rs_num,
137     struct pf_kanchor_stackframe *anchor_stack)
138 {
139 	struct pf_krule		*r, *rm = NULL;
140 	struct pf_kruleset	*ruleset = NULL;
141 	int			 tag = -1;
142 	int			 rtableid = -1;
143 	int			 asd = 0;
144 
145 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
146 	while (r != NULL) {
147 		struct pf_rule_addr	*src = NULL, *dst = NULL;
148 		struct pf_addr_wrap	*xdst = NULL;
149 
		/* Inbound binat: match the rule's dst against our source. */
150 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
151 			src = &r->dst;
152 			if (r->rpool.cur != NULL)
153 				xdst = &r->rpool.cur->addr;
154 		} else {
155 			src = &r->src;
156 			dst = &r->dst;
157 		}
158 
159 		pf_counter_u64_add(&r->evaluations, 1);
160 		if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
161 			r = r->skip[PF_SKIP_IFP];
162 		else if (r->direction && r->direction != pd->dir)
163 			r = r->skip[PF_SKIP_DIR];
164 		else if (r->af && r->af != pd->af)
165 			r = r->skip[PF_SKIP_AF];
166 		else if (r->proto && r->proto != pd->proto)
167 			r = r->skip[PF_SKIP_PROTO];
168 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
169 		    src->neg, pd->kif, M_GETFIB(pd->m)))
			/* Use the skip step of whichever rule field "src" aliases. */
170 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
171 			    PF_SKIP_DST_ADDR];
172 		else if (src->port_op && !pf_match_port(src->port_op,
173 		    src->port[0], src->port[1], sport))
174 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
175 			    PF_SKIP_DST_PORT];
176 		else if (dst != NULL &&
177 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
178 		    M_GETFIB(pd->m)))
179 			r = r->skip[PF_SKIP_DST_ADDR];
180 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
181 		    0, NULL, M_GETFIB(pd->m)))
182 			r = TAILQ_NEXT(r, entries);
183 		else if (dst != NULL && dst->port_op &&
184 		    !pf_match_port(dst->port_op, dst->port[0],
185 		    dst->port[1], dport))
186 			r = r->skip[PF_SKIP_DST_PORT];
187 		else if (r->match_tag && !pf_match_tag(pd->m, r, &tag,
188 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
189 			r = TAILQ_NEXT(r, entries);
190 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
191 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
192 		    &pd->hdr.tcp), r->os_fingerprint)))
193 			r = TAILQ_NEXT(r, entries);
194 		else {
			/* Every criterion matched: record tag/rtableid. */
195 			if (r->tag)
196 				tag = r->tag;
197 			if (r->rtableid >= 0)
198 				rtableid = r->rtableid;
199 			if (r->anchor == NULL) {
200 				rm = r;
				/* A "no nat/rdr/binat" rule ends the search without a result. */
201 				if (rm->action == PF_NONAT ||
202 				    rm->action == PF_NORDR ||
203 				    rm->action == PF_NOBINAT) {
204 					rm = NULL;
205 				}
206 				break;
207 			} else
208 				pf_step_into_anchor(anchor_stack, &asd,
209 				    &ruleset, rs_num, &r, NULL, NULL);
210 		}
		/* End of the current rule list: try to resume in the parent. */
211 		if (r == NULL)
212 			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
213 			    rs_num, &r, NULL, NULL);
214 	}
215 
216 	if (tag > 0 && pf_tag_packet(pd, tag))
217 		return (NULL);
218 	if (rtableid >= 0)
219 		M_SETFIB(pd->m, rtableid);
220 
221 	return (rm);
222 }
223 
224 static int
225 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
226     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
227     uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
228     uint16_t high, struct pf_ksrc_node **sn, struct pf_srchash **sh,
229     struct pf_udp_mapping **udp_mapping)
230 {
231 	struct pf_state_key_cmp	key;
232 	struct pf_addr		init_addr;
233 
234 	bzero(&init_addr, sizeof(init_addr));
235 
236 	MPASS(*udp_mapping == NULL);
237 
238 	/*
239 	 * If we are UDP and have an existing mapping we can get source port
240 	 * from the mapping. In this case we have to look up the src_node as
241 	 * pf_map_addr would.
242 	 */
243 	if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
244 		struct pf_udp_endpoint_cmp udp_source;
245 
246 		bzero(&udp_source, sizeof(udp_source));
247 		udp_source.af = af;
248 		PF_ACPY(&udp_source.addr, saddr, af);
249 		udp_source.port = sport;
250 		*udp_mapping = pf_udp_mapping_find(&udp_source);
251 		if (*udp_mapping) {
252 			PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
253 			*nport = (*udp_mapping)->endpoints[1].port;
254 			/* Try to find a src_node as per pf_map_addr(). */
255 			if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
256 			    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
257 				*sn = pf_find_src_node(saddr, r, af, sh, false);
258 			if (*sn != NULL)
259 				PF_SRC_NODE_UNLOCK(*sn);
260 			return (0);
261 		} else {
262 			*udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
263 			if (*udp_mapping == NULL)
264 				return (1);
265 		}
266 	}
267 
268 	if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn, sh))
269 		goto failed;
270 
271 	if (proto == IPPROTO_ICMP) {
272 		if (*nport == htons(ICMP_ECHO)) {
273 			low = 1;
274 			high = 65535;
275 		} else
276 			return (0);	/* Don't try to modify non-echo ICMP */
277 	}
278 #ifdef INET6
279 	if (proto == IPPROTO_ICMPV6) {
280 		if (*nport == htons(ICMP6_ECHO_REQUEST)) {
281 			low = 1;
282 			high = 65535;
283 		} else
284 			return (0);	/* Don't try to modify non-echo ICMP */
285 	}
286 #endif /* INET6 */
287 
288 	bzero(&key, sizeof(key));
289 	key.af = af;
290 	key.proto = proto;
291 	key.port[0] = dport;
292 	PF_ACPY(&key.addr[0], daddr, key.af);
293 
294 	do {
295 		PF_ACPY(&key.addr[1], naddr, key.af);
296 		if (*udp_mapping)
297 			PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af);
298 
299 		/*
300 		 * port search; start random, step;
301 		 * similar 2 portloop in in_pcbbind
302 		 */
303 		if (proto == IPPROTO_SCTP) {
304 			key.port[1] = sport;
305 			if (!pf_find_state_all_exists(&key, PF_IN)) {
306 				*nport = sport;
307 				return (0);
308 			} else {
309 				return (1); /* Fail mapping. */
310 			}
311 		} else if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
312 		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
313 			/*
314 			 * XXX bug: icmp states don't use the id on both sides.
315 			 * (traceroute -I through nat)
316 			 */
317 			key.port[1] = sport;
318 			if (!pf_find_state_all_exists(&key, PF_IN)) {
319 				*nport = sport;
320 				return (0);
321 			}
322 		} else if (low == high) {
323 			key.port[1] = htons(low);
324 			if (!pf_find_state_all_exists(&key, PF_IN)) {
325 				if (*udp_mapping != NULL) {
326 					(*udp_mapping)->endpoints[1].port = htons(low);
327 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
328 						*nport = htons(low);
329 						return (0);
330 					}
331 				} else {
332 					*nport = htons(low);
333 					return (0);
334 				}
335 			}
336 		} else {
337 			uint32_t tmp;
338 			uint16_t cut;
339 
340 			if (low > high) {
341 				tmp = low;
342 				low = high;
343 				high = tmp;
344 			}
345 			/* low < high */
346 			cut = arc4random() % (1 + high - low) + low;
347 			/* low <= cut <= high */
348 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
349 				if (*udp_mapping != NULL) {
350 					(*udp_mapping)->endpoints[1].port = htons(tmp);
351 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
352 						*nport = htons(tmp);
353 						return (0);
354 					}
355 				} else {
356 					key.port[1] = htons(tmp);
357 					if (!pf_find_state_all_exists(&key, PF_IN)) {
358 						*nport = htons(tmp);
359 						return (0);
360 					}
361 				}
362 			}
363 			tmp = cut;
364 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
365 				if (proto == IPPROTO_UDP &&
366 				    (r->rpool.opts & PF_POOL_ENDPI)) {
367 					(*udp_mapping)->endpoints[1].port = htons(tmp);
368 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
369 						*nport = htons(tmp);
370 						return (0);
371 					}
372 				} else {
373 					key.port[1] = htons(tmp);
374 					if (!pf_find_state_all_exists(&key, PF_IN)) {
375 						*nport = htons(tmp);
376 						return (0);
377 					}
378 				}
379 			}
380 		}
381 
382 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
383 		case PF_POOL_RANDOM:
384 		case PF_POOL_ROUNDROBIN:
385 			/*
386 			 * pick a different source address since we're out
387 			 * of free port choices for the current one.
388 			 */
389 			(*sn) = NULL;
390 			if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn, sh))
391 				return (1);
392 			break;
393 		case PF_POOL_NONE:
394 		case PF_POOL_SRCHASH:
395 		case PF_POOL_BITMASK:
396 		default:
397 			return (1);
398 		}
399 	} while (! PF_AEQ(&init_addr, naddr, af) );
400 
401 failed:
402 	uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
403 	*udp_mapping = NULL;
404 	return (1);					/* none available */
405 }
406 
407 static bool
408 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
409 {
410 	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
411 		return (true);
412 	return (false);
413 }
414 
415 static int
416 pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
417     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
418     uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
419     struct pf_ksrc_node **sn, struct pf_srchash **sh,
420     struct pf_udp_mapping **udp_mapping)
421 {
422 	uint16_t psmask, low, highmask;
423 	uint16_t i, ahigh, cut;
424 	int ashift, psidshift;
425 
426 	ashift = 16 - r->rpool.mape.offset;
427 	psidshift = ashift - r->rpool.mape.psidlen;
428 	psmask = r->rpool.mape.psid & ((1U << r->rpool.mape.psidlen) - 1);
429 	psmask = psmask << psidshift;
430 	highmask = (1U << psidshift) - 1;
431 
432 	ahigh = (1U << r->rpool.mape.offset) - 1;
433 	cut = arc4random() & ahigh;
434 	if (cut == 0)
435 		cut = 1;
436 
437 	for (i = cut; i <= ahigh; i++) {
438 		low = (i << ashift) | psmask;
439 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
440 		    naddr, nport, low, low | highmask, sn, sh, udp_mapping))
441 			return (0);
442 	}
443 	for (i = cut - 1; i > 0; i--) {
444 		low = (i << ashift) | psmask;
445 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
446 		    naddr, nport, low, low | highmask, sn, sh, udp_mapping))
447 			return (0);
448 	}
449 	return (1);
450 }
451 
/*
 * Pick the next translation address from rule r's pool.
 *
 * af		address family of the packet
 * r		rule whose rpool is used; rpool->mtx is held for the duration
 * saddr	packet source address, input to the bitmask and source-hash
 *		pool types
 * naddr	out: the selected address
 * nkif		optional out: receives the kif of the pool entry in use
 * init_addr	optional in/out: for the random and round-robin pool types,
 *		if it is all-zero it receives the first address selected
 *
 * Selection depends on the pool type in rpool->opts:
 *   PF_POOL_NONE	the pool address itself
 *   PF_POOL_BITMASK	PF_POOLMASK() of the pool address/mask and saddr
 *   PF_POOL_RANDOM	a random address under the mask on the first call
 *			(zero init_addr), then successive increments of the
 *			pool counter
 *   PF_POOL_SRCHASH	PF_POOLMASK() with pf_hash(saddr) under the pool key
 *   PF_POOL_ROUNDROBIN	the pool counter, stepping through the current
 *			entry (plain address, table or dynamic interface)
 *			and moving on to the next entry when it is exhausted
 *
 * Returns PFRES_MATCH on success.  Returns PFRES_MAPFAILED, and increments
 * the corresponding V_pf_status counter, when the current entry is
 * PF_ADDR_NOROUTE, when a dynamic interface has no address of family af
 * (non round-robin pools), when a table is used with a pool type other
 * than round-robin, or when round-robin finds no usable address.
 */
452 u_short
453 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
454     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr)
455 {
456 	u_short			 reason = PFRES_MATCH;
457 	struct pf_kpool		*rpool = &r->rpool;
458 	struct pf_addr		*raddr = NULL, *rmask = NULL;
459 
460 	mtx_lock(&rpool->mtx);
461 	/* Find the route using chosen algorithm. Store the found route
462 	   in src_node if it was given or found. */
463 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
464 		reason = PFRES_MAPFAILED;
465 		goto done_pool_mtx;
466 	}
467 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
468 		switch (af) {
469 #ifdef INET
470 		case AF_INET:
471 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
472 			    (rpool->opts & PF_POOL_TYPEMASK) !=
473 			    PF_POOL_ROUNDROBIN) {
474 				reason = PFRES_MAPFAILED;
475 				goto done_pool_mtx;
476 			}
477 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
478 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
479 			break;
480 #endif /* INET */
481 #ifdef INET6
482 		case AF_INET6:
483 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
484 			    (rpool->opts & PF_POOL_TYPEMASK) !=
485 			    PF_POOL_ROUNDROBIN) {
486 				reason = PFRES_MAPFAILED;
487 				goto done_pool_mtx;
488 			}
489 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
490 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
491 			break;
492 #endif /* INET6 */
493 		}
494 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		/* raddr/rmask stay NULL; only round-robin reads a table. */
495 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
496 			reason = PFRES_MAPFAILED;
497 			goto done_pool_mtx; /* unsupported */
498 		}
499 	} else {
500 		raddr = &rpool->cur->addr.v.a.addr;
501 		rmask = &rpool->cur->addr.v.a.mask;
502 	}
503 
504 	switch (rpool->opts & PF_POOL_TYPEMASK) {
505 	case PF_POOL_NONE:
506 		PF_ACPY(naddr, raddr, af);
507 		break;
508 	case PF_POOL_BITMASK:
509 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
510 		break;
511 	case PF_POOL_RANDOM:
		/* Zero init_addr marks the first call: seed the counter randomly. */
512 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
513 			switch (af) {
514 #ifdef INET
515 			case AF_INET:
516 				rpool->counter.addr32[0] = htonl(arc4random());
517 				break;
518 #endif /* INET */
519 #ifdef INET6
520 			case AF_INET6:
				/*
				 * Randomize from the last word upward, stopping
				 * at the first word the mask fully covers.
				 */
521 				if (rmask->addr32[3] != 0xffffffff)
522 					rpool->counter.addr32[3] =
523 					    htonl(arc4random());
524 				else
525 					break;
526 				if (rmask->addr32[2] != 0xffffffff)
527 					rpool->counter.addr32[2] =
528 					    htonl(arc4random());
529 				else
530 					break;
531 				if (rmask->addr32[1] != 0xffffffff)
532 					rpool->counter.addr32[1] =
533 					    htonl(arc4random());
534 				else
535 					break;
536 				if (rmask->addr32[0] != 0xffffffff)
537 					rpool->counter.addr32[0] =
538 					    htonl(arc4random());
539 				break;
540 #endif /* INET6 */
541 			}
542 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
543 			PF_ACPY(init_addr, naddr, af);
544 
545 		} else {
546 			PF_AINC(&rpool->counter, af);
547 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
548 		}
549 		break;
550 	case PF_POOL_SRCHASH:
551 	    {
552 		unsigned char hash[16];
553 
554 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
555 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
556 		break;
557 	    }
558 	case PF_POOL_ROUNDROBIN:
559 	    {
		/* Remember where we started so a full lap can be detected. */
560 		struct pf_kpooladdr *acur = rpool->cur;
561 
		/* First see whether the current entry still yields an address. */
562 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
563 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
564 			    &rpool->tblidx, &rpool->counter, af, NULL))
565 				goto get_addr;
566 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
567 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
568 			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal))
569 				goto get_addr;
570 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
571 			goto get_addr;
572 
573 	try_next:
		/* Advance to the next pool entry, wrapping to the head. */
574 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
575 			rpool->cur = TAILQ_FIRST(&rpool->list);
576 		else
577 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
578 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
579 			rpool->tblidx = -1;
580 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
581 			    &rpool->tblidx, &rpool->counter, af, NULL)) {
582 				/* table contains no address of type 'af' */
583 				if (rpool->cur != acur)
584 					goto try_next;
585 				reason = PFRES_MAPFAILED;
586 				goto done_pool_mtx;
587 			}
588 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
589 			rpool->tblidx = -1;
590 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
591 			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) {
592 				/* table contains no address of type 'af' */
593 				if (rpool->cur != acur)
594 					goto try_next;
595 				reason = PFRES_MAPFAILED;
596 				goto done_pool_mtx;
597 			}
598 		} else {
599 			raddr = &rpool->cur->addr.v.a.addr;
600 			rmask = &rpool->cur->addr.v.a.mask;
601 			PF_ACPY(&rpool->counter, raddr, af);
602 		}
603 
604 	get_addr:
		/* Hand out the counter, then advance it for the next call. */
605 		PF_ACPY(naddr, &rpool->counter, af);
606 		if (init_addr != NULL && PF_AZERO(init_addr, af))
607 			PF_ACPY(init_addr, naddr, af);
608 		PF_AINC(&rpool->counter, af);
609 		break;
610 	    }
611 	}
612 
613 	if (nkif)
614 		*nkif = rpool->cur->kif;
615 
616 done_pool_mtx:
617 	mtx_unlock(&rpool->mtx);
618 
619 	if (reason) {
620 		counter_u64_add(V_pf_status.counters[reason], 1);
621 	}
622 
623 	return (reason);
624 }
625 
626 u_short
627 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
628     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
629     struct pf_ksrc_node **sn, struct pf_srchash **sh)
630 {
631 	u_short			 reason = 0;
632 	struct pf_kpool		*rpool = &r->rpool;
633 
634 	/*
635 	 * Try to find a src_node if none was given and this is
636 	 * a sticky-address rule. Request the sh to be unlocked if
637 	 * sn was not found, as here we never insert a new sn.
638 	 */
639 	if (*sn == NULL) {
640 		if (r->rpool.opts & PF_POOL_STICKYADDR &&
641 		    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
642 			*sn = pf_find_src_node(saddr, r, af, sh, false);
643 	} else {
644 		pf_src_node_exists(sn, *sh);
645 	}
646 
647 	/* If a src_node was found or explicitly given and it has a non-zero
648 	   route address, use this address. A zeroed address is found if the
649 	   src node was created just a moment ago in pf_create_state and it
650 	   needs to be filled in with routing decision calculated here. */
651 	if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
652 		PF_SRC_NODE_LOCK_ASSERT(*sn);
653 
654 		/* If the supplied address is the same as the current one we've
655 		 * been asked before, so tell the caller that there's no other
656 		 * address to be had. */
657 		if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
658 			reason = PFRES_MAPFAILED;
659 			goto done;
660 		}
661 
662 		PF_ACPY(naddr, &(*sn)->raddr, af);
663 		if (nkif)
664 			*nkif = (*sn)->rkif;
665 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
666 			printf("pf_map_addr: src tracking maps ");
667 			pf_print_host(saddr, 0, af);
668 			printf(" to ");
669 			pf_print_host(naddr, 0, af);
670 			if (nkif)
671 				printf("@%s", (*nkif)->pfik_name);
672 			printf("\n");
673 		}
674 		goto done;
675 	}
676 
677 	/*
678 	 * Source node has not been found. Find a new address and store it
679 	 * in variables given by the caller.
680 	 */
681 	if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr) != 0) {
682 		/* pf_map_addr() sets reason counters on its own */
683 		goto done;
684 	}
685 
686 	if (*sn != NULL) {
687 		PF_SRC_NODE_LOCK_ASSERT(*sn);
688 
689 		PF_ACPY(&(*sn)->raddr, naddr, af);
690 		if (nkif)
691 			(*sn)->rkif = *nkif;
692 	}
693 
694 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
695 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
696 		printf("pf_map_addr: selected address ");
697 		pf_print_host(naddr, 0, af);
698 		if (nkif)
699 			printf("@%s", (*nkif)->pfik_name);
700 		printf("\n");
701 	}
702 
703 done:
704 	if ((*sn) != NULL)
705 		PF_SRC_NODE_UNLOCK(*sn);
706 
707 	if (reason) {
708 		counter_u64_add(V_pf_status.counters[reason], 1);
709 	}
710 
711 	return (reason);
712 }
713 
/*
 * Look up the translation (nat, rdr or binat) rule for a packet and build
 * the pair of state keys describing the packet before and after
 * translation.
 *
 * pd		packet descriptor
 * off		not referenced in this function
 * skp, nkp	out: original and translated state key; both must point to
 *		NULL on entry
 * saddr/daddr, sport/dport
 *		packet addresses and ports (ports in network order)
 * anchor_stack	scratch stack for anchor traversal
 * rp		out: the rule that performed the translation
 * udp_mapping	passed to pf_get_sport() / pf_get_mape_sport() for nat rules
 *
 * Outbound packets are matched against the binat and then the nat ruleset,
 * inbound packets against the rdr and then the binat ruleset.  The rules
 * read lock is asserted.
 *
 * Returns:
 *   PFRES_MATCH	a translation was applied; *rp, *skp and *nkp are set
 *   PFRES_MAX		no rule matched, or the rule left the key unchanged
 *   PFRES_MEMORY	a state key could not be allocated
 *   PFRES_MAPFAILED	(or the reason from pf_map_addr_sn() for rdr) no
 *			translation address/port could be selected
 * For every result other than PFRES_MATCH both keys are freed and *skp and
 * *nkp are NULL.
 */
714 u_short
715 pf_get_translation(struct pf_pdesc *pd, int off,
716     struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr,
717     struct pf_addr *daddr, uint16_t sport, uint16_t dport,
718     struct pf_kanchor_stackframe *anchor_stack, struct pf_krule **rp,
719     struct pf_udp_mapping **udp_mapping)
720 {
721 	struct pf_krule	*r = NULL;
722 	struct pf_addr	*naddr;
723 	struct pf_ksrc_node	*sn = NULL;
724 	struct pf_srchash	*sh = NULL;
725 	uint16_t	*nportp;
726 	uint16_t	 low, high;
727 	u_short		 reason;
728 
729 	PF_RULES_RASSERT();
730 	KASSERT(*skp == NULL, ("*skp not NULL"));
731 	KASSERT(*nkp == NULL, ("*nkp not NULL"));
732 
733 	*rp = NULL;
734 
735 	if (pd->dir == PF_OUT) {
736 		r = pf_match_translation(pd, saddr,
737 		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
738 		if (r == NULL)
739 			r = pf_match_translation(pd,
740 			    saddr, sport, daddr, dport, PF_RULESET_NAT,
741 			    anchor_stack);
742 	} else {
743 		r = pf_match_translation(pd, saddr,
744 		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
745 		if (r == NULL)
746 			r = pf_match_translation(pd,
747 			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
748 			    anchor_stack);
749 	}
750 
751 	if (r == NULL)
752 		return (PFRES_MAX);
753 
754 	switch (r->action) {
755 	case PF_NONAT:
756 	case PF_NOBINAT:
757 	case PF_NORDR:
758 		return (PFRES_MAX);
759 	}
760 
761 	*skp = pf_state_key_setup(pd, saddr, daddr, sport, dport);
762 	if (*skp == NULL)
763 		return (PFRES_MEMORY);
764 	*nkp = pf_state_key_clone(*skp);
765 	if (*nkp == NULL) {
766 		uma_zfree(V_pf_state_key_z, *skp);
767 		*skp = NULL;
768 		return (PFRES_MEMORY);
769 	}
770 
	/* The translation is written into slot 1 of the cloned key. */
771 	naddr = &(*nkp)->addr[1];
772 	nportp = &(*nkp)->port[1];
773 
774 	switch (r->action) {
775 	case PF_NAT:
776 		if (pd->proto == IPPROTO_ICMP) {
777 			low = 1;
778 			high = 65535;
779 		} else {
780 			low  = r->rpool.proxy_port[0];
781 			high = r->rpool.proxy_port[1];
782 		}
		/* A non-zero MAP-E offset selects port-set based allocation. */
783 		if (r->rpool.mape.offset > 0) {
784 			if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
785 			    sport, daddr, dport, naddr, nportp, &sn, &sh,
786 			    udp_mapping)) {
787 				DPFPRINTF(PF_DEBUG_MISC,
788 				    ("pf: MAP-E port allocation (%u/%u/%u)"
789 				    " failed\n",
790 				    r->rpool.mape.offset,
791 				    r->rpool.mape.psidlen,
792 				    r->rpool.mape.psid));
793 				reason = PFRES_MAPFAILED;
794 				goto notrans;
795 			}
796 		} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
797 		    daddr, dport, naddr, nportp, low, high, &sn, &sh,
798 		    udp_mapping)) {
799 			DPFPRINTF(PF_DEBUG_MISC,
800 			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
801 			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
802 			reason = PFRES_MAPFAILED;
803 			goto notrans;
804 		}
805 		break;
806 	case PF_BINAT:
807 		switch (pd->dir) {
808 		case PF_OUT:
			/* Outbound: map the source into the pool's network. */
809 			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
810 				switch (pd->af) {
811 #ifdef INET
812 				case AF_INET:
813 					if (r->rpool.cur->addr.p.dyn->
814 					    pfid_acnt4 < 1) {
815 						reason = PFRES_MAPFAILED;
816 						goto notrans;
817 					}
818 					PF_POOLMASK(naddr,
819 					    &r->rpool.cur->addr.p.dyn->
820 					    pfid_addr4,
821 					    &r->rpool.cur->addr.p.dyn->
822 					    pfid_mask4, saddr, AF_INET);
823 					break;
824 #endif /* INET */
825 #ifdef INET6
826 				case AF_INET6:
827 					if (r->rpool.cur->addr.p.dyn->
828 					    pfid_acnt6 < 1) {
829 						reason = PFRES_MAPFAILED;
830 						goto notrans;
831 					}
832 					PF_POOLMASK(naddr,
833 					    &r->rpool.cur->addr.p.dyn->
834 					    pfid_addr6,
835 					    &r->rpool.cur->addr.p.dyn->
836 					    pfid_mask6, saddr, AF_INET6);
837 					break;
838 #endif /* INET6 */
839 				}
840 			} else
841 				PF_POOLMASK(naddr,
842 				    &r->rpool.cur->addr.v.a.addr,
843 				    &r->rpool.cur->addr.v.a.mask, saddr,
844 				    pd->af);
845 			break;
846 		case PF_IN:
			/* Inbound: map the destination back into the rule's src network. */
847 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
848 				switch (pd->af) {
849 #ifdef INET
850 				case AF_INET:
851 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
852 						reason = PFRES_MAPFAILED;
853 						goto notrans;
854 					}
855 					PF_POOLMASK(naddr,
856 					    &r->src.addr.p.dyn->pfid_addr4,
857 					    &r->src.addr.p.dyn->pfid_mask4,
858 					    daddr, AF_INET);
859 					break;
860 #endif /* INET */
861 #ifdef INET6
862 				case AF_INET6:
863 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
864 						reason = PFRES_MAPFAILED;
865 						goto notrans;
866 					}
867 					PF_POOLMASK(naddr,
868 					    &r->src.addr.p.dyn->pfid_addr6,
869 					    &r->src.addr.p.dyn->pfid_mask6,
870 					    daddr, AF_INET6);
871 					break;
872 #endif /* INET6 */
873 				}
874 			} else
875 				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
876 				    &r->src.addr.v.a.mask, daddr, pd->af);
877 			break;
878 		}
879 		break;
880 	case PF_RDR: {
881 		struct pf_state_key_cmp key;
882 		int tries;
		/* NOTE: low and high here shadow the function-scope variables. */
883 		uint16_t cut, low, high, nport;
884 
885 		reason = pf_map_addr_sn(pd->af, r, saddr, naddr, NULL, NULL, &sn, &sh);
886 		if (reason != 0)
887 			goto notrans;
888 		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
889 			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
890 			    daddr, pd->af);
891 
892 		/* Do not change SCTP ports. */
893 		if (pd->proto == IPPROTO_SCTP)
894 			break;
895 
896 		if (r->rpool.proxy_port[1]) {
897 			uint32_t	tmp_nport;
898 
			/*
			 * Port range redirect: keep the packet's offset into
			 * the rule's destination port range, folded into the
			 * proxy port range.
			 */
899 			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
900 			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
901 			    1)) + r->rpool.proxy_port[0];
902 
903 			/* Wrap around if necessary. */
904 			if (tmp_nport > 65535)
905 				tmp_nport -= 65535;
906 			nport = htons((uint16_t)tmp_nport);
907 		} else if (r->rpool.proxy_port[0])
908 			nport = htons(r->rpool.proxy_port[0]);
909 		else
910 			nport = dport;
911 
912 		/*
913 		 * Update the destination port.
914 		 */
915 		*nportp = nport;
916 
917 		/*
918 		 * Do we have a source port conflict in the stack state?  Try to
919 		 * modulate the source port if so.  Note that this is racy since
920 		 * the state lookup may not find any matches here but will once
921 		 * pf_create_state() actually instantiates the state.
922 		 */
923 		bzero(&key, sizeof(key));
924 		key.af = pd->af;
925 		key.proto = pd->proto;
926 		key.port[0] = sport;
927 		PF_ACPY(&key.addr[0], saddr, key.af);
928 		key.port[1] = nport;
929 		PF_ACPY(&key.addr[1], naddr, key.af);
930 
931 		if (!pf_find_state_all_exists(&key, PF_OUT))
932 			break;
933 
934 		tries = 0;
935 
936 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
937 		high = 65535;
938 		cut = arc4random() % (1 + high - low) + low;
		/*
		 * Probe upward from a random port, then downward; "tries"
		 * is shared by both loops so the total work is bounded.
		 */
939 		for (uint32_t tmp = cut;
940 		    tmp <= high && tmp <= UINT16_MAX &&
941 		    tries < V_pf_rdr_srcport_rewrite_tries;
942 		    tmp++, tries++) {
943 			key.port[0] = htons(tmp);
944 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
945 				/* Update the source port. */
946 				(*nkp)->port[0] = htons(tmp);
947 				goto out;
948 			}
949 		}
950 		for (uint32_t tmp = cut - 1;
951 		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
952 		    tmp--, tries++) {
953 			key.port[0] = htons(tmp);
954 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
955 				/* Update the source port. */
956 				(*nkp)->port[0] = htons(tmp);
957 				goto out;
958 			}
959 		}
960 
961 		/*
962 		 * We failed to find a match.  Push on ahead anyway, let
963 		 * pf_state_insert() be the arbiter of whether the state
964 		 * conflict is tolerable.  In particular, with TCP connections
965 		 * the state may be reused if the TCP state is terminal.
966 		 */
967 		DPFPRINTF(PF_DEBUG_MISC,
968 		    ("pf: RDR source port allocation failed\n"));
969 		break;
970 
971 out:
972 		DPFPRINTF(PF_DEBUG_MISC,
973 		    ("pf: RDR source port allocation %u->%u\n",
974 		    ntohs(sport), ntohs((*nkp)->port[0])));
975 		break;
976 	}
977 	default:
978 		panic("%s: unknown action %u", __func__, r->action);
979 	}
980 
981 	/* Return success only if translation really happened. */
982 	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
983 		*rp = r;
984 		return (PFRES_MATCH);
985 	}
986 
987 	reason = PFRES_MAX;
988 notrans:
989 	uma_zfree(V_pf_state_key_z, *nkp);
990 	uma_zfree(V_pf_state_key_z, *skp);
991 	*skp = *nkp = NULL;
992 
993 	return (reason);
994 }
995