xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision c49c9da239ca59722f104a64d9128e4b6052885a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/vnet.h>
52 #include <net/pfvar.h>
53 #include <net/if_pflog.h>
54 
55 /*
56  * Limit the amount of work we do to find a free source port for redirects that
57  * introduce a state conflict.
58  */
59 #define	V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
60 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
61 
/*
 * Debug printf: emit the parenthesised printf argument list `x' when the
 * configured debug level is at least `n'.  Wrapped in do/while(0) so the
 * macro behaves as a single statement and cannot capture a following `else'.
 */
#define DPFPRINTF(n, x)						\
	do {							\
		if (V_pf_status.debug >= (n))			\
			printf x;				\
	} while (0)
63 
/* Forward declarations for file-local helpers. */
static void		 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
			    struct pf_poolhashkey *key, sa_family_t af);
static struct pf_krule	*pf_match_translation(struct pf_pdesc *pd,
			    struct pf_addr *saddr, u_int16_t sport,
			    struct pf_addr *daddr, uint16_t dport, int rs_num,
			    struct pf_kanchor_stackframe *anchor_stack);
static int		 pf_get_sport(sa_family_t af, uint8_t proto,
			    struct pf_krule *r, struct pf_addr *saddr,
			    uint16_t sport, struct pf_addr *daddr,
			    uint16_t dport, struct pf_addr *naddr,
			    uint16_t *nport, uint16_t low, uint16_t high,
			    struct pf_ksrc_node **sn, struct pf_srchash **sh,
			    struct pf_udp_mapping **udp_mapping);
static bool		 pf_islinklocal(const sa_family_t af,
			    const struct pf_addr *addr);
75 
/*
 * Three-word integer mixing step used by pf_hash().  All three arguments
 * must be modifiable 32-bit unsigned lvalues; each is updated in place.
 */
#define mix(a,b,c) \
	do {						\
		(a) -= (b);				\
		(a) -= (c);				\
		(a) ^= ((c) >> 13);			\
		(b) -= (c);				\
		(b) -= (a);				\
		(b) ^= ((a) << 8);			\
		(c) -= (a);				\
		(c) -= (b);				\
		(c) ^= ((b) >> 13);			\
		(a) -= (b);				\
		(a) -= (c);				\
		(a) ^= ((c) >> 12);			\
		(b) -= (c);				\
		(b) -= (a);				\
		(b) ^= ((a) << 16);			\
		(c) -= (a);				\
		(c) -= (b);				\
		(c) ^= ((b) >> 5);			\
		(a) -= (b);				\
		(a) -= (c);				\
		(a) ^= ((c) >> 3);			\
		(b) -= (c);				\
		(b) -= (a);				\
		(b) ^= ((a) << 10);			\
		(c) -= (a);				\
		(c) -= (b);				\
		(c) ^= ((b) >> 15);			\
	} while (0)
88 
89 /*
90  * hash function based on bridge_hash in if_bridge.c
91  */
92 static void
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)93 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
94     struct pf_poolhashkey *key, sa_family_t af)
95 {
96 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
97 
98 	switch (af) {
99 #ifdef INET
100 	case AF_INET:
101 		a += inaddr->addr32[0];
102 		b += key->key32[1];
103 		mix(a, b, c);
104 		hash->addr32[0] = c + key->key32[2];
105 		break;
106 #endif /* INET */
107 #ifdef INET6
108 	case AF_INET6:
109 		a += inaddr->addr32[0];
110 		b += inaddr->addr32[2];
111 		mix(a, b, c);
112 		hash->addr32[0] = c;
113 		a += inaddr->addr32[1];
114 		b += inaddr->addr32[3];
115 		c += key->key32[1];
116 		mix(a, b, c);
117 		hash->addr32[1] = c;
118 		a += inaddr->addr32[2];
119 		b += inaddr->addr32[1];
120 		c += key->key32[2];
121 		mix(a, b, c);
122 		hash->addr32[2] = c;
123 		a += inaddr->addr32[3];
124 		b += inaddr->addr32[0];
125 		c += key->key32[3];
126 		mix(a, b, c);
127 		hash->addr32[3] = c;
128 		break;
129 #endif /* INET6 */
130 	}
131 }
132 
133 static struct pf_krule *
pf_match_translation(struct pf_pdesc * pd,struct pf_addr * saddr,u_int16_t sport,struct pf_addr * daddr,uint16_t dport,int rs_num,struct pf_kanchor_stackframe * anchor_stack)134 pf_match_translation(struct pf_pdesc *pd,
135     struct pf_addr *saddr, u_int16_t sport,
136     struct pf_addr *daddr, uint16_t dport, int rs_num,
137     struct pf_kanchor_stackframe *anchor_stack)
138 {
139 	struct pf_krule		*r, *rm = NULL;
140 	struct pf_kruleset	*ruleset = NULL;
141 	int			 tag = -1;
142 	int			 rtableid = -1;
143 	int			 asd = 0;
144 
145 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
146 	while (r != NULL) {
147 		struct pf_rule_addr	*src = NULL, *dst = NULL;
148 		struct pf_addr_wrap	*xdst = NULL;
149 
150 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
151 			src = &r->dst;
152 			if (r->rpool.cur != NULL)
153 				xdst = &r->rpool.cur->addr;
154 		} else {
155 			src = &r->src;
156 			dst = &r->dst;
157 		}
158 
159 		pf_counter_u64_add(&r->evaluations, 1);
160 		if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
161 			r = r->skip[PF_SKIP_IFP];
162 		else if (r->direction && r->direction != pd->dir)
163 			r = r->skip[PF_SKIP_DIR];
164 		else if (r->af && r->af != pd->af)
165 			r = r->skip[PF_SKIP_AF];
166 		else if (r->proto && r->proto != pd->proto)
167 			r = r->skip[PF_SKIP_PROTO];
168 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
169 		    src->neg, pd->kif, M_GETFIB(pd->m)))
170 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
171 			    PF_SKIP_DST_ADDR];
172 		else if (src->port_op && !pf_match_port(src->port_op,
173 		    src->port[0], src->port[1], sport))
174 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
175 			    PF_SKIP_DST_PORT];
176 		else if (dst != NULL &&
177 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
178 		    M_GETFIB(pd->m)))
179 			r = r->skip[PF_SKIP_DST_ADDR];
180 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
181 		    0, NULL, M_GETFIB(pd->m)))
182 			r = TAILQ_NEXT(r, entries);
183 		else if (dst != NULL && dst->port_op &&
184 		    !pf_match_port(dst->port_op, dst->port[0],
185 		    dst->port[1], dport))
186 			r = r->skip[PF_SKIP_DST_PORT];
187 		else if (r->match_tag && !pf_match_tag(pd->m, r, &tag,
188 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
189 			r = TAILQ_NEXT(r, entries);
190 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
191 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
192 		    &pd->hdr.tcp), r->os_fingerprint)))
193 			r = TAILQ_NEXT(r, entries);
194 		else {
195 			if (r->tag)
196 				tag = r->tag;
197 			if (r->rtableid >= 0)
198 				rtableid = r->rtableid;
199 			if (r->anchor == NULL) {
200 				rm = r;
201 				if (rm->action == PF_NONAT ||
202 				    rm->action == PF_NORDR ||
203 				    rm->action == PF_NOBINAT) {
204 					rm = NULL;
205 				}
206 				break;
207 			} else
208 				pf_step_into_anchor(anchor_stack, &asd,
209 				    &ruleset, rs_num, &r, NULL, NULL);
210 		}
211 		if (r == NULL)
212 			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
213 			    rs_num, &r, NULL, NULL);
214 	}
215 
216 	if (tag > 0 && pf_tag_packet(pd, tag))
217 		return (NULL);
218 	if (rtableid >= 0)
219 		M_SETFIB(pd->m, rtableid);
220 
221 	return (rm);
222 }
223 
224 static int
pf_get_sport(sa_family_t af,u_int8_t proto,struct pf_krule * r,struct pf_addr * saddr,uint16_t sport,struct pf_addr * daddr,uint16_t dport,struct pf_addr * naddr,uint16_t * nport,uint16_t low,uint16_t high,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_udp_mapping ** udp_mapping)225 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
226     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
227     uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
228     uint16_t high, struct pf_ksrc_node **sn, struct pf_srchash **sh,
229     struct pf_udp_mapping **udp_mapping)
230 {
231 	struct pf_state_key_cmp	key;
232 	struct pf_addr		init_addr;
233 
234 	bzero(&init_addr, sizeof(init_addr));
235 
236 	MPASS(*udp_mapping == NULL);
237 
238 	/*
239 	 * If we are UDP and have an existing mapping we can get source port
240 	 * from the mapping. In this case we have to look up the src_node as
241 	 * pf_map_addr would.
242 	 */
243 	if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
244 		struct pf_udp_endpoint_cmp udp_source;
245 
246 		bzero(&udp_source, sizeof(udp_source));
247 		udp_source.af = af;
248 		PF_ACPY(&udp_source.addr, saddr, af);
249 		udp_source.port = sport;
250 		*udp_mapping = pf_udp_mapping_find(&udp_source);
251 		if (*udp_mapping) {
252 			PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
253 			*nport = (*udp_mapping)->endpoints[1].port;
254 			/* Try to find a src_node as per pf_map_addr(). */
255 			if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
256 			    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
257 				*sn = pf_find_src_node(saddr, r, af, sh, false);
258 			if (*sn != NULL)
259 				PF_SRC_NODE_UNLOCK(*sn);
260 			return (0);
261 		} else {
262 			*udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
263 			if (*udp_mapping == NULL)
264 				return (1);
265 		}
266 	}
267 
268 	if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn, sh))
269 		goto failed;
270 
271 	if (proto == IPPROTO_ICMP) {
272 		if (*nport == htons(ICMP_ECHO)) {
273 			low = 1;
274 			high = 65535;
275 		} else
276 			return (0);	/* Don't try to modify non-echo ICMP */
277 	}
278 #ifdef INET6
279 	if (proto == IPPROTO_ICMPV6) {
280 		if (*nport == htons(ICMP6_ECHO_REQUEST)) {
281 			low = 1;
282 			high = 65535;
283 		} else
284 			return (0);	/* Don't try to modify non-echo ICMP */
285 	}
286 #endif /* INET6 */
287 
288 	bzero(&key, sizeof(key));
289 	key.af = af;
290 	key.proto = proto;
291 	key.port[0] = dport;
292 	PF_ACPY(&key.addr[0], daddr, key.af);
293 
294 	do {
295 		PF_ACPY(&key.addr[1], naddr, key.af);
296 		if (*udp_mapping)
297 			PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af);
298 
299 		/*
300 		 * port search; start random, step;
301 		 * similar 2 portloop in in_pcbbind
302 		 */
303 		if (proto == IPPROTO_SCTP) {
304 			key.port[1] = sport;
305 			if (!pf_find_state_all_exists(&key, PF_IN)) {
306 				*nport = sport;
307 				return (0);
308 			} else {
309 				return (1); /* Fail mapping. */
310 			}
311 		} else if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
312 		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
313 			/*
314 			 * XXX bug: icmp states don't use the id on both sides.
315 			 * (traceroute -I through nat)
316 			 */
317 			key.port[1] = sport;
318 			if (!pf_find_state_all_exists(&key, PF_IN)) {
319 				*nport = sport;
320 				return (0);
321 			}
322 		} else if (low == high) {
323 			key.port[1] = htons(low);
324 			if (!pf_find_state_all_exists(&key, PF_IN)) {
325 				if (*udp_mapping != NULL) {
326 					(*udp_mapping)->endpoints[1].port = htons(low);
327 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
328 						*nport = htons(low);
329 						return (0);
330 					}
331 				} else {
332 					*nport = htons(low);
333 					return (0);
334 				}
335 			}
336 		} else {
337 			uint32_t tmp;
338 			uint16_t cut;
339 
340 			if (low > high) {
341 				tmp = low;
342 				low = high;
343 				high = tmp;
344 			}
345 			/* low < high */
346 			cut = arc4random() % (1 + high - low) + low;
347 			/* low <= cut <= high */
348 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
349 				if (*udp_mapping != NULL) {
350 					(*udp_mapping)->endpoints[1].port = htons(tmp);
351 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
352 						*nport = htons(tmp);
353 						return (0);
354 					}
355 				} else {
356 					key.port[1] = htons(tmp);
357 					if (!pf_find_state_all_exists(&key, PF_IN)) {
358 						*nport = htons(tmp);
359 						return (0);
360 					}
361 				}
362 			}
363 			tmp = cut;
364 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
365 				if (proto == IPPROTO_UDP &&
366 				    (r->rpool.opts & PF_POOL_ENDPI)) {
367 					(*udp_mapping)->endpoints[1].port = htons(tmp);
368 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
369 						*nport = htons(tmp);
370 						return (0);
371 					}
372 				} else {
373 					key.port[1] = htons(tmp);
374 					if (!pf_find_state_all_exists(&key, PF_IN)) {
375 						*nport = htons(tmp);
376 						return (0);
377 					}
378 				}
379 			}
380 		}
381 
382 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
383 		case PF_POOL_RANDOM:
384 		case PF_POOL_ROUNDROBIN:
385 			/*
386 			 * pick a different source address since we're out
387 			 * of free port choices for the current one.
388 			 */
389 			(*sn) = NULL;
390 			if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn, sh))
391 				return (1);
392 			break;
393 		case PF_POOL_NONE:
394 		case PF_POOL_SRCHASH:
395 		case PF_POOL_BITMASK:
396 		default:
397 			return (1);
398 		}
399 	} while (! PF_AEQ(&init_addr, naddr, af) );
400 
401 failed:
402 	uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
403 	*udp_mapping = NULL;
404 	return (1);					/* none available */
405 }
406 
407 static bool
pf_islinklocal(const sa_family_t af,const struct pf_addr * addr)408 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
409 {
410 	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
411 		return (true);
412 	return (false);
413 }
414 
415 static int
pf_get_mape_sport(sa_family_t af,u_int8_t proto,struct pf_krule * r,struct pf_addr * saddr,uint16_t sport,struct pf_addr * daddr,uint16_t dport,struct pf_addr * naddr,uint16_t * nport,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_udp_mapping ** udp_mapping)416 pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
417     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
418     uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
419     struct pf_ksrc_node **sn, struct pf_srchash **sh,
420     struct pf_udp_mapping **udp_mapping)
421 {
422 	uint16_t psmask, low, highmask;
423 	uint16_t i, ahigh, cut;
424 	int ashift, psidshift;
425 
426 	ashift = 16 - r->rpool.mape.offset;
427 	psidshift = ashift - r->rpool.mape.psidlen;
428 	psmask = r->rpool.mape.psid & ((1U << r->rpool.mape.psidlen) - 1);
429 	psmask = psmask << psidshift;
430 	highmask = (1U << psidshift) - 1;
431 
432 	ahigh = (1U << r->rpool.mape.offset) - 1;
433 	cut = arc4random() & ahigh;
434 	if (cut == 0)
435 		cut = 1;
436 
437 	for (i = cut; i <= ahigh; i++) {
438 		low = (i << ashift) | psmask;
439 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
440 		    naddr, nport, low, low | highmask, sn, sh, udp_mapping))
441 			return (0);
442 	}
443 	for (i = cut - 1; i > 0; i--) {
444 		low = (i << ashift) | psmask;
445 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
446 		    naddr, nport, low, low | highmask, sn, sh, udp_mapping))
447 			return (0);
448 	}
449 	return (1);
450 }
451 
452 u_short
pf_map_addr(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr)453 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
454     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr)
455 {
456 	u_short			 reason = PFRES_MATCH;
457 	struct pf_kpool		*rpool = &r->rpool;
458 	struct pf_addr		*raddr = NULL, *rmask = NULL;
459 
460 	mtx_lock(&rpool->mtx);
461 	/* Find the route using chosen algorithm. Store the found route
462 	   in src_node if it was given or found. */
463 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
464 		reason = PFRES_MAPFAILED;
465 		goto done_pool_mtx;
466 	}
467 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
468 		switch (af) {
469 #ifdef INET
470 		case AF_INET:
471 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
472 			    (rpool->opts & PF_POOL_TYPEMASK) !=
473 			    PF_POOL_ROUNDROBIN) {
474 				reason = PFRES_MAPFAILED;
475 				goto done_pool_mtx;
476 			}
477 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
478 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
479 			break;
480 #endif /* INET */
481 #ifdef INET6
482 		case AF_INET6:
483 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
484 			    (rpool->opts & PF_POOL_TYPEMASK) !=
485 			    PF_POOL_ROUNDROBIN) {
486 				reason = PFRES_MAPFAILED;
487 				goto done_pool_mtx;
488 			}
489 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
490 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
491 			break;
492 #endif /* INET6 */
493 		}
494 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
495 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
496 			reason = PFRES_MAPFAILED;
497 			goto done_pool_mtx; /* unsupported */
498 		}
499 	} else {
500 		raddr = &rpool->cur->addr.v.a.addr;
501 		rmask = &rpool->cur->addr.v.a.mask;
502 	}
503 
504 	switch (rpool->opts & PF_POOL_TYPEMASK) {
505 	case PF_POOL_NONE:
506 		PF_ACPY(naddr, raddr, af);
507 		break;
508 	case PF_POOL_BITMASK:
509 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
510 		break;
511 	case PF_POOL_RANDOM:
512 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
513 			switch (af) {
514 #ifdef INET
515 			case AF_INET:
516 				rpool->counter.addr32[0] = htonl(arc4random());
517 				break;
518 #endif /* INET */
519 #ifdef INET6
520 			case AF_INET6:
521 				if (rmask->addr32[3] != 0xffffffff)
522 					rpool->counter.addr32[3] =
523 					    htonl(arc4random());
524 				else
525 					break;
526 				if (rmask->addr32[2] != 0xffffffff)
527 					rpool->counter.addr32[2] =
528 					    htonl(arc4random());
529 				else
530 					break;
531 				if (rmask->addr32[1] != 0xffffffff)
532 					rpool->counter.addr32[1] =
533 					    htonl(arc4random());
534 				else
535 					break;
536 				if (rmask->addr32[0] != 0xffffffff)
537 					rpool->counter.addr32[0] =
538 					    htonl(arc4random());
539 				break;
540 #endif /* INET6 */
541 			}
542 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
543 			PF_ACPY(init_addr, naddr, af);
544 
545 		} else {
546 			PF_AINC(&rpool->counter, af);
547 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
548 		}
549 		break;
550 	case PF_POOL_SRCHASH:
551 	    {
552 		unsigned char hash[16];
553 
554 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
555 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
556 		break;
557 	    }
558 	case PF_POOL_ROUNDROBIN:
559 	    {
560 		struct pf_kpooladdr *acur = rpool->cur;
561 
562 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
563 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
564 			    &rpool->tblidx, &rpool->counter, af, NULL))
565 				goto get_addr;
566 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
567 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
568 			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal))
569 				goto get_addr;
570 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
571 			goto get_addr;
572 
573 	try_next:
574 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
575 			rpool->cur = TAILQ_FIRST(&rpool->list);
576 		else
577 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
578 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
579 			rpool->tblidx = -1;
580 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
581 			    &rpool->tblidx, &rpool->counter, af, NULL)) {
582 				/* table contains no address of type 'af' */
583 				if (rpool->cur != acur)
584 					goto try_next;
585 				reason = PFRES_MAPFAILED;
586 				goto done_pool_mtx;
587 			}
588 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
589 			rpool->tblidx = -1;
590 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
591 			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) {
592 				/* table contains no address of type 'af' */
593 				if (rpool->cur != acur)
594 					goto try_next;
595 				reason = PFRES_MAPFAILED;
596 				goto done_pool_mtx;
597 			}
598 		} else {
599 			raddr = &rpool->cur->addr.v.a.addr;
600 			rmask = &rpool->cur->addr.v.a.mask;
601 			PF_ACPY(&rpool->counter, raddr, af);
602 		}
603 
604 	get_addr:
605 		PF_ACPY(naddr, &rpool->counter, af);
606 		if (init_addr != NULL && PF_AZERO(init_addr, af))
607 			PF_ACPY(init_addr, naddr, af);
608 		PF_AINC(&rpool->counter, af);
609 		break;
610 	    }
611 	}
612 
613 	if (nkif)
614 		*nkif = rpool->cur->kif;
615 
616 done_pool_mtx:
617 	mtx_unlock(&rpool->mtx);
618 
619 	if (reason) {
620 		counter_u64_add(V_pf_status.counters[reason], 1);
621 	}
622 
623 	return (reason);
624 }
625 
626 u_short
pf_map_addr_sn(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_ksrc_node ** sn,struct pf_srchash ** sh)627 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
628     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
629     struct pf_ksrc_node **sn, struct pf_srchash **sh)
630 {
631 	u_short			 reason = 0;
632 	struct pf_kpool		*rpool = &r->rpool;
633 
634 	KASSERT(*sn == NULL, ("*sn not NULL"));
635 
636 	/*
637 	 * If this is a sticky-address rule, try to find an existing src_node.
638 	 * Request the sh to be unlocked if sn was not found, as we never
639 	 * insert a new sn when parsing the ruleset.
640 	 */
641 	if (r->rpool.opts & PF_POOL_STICKYADDR &&
642 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
643 		*sn = pf_find_src_node(saddr, r, af, sh, false);
644 
645 	if (*sn != NULL) {
646 		PF_SRC_NODE_LOCK_ASSERT(*sn);
647 
648 		/* If the supplied address is the same as the current one we've
649 		 * been asked before, so tell the caller that there's no other
650 		 * address to be had. */
651 		if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
652 			reason = PFRES_MAPFAILED;
653 			goto done;
654 		}
655 
656 		PF_ACPY(naddr, &(*sn)->raddr, af);
657 		if (nkif)
658 			*nkif = (*sn)->rkif;
659 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
660 			printf("pf_map_addr: src tracking maps ");
661 			pf_print_host(saddr, 0, af);
662 			printf(" to ");
663 			pf_print_host(naddr, 0, af);
664 			if (nkif)
665 				printf("@%s", (*nkif)->pfik_name);
666 			printf("\n");
667 		}
668 		goto done;
669 	}
670 
671 	/*
672 	 * Source node has not been found. Find a new address and store it
673 	 * in variables given by the caller.
674 	 */
675 	if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr) != 0) {
676 		/* pf_map_addr() sets reason counters on its own */
677 		goto done;
678 	}
679 
680 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
681 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
682 		printf("pf_map_addr: selected address ");
683 		pf_print_host(naddr, 0, af);
684 		if (nkif)
685 			printf("@%s", (*nkif)->pfik_name);
686 		printf("\n");
687 	}
688 
689 done:
690 	if ((*sn) != NULL)
691 		PF_SRC_NODE_UNLOCK(*sn);
692 
693 	if (reason) {
694 		counter_u64_add(V_pf_status.counters[reason], 1);
695 	}
696 
697 	return (reason);
698 }
699 
700 u_short
pf_get_translation(struct pf_pdesc * pd,int off,struct pf_state_key ** skp,struct pf_state_key ** nkp,struct pf_addr * saddr,struct pf_addr * daddr,uint16_t sport,uint16_t dport,struct pf_kanchor_stackframe * anchor_stack,struct pf_krule ** rp,struct pf_udp_mapping ** udp_mapping)701 pf_get_translation(struct pf_pdesc *pd, int off,
702     struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr,
703     struct pf_addr *daddr, uint16_t sport, uint16_t dport,
704     struct pf_kanchor_stackframe *anchor_stack, struct pf_krule **rp,
705     struct pf_udp_mapping **udp_mapping)
706 {
707 	struct pf_krule	*r = NULL;
708 	struct pf_addr	*naddr;
709 	struct pf_ksrc_node	*sn = NULL;
710 	struct pf_srchash	*sh = NULL;
711 	uint16_t	*nportp;
712 	uint16_t	 low, high;
713 	u_short		 reason;
714 
715 	PF_RULES_RASSERT();
716 	KASSERT(*skp == NULL, ("*skp not NULL"));
717 	KASSERT(*nkp == NULL, ("*nkp not NULL"));
718 
719 	*rp = NULL;
720 
721 	if (pd->dir == PF_OUT) {
722 		r = pf_match_translation(pd, saddr,
723 		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
724 		if (r == NULL)
725 			r = pf_match_translation(pd,
726 			    saddr, sport, daddr, dport, PF_RULESET_NAT,
727 			    anchor_stack);
728 	} else {
729 		r = pf_match_translation(pd, saddr,
730 		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
731 		if (r == NULL)
732 			r = pf_match_translation(pd,
733 			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
734 			    anchor_stack);
735 	}
736 
737 	if (r == NULL)
738 		return (PFRES_MAX);
739 
740 	switch (r->action) {
741 	case PF_NONAT:
742 	case PF_NOBINAT:
743 	case PF_NORDR:
744 		return (PFRES_MAX);
745 	}
746 
747 	*skp = pf_state_key_setup(pd, saddr, daddr, sport, dport);
748 	if (*skp == NULL)
749 		return (PFRES_MEMORY);
750 	*nkp = pf_state_key_clone(*skp);
751 	if (*nkp == NULL) {
752 		uma_zfree(V_pf_state_key_z, *skp);
753 		*skp = NULL;
754 		return (PFRES_MEMORY);
755 	}
756 
757 	naddr = &(*nkp)->addr[1];
758 	nportp = &(*nkp)->port[1];
759 
760 	switch (r->action) {
761 	case PF_NAT:
762 		if (pd->proto == IPPROTO_ICMP) {
763 			low = 1;
764 			high = 65535;
765 		} else {
766 			low  = r->rpool.proxy_port[0];
767 			high = r->rpool.proxy_port[1];
768 		}
769 		if (r->rpool.mape.offset > 0) {
770 			if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
771 			    sport, daddr, dport, naddr, nportp, &sn, &sh,
772 			    udp_mapping)) {
773 				DPFPRINTF(PF_DEBUG_MISC,
774 				    ("pf: MAP-E port allocation (%u/%u/%u)"
775 				    " failed\n",
776 				    r->rpool.mape.offset,
777 				    r->rpool.mape.psidlen,
778 				    r->rpool.mape.psid));
779 				reason = PFRES_MAPFAILED;
780 				goto notrans;
781 			}
782 		} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
783 		    daddr, dport, naddr, nportp, low, high, &sn, &sh,
784 		    udp_mapping)) {
785 			DPFPRINTF(PF_DEBUG_MISC,
786 			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
787 			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
788 			reason = PFRES_MAPFAILED;
789 			goto notrans;
790 		}
791 		break;
792 	case PF_BINAT:
793 		switch (pd->dir) {
794 		case PF_OUT:
795 			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
796 				switch (pd->af) {
797 #ifdef INET
798 				case AF_INET:
799 					if (r->rpool.cur->addr.p.dyn->
800 					    pfid_acnt4 < 1) {
801 						reason = PFRES_MAPFAILED;
802 						goto notrans;
803 					}
804 					PF_POOLMASK(naddr,
805 					    &r->rpool.cur->addr.p.dyn->
806 					    pfid_addr4,
807 					    &r->rpool.cur->addr.p.dyn->
808 					    pfid_mask4, saddr, AF_INET);
809 					break;
810 #endif /* INET */
811 #ifdef INET6
812 				case AF_INET6:
813 					if (r->rpool.cur->addr.p.dyn->
814 					    pfid_acnt6 < 1) {
815 						reason = PFRES_MAPFAILED;
816 						goto notrans;
817 					}
818 					PF_POOLMASK(naddr,
819 					    &r->rpool.cur->addr.p.dyn->
820 					    pfid_addr6,
821 					    &r->rpool.cur->addr.p.dyn->
822 					    pfid_mask6, saddr, AF_INET6);
823 					break;
824 #endif /* INET6 */
825 				}
826 			} else
827 				PF_POOLMASK(naddr,
828 				    &r->rpool.cur->addr.v.a.addr,
829 				    &r->rpool.cur->addr.v.a.mask, saddr,
830 				    pd->af);
831 			break;
832 		case PF_IN:
833 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
834 				switch (pd->af) {
835 #ifdef INET
836 				case AF_INET:
837 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
838 						reason = PFRES_MAPFAILED;
839 						goto notrans;
840 					}
841 					PF_POOLMASK(naddr,
842 					    &r->src.addr.p.dyn->pfid_addr4,
843 					    &r->src.addr.p.dyn->pfid_mask4,
844 					    daddr, AF_INET);
845 					break;
846 #endif /* INET */
847 #ifdef INET6
848 				case AF_INET6:
849 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
850 						reason = PFRES_MAPFAILED;
851 						goto notrans;
852 					}
853 					PF_POOLMASK(naddr,
854 					    &r->src.addr.p.dyn->pfid_addr6,
855 					    &r->src.addr.p.dyn->pfid_mask6,
856 					    daddr, AF_INET6);
857 					break;
858 #endif /* INET6 */
859 				}
860 			} else
861 				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
862 				    &r->src.addr.v.a.mask, daddr, pd->af);
863 			break;
864 		}
865 		break;
866 	case PF_RDR: {
867 		struct pf_state_key_cmp key;
868 		int tries;
869 		uint16_t cut, low, high, nport;
870 
871 		reason = pf_map_addr_sn(pd->af, r, saddr, naddr, NULL, NULL, &sn, &sh);
872 		if (reason != 0)
873 			goto notrans;
874 		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
875 			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
876 			    daddr, pd->af);
877 
878 		/* Do not change SCTP ports. */
879 		if (pd->proto == IPPROTO_SCTP)
880 			break;
881 
882 		if (r->rpool.proxy_port[1]) {
883 			uint32_t	tmp_nport;
884 
885 			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
886 			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
887 			    1)) + r->rpool.proxy_port[0];
888 
889 			/* Wrap around if necessary. */
890 			if (tmp_nport > 65535)
891 				tmp_nport -= 65535;
892 			nport = htons((uint16_t)tmp_nport);
893 		} else if (r->rpool.proxy_port[0])
894 			nport = htons(r->rpool.proxy_port[0]);
895 		else
896 			nport = dport;
897 
898 		/*
899 		 * Update the destination port.
900 		 */
901 		*nportp = nport;
902 
903 		/*
904 		 * Do we have a source port conflict in the stack state?  Try to
905 		 * modulate the source port if so.  Note that this is racy since
906 		 * the state lookup may not find any matches here but will once
907 		 * pf_create_state() actually instantiates the state.
908 		 */
909 		bzero(&key, sizeof(key));
910 		key.af = pd->af;
911 		key.proto = pd->proto;
912 		key.port[0] = sport;
913 		PF_ACPY(&key.addr[0], saddr, key.af);
914 		key.port[1] = nport;
915 		PF_ACPY(&key.addr[1], naddr, key.af);
916 
917 		if (!pf_find_state_all_exists(&key, PF_OUT))
918 			break;
919 
920 		tries = 0;
921 
922 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
923 		high = 65535;
924 		cut = arc4random() % (1 + high - low) + low;
925 		for (uint32_t tmp = cut;
926 		    tmp <= high && tmp <= UINT16_MAX &&
927 		    tries < V_pf_rdr_srcport_rewrite_tries;
928 		    tmp++, tries++) {
929 			key.port[0] = htons(tmp);
930 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
931 				/* Update the source port. */
932 				(*nkp)->port[0] = htons(tmp);
933 				goto out;
934 			}
935 		}
936 		for (uint32_t tmp = cut - 1;
937 		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
938 		    tmp--, tries++) {
939 			key.port[0] = htons(tmp);
940 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
941 				/* Update the source port. */
942 				(*nkp)->port[0] = htons(tmp);
943 				goto out;
944 			}
945 		}
946 
947 		/*
948 		 * We failed to find a match.  Push on ahead anyway, let
949 		 * pf_state_insert() be the arbiter of whether the state
950 		 * conflict is tolerable.  In particular, with TCP connections
951 		 * the state may be reused if the TCP state is terminal.
952 		 */
953 		DPFPRINTF(PF_DEBUG_MISC,
954 		    ("pf: RDR source port allocation failed\n"));
955 		break;
956 
957 out:
958 		DPFPRINTF(PF_DEBUG_MISC,
959 		    ("pf: RDR source port allocation %u->%u\n",
960 		    ntohs(sport), ntohs((*nkp)->port[0])));
961 		break;
962 	}
963 	default:
964 		panic("%s: unknown action %u", __func__, r->action);
965 	}
966 
967 	/* Return success only if translation really happened. */
968 	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
969 		*rp = r;
970 		return (PFRES_MATCH);
971 	}
972 
973 	reason = PFRES_MAX;
974 notrans:
975 	uma_zfree(V_pf_state_key_z, *nkp);
976 	uma_zfree(V_pf_state_key_z, *skp);
977 	*skp = *nkp = NULL;
978 
979 	return (reason);
980 }
981