1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials provided
17 * with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37 */
38
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/vnet.h>
53 #include <net/pfvar.h>
54 #include <net/if_pflog.h>
55
56 #ifdef INET
57 #include <netinet/in_var.h>
58 #endif
59
60 #ifdef INET6
61 #include <netinet6/in6_var.h>
62 #endif
63
64
65 /*
66 * Limit the amount of work we do to find a free source port for redirects that
67 * introduce a state conflict.
68 */
69 #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries)
70 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
71
/*
 * Print a debug message when the pf debug level is at least `n'.  `x' is a
 * complete, parenthesized printf() argument list, e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("value %d\n", v));
 * The do/while wrapper turns the expansion into a single statement, so the
 * macro can be used as the body of an if/else without capturing the `else'.
 */
#define	DPFPRINTF(n, x)							\
	do {								\
		if (V_pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
73
74 static void pf_hash(struct pf_addr *, struct pf_addr *,
75 struct pf_poolhashkey *, sa_family_t);
76 static struct pf_krule *pf_match_translation(struct pf_pdesc *,
77 int, struct pf_kanchor_stackframe *);
78 static int pf_get_sport(struct pf_pdesc *, struct pf_krule *,
79 struct pf_addr *, uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **,
80 struct pf_srchash **, struct pf_kpool *, struct pf_udp_mapping **);
81 static bool pf_islinklocal(const sa_family_t, const struct pf_addr *);
82
/*
 * Three-word mixing step used by pf_hash().  Every argument must be a
 * modifiable lvalue (pf_hash() passes u_int32_t locals); each argument is
 * evaluated several times, so it must be free of side effects.
 */
#define	mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
95
96 /*
97 * hash function based on bridge_hash in if_bridge.c
98 */
99 static void
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)100 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
101 struct pf_poolhashkey *key, sa_family_t af)
102 {
103 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
104
105 switch (af) {
106 #ifdef INET
107 case AF_INET:
108 a += inaddr->addr32[0];
109 b += key->key32[1];
110 mix(a, b, c);
111 hash->addr32[0] = c + key->key32[2];
112 break;
113 #endif /* INET */
114 #ifdef INET6
115 case AF_INET6:
116 a += inaddr->addr32[0];
117 b += inaddr->addr32[2];
118 mix(a, b, c);
119 hash->addr32[0] = c;
120 a += inaddr->addr32[1];
121 b += inaddr->addr32[3];
122 c += key->key32[1];
123 mix(a, b, c);
124 hash->addr32[1] = c;
125 a += inaddr->addr32[2];
126 b += inaddr->addr32[1];
127 c += key->key32[2];
128 mix(a, b, c);
129 hash->addr32[2] = c;
130 a += inaddr->addr32[3];
131 b += inaddr->addr32[0];
132 c += key->key32[3];
133 mix(a, b, c);
134 hash->addr32[3] = c;
135 break;
136 #endif /* INET6 */
137 }
138 }
139
140 static struct pf_krule *
pf_match_translation(struct pf_pdesc * pd,int rs_num,struct pf_kanchor_stackframe * anchor_stack)141 pf_match_translation(struct pf_pdesc *pd,
142 int rs_num, struct pf_kanchor_stackframe *anchor_stack)
143 {
144 struct pf_krule *r, *rm = NULL;
145 struct pf_kruleset *ruleset = NULL;
146 int tag = -1;
147 int rtableid = -1;
148 int asd = 0;
149
150 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
151 while (r != NULL) {
152 struct pf_rule_addr *src = NULL, *dst = NULL;
153 struct pf_addr_wrap *xdst = NULL;
154
155 if (r->action == PF_BINAT && pd->dir == PF_IN) {
156 src = &r->dst;
157 if (r->rdr.cur != NULL)
158 xdst = &r->rdr.cur->addr;
159 } else {
160 src = &r->src;
161 dst = &r->dst;
162 }
163
164 pf_counter_u64_add(&r->evaluations, 1);
165 if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
166 r = r->skip[PF_SKIP_IFP];
167 else if (r->direction && r->direction != pd->dir)
168 r = r->skip[PF_SKIP_DIR];
169 else if (r->af && r->af != pd->af)
170 r = r->skip[PF_SKIP_AF];
171 else if (r->proto && r->proto != pd->proto)
172 r = r->skip[PF_SKIP_PROTO];
173 else if (PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af,
174 src->neg, pd->kif, M_GETFIB(pd->m)))
175 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
176 PF_SKIP_DST_ADDR];
177 else if (src->port_op && !pf_match_port(src->port_op,
178 src->port[0], src->port[1], pd->nsport))
179 r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
180 PF_SKIP_DST_PORT];
181 else if (dst != NULL &&
182 PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL,
183 M_GETFIB(pd->m)))
184 r = r->skip[PF_SKIP_DST_ADDR];
185 else if (xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af,
186 0, NULL, M_GETFIB(pd->m)))
187 r = TAILQ_NEXT(r, entries);
188 else if (dst != NULL && dst->port_op &&
189 !pf_match_port(dst->port_op, dst->port[0],
190 dst->port[1], pd->ndport))
191 r = r->skip[PF_SKIP_DST_PORT];
192 else if (r->match_tag && !pf_match_tag(pd->m, r, &tag,
193 pd->pf_mtag ? pd->pf_mtag->tag : 0))
194 r = TAILQ_NEXT(r, entries);
195 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
196 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
197 &pd->hdr.tcp), r->os_fingerprint)))
198 r = TAILQ_NEXT(r, entries);
199 else {
200 if (r->tag)
201 tag = r->tag;
202 if (r->rtableid >= 0)
203 rtableid = r->rtableid;
204 if (r->anchor == NULL) {
205 rm = r;
206 if (rm->action == PF_NONAT ||
207 rm->action == PF_NORDR ||
208 rm->action == PF_NOBINAT) {
209 rm = NULL;
210 }
211 break;
212 } else
213 pf_step_into_anchor(anchor_stack, &asd,
214 &ruleset, rs_num, &r, NULL, NULL);
215 }
216 if (r == NULL)
217 pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
218 rs_num, &r, NULL, NULL);
219 }
220
221 if (tag > 0 && pf_tag_packet(pd, tag))
222 return (NULL);
223 if (rtableid >= 0)
224 M_SETFIB(pd->m, rtableid);
225
226 return (rm);
227 }
228
229 static int
pf_get_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,uint16_t low,uint16_t high,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_kpool * rpool,struct pf_udp_mapping ** udp_mapping)230 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r,
231 struct pf_addr *naddr, uint16_t *nport, uint16_t low,
232 uint16_t high, struct pf_ksrc_node **sn,
233 struct pf_srchash **sh, struct pf_kpool *rpool,
234 struct pf_udp_mapping **udp_mapping)
235 {
236 struct pf_state_key_cmp key;
237 struct pf_addr init_addr;
238
239 bzero(&init_addr, sizeof(init_addr));
240
241 if (! TAILQ_EMPTY(&r->nat.list) &&
242 pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr,
243 sn, sh, &r->nat))
244 return (1);
245
246 if (udp_mapping) {
247 MPASS(*udp_mapping == NULL);
248 }
249
250 /*
251 * If we are UDP and have an existing mapping we can get source port
252 * from the mapping. In this case we have to look up the src_node as
253 * pf_map_addr would.
254 */
255 if (pd->proto == IPPROTO_UDP && (r->rdr.opts & PF_POOL_ENDPI)) {
256 struct pf_udp_endpoint_cmp udp_source;
257
258 bzero(&udp_source, sizeof(udp_source));
259 udp_source.af = pd->af;
260 PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af);
261 udp_source.port = pd->nsport;
262 if (udp_mapping) {
263 *udp_mapping = pf_udp_mapping_find(&udp_source);
264 if (*udp_mapping) {
265 PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af);
266 *nport = (*udp_mapping)->endpoints[1].port;
267 /* Try to find a src_node as per pf_map_addr(). */
268 if (*sn == NULL && r->rdr.opts & PF_POOL_STICKYADDR &&
269 (r->rdr.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
270 *sn = pf_find_src_node(&pd->nsaddr, r, pd->af, sh, false);
271 if (*sn != NULL)
272 PF_SRC_NODE_UNLOCK(*sn);
273 return (0);
274 } else {
275 *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr,
276 pd->nsport, &init_addr, 0);
277 if (*udp_mapping == NULL)
278 return (1);
279 }
280 }
281 }
282
283 if (pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL, &init_addr,
284 sn, sh, rpool))
285 goto failed;
286
287 if (pd->proto == IPPROTO_ICMP) {
288 if (*nport == htons(ICMP_ECHO)) {
289 low = 1;
290 high = 65535;
291 } else
292 return (0); /* Don't try to modify non-echo ICMP */
293 }
294 #ifdef INET6
295 if (pd->proto == IPPROTO_ICMPV6) {
296 if (*nport == htons(ICMP6_ECHO_REQUEST)) {
297 low = 1;
298 high = 65535;
299 } else
300 return (0); /* Don't try to modify non-echo ICMP */
301 }
302 #endif /* INET6 */
303
304 bzero(&key, sizeof(key));
305 key.af = pd->naf;
306 key.proto = pd->proto;
307 key.port[0] = pd->ndport;
308 PF_ACPY(&key.addr[0], &pd->ndaddr, key.af);
309
310 do {
311 PF_ACPY(&key.addr[1], naddr, key.af);
312 if (udp_mapping && *udp_mapping)
313 PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af);
314
315 /*
316 * port search; start random, step;
317 * similar 2 portloop in in_pcbbind
318 */
319 if (pd->proto == IPPROTO_SCTP) {
320 key.port[1] = pd->nsport;
321 if (!pf_find_state_all_exists(&key, PF_IN)) {
322 *nport = pd->nsport;
323 return (0);
324 } else {
325 return (1); /* Fail mapping. */
326 }
327 } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
328 pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
329 /*
330 * XXX bug: icmp states don't use the id on both sides.
331 * (traceroute -I through nat)
332 */
333 key.port[1] = pd->nsport;
334 if (!pf_find_state_all_exists(&key, PF_IN)) {
335 *nport = pd->nsport;
336 return (0);
337 }
338 } else if (low == high) {
339 key.port[1] = htons(low);
340 if (!pf_find_state_all_exists(&key, PF_IN)) {
341 if (udp_mapping && *udp_mapping != NULL) {
342 (*udp_mapping)->endpoints[1].port = htons(low);
343 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
344 *nport = htons(low);
345 return (0);
346 }
347 } else {
348 *nport = htons(low);
349 return (0);
350 }
351 }
352 } else {
353 uint32_t tmp;
354 uint16_t cut;
355
356 if (low > high) {
357 tmp = low;
358 low = high;
359 high = tmp;
360 }
361 /* low < high */
362 cut = arc4random() % (1 + high - low) + low;
363 /* low <= cut <= high */
364 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
365 if (udp_mapping && *udp_mapping != NULL) {
366 (*udp_mapping)->endpoints[1].port = htons(tmp);
367 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
368 *nport = htons(tmp);
369 return (0);
370 }
371 } else {
372 key.port[1] = htons(tmp);
373 if (!pf_find_state_all_exists(&key, PF_IN)) {
374 *nport = htons(tmp);
375 return (0);
376 }
377 }
378 }
379 tmp = cut;
380 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
381 if (pd->proto == IPPROTO_UDP &&
382 (r->rdr.opts & PF_POOL_ENDPI &&
383 udp_mapping != NULL)) {
384 (*udp_mapping)->endpoints[1].port = htons(tmp);
385 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
386 *nport = htons(tmp);
387 return (0);
388 }
389 } else {
390 key.port[1] = htons(tmp);
391 if (!pf_find_state_all_exists(&key, PF_IN)) {
392 *nport = htons(tmp);
393 return (0);
394 }
395 }
396 }
397 }
398
399 switch (r->rdr.opts & PF_POOL_TYPEMASK) {
400 case PF_POOL_RANDOM:
401 case PF_POOL_ROUNDROBIN:
402 /*
403 * pick a different source address since we're out
404 * of free port choices for the current one.
405 */
406 (*sn) = NULL;
407 if (pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL,
408 &init_addr, sn, sh, &r->rdr))
409 return (1);
410 break;
411 case PF_POOL_NONE:
412 case PF_POOL_SRCHASH:
413 case PF_POOL_BITMASK:
414 default:
415 return (1);
416 }
417 } while (! PF_AEQ(&init_addr, naddr, pd->naf) );
418
419 failed:
420 if (udp_mapping) {
421 uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
422 *udp_mapping = NULL;
423 }
424
425 return (1); /* none available */
426 }
427
428 static bool
pf_islinklocal(const sa_family_t af,const struct pf_addr * addr)429 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
430 {
431 if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
432 return (true);
433 return (false);
434 }
435
436 static int
pf_get_mape_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_udp_mapping ** udp_mapping)437 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r,
438 struct pf_addr *naddr, uint16_t *nport,
439 struct pf_ksrc_node **sn, struct pf_srchash **sh,
440 struct pf_udp_mapping **udp_mapping)
441 {
442 uint16_t psmask, low, highmask;
443 uint16_t i, ahigh, cut;
444 int ashift, psidshift;
445
446 ashift = 16 - r->rdr.mape.offset;
447 psidshift = ashift - r->rdr.mape.psidlen;
448 psmask = r->rdr.mape.psid & ((1U << r->rdr.mape.psidlen) - 1);
449 psmask = psmask << psidshift;
450 highmask = (1U << psidshift) - 1;
451
452 ahigh = (1U << r->rdr.mape.offset) - 1;
453 cut = arc4random() & ahigh;
454 if (cut == 0)
455 cut = 1;
456
457 for (i = cut; i <= ahigh; i++) {
458 low = (i << ashift) | psmask;
459 if (!pf_get_sport(pd, r,
460 naddr, nport, low, low | highmask, sn, sh, &r->rdr,
461 udp_mapping))
462 return (0);
463 }
464 for (i = cut - 1; i > 0; i--) {
465 low = (i << ashift) | psmask;
466 if (!pf_get_sport(pd, r,
467 naddr, nport, low, low | highmask, sn, sh, &r->rdr,
468 udp_mapping))
469 return (0);
470 }
471 return (1);
472 }
473
474 u_short
pf_map_addr(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_kpool * rpool)475 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
476 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
477 struct pf_kpool *rpool)
478 {
479 u_short reason = PFRES_MATCH;
480 struct pf_addr *raddr = NULL, *rmask = NULL;
481
482 mtx_lock(&rpool->mtx);
483 /* Find the route using chosen algorithm. Store the found route
484 in src_node if it was given or found. */
485 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
486 reason = PFRES_MAPFAILED;
487 goto done_pool_mtx;
488 }
489 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
490 switch (af) {
491 #ifdef INET
492 case AF_INET:
493 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
494 (rpool->opts & PF_POOL_TYPEMASK) !=
495 PF_POOL_ROUNDROBIN) {
496 reason = PFRES_MAPFAILED;
497 goto done_pool_mtx;
498 }
499 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
500 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
501 break;
502 #endif /* INET */
503 #ifdef INET6
504 case AF_INET6:
505 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
506 (rpool->opts & PF_POOL_TYPEMASK) !=
507 PF_POOL_ROUNDROBIN) {
508 reason = PFRES_MAPFAILED;
509 goto done_pool_mtx;
510 }
511 raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
512 rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
513 break;
514 #endif /* INET6 */
515 }
516 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
517 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
518 reason = PFRES_MAPFAILED;
519 goto done_pool_mtx; /* unsupported */
520 }
521 } else {
522 raddr = &rpool->cur->addr.v.a.addr;
523 rmask = &rpool->cur->addr.v.a.mask;
524 }
525
526 switch (rpool->opts & PF_POOL_TYPEMASK) {
527 case PF_POOL_NONE:
528 PF_ACPY(naddr, raddr, af);
529 break;
530 case PF_POOL_BITMASK:
531 PF_POOLMASK(naddr, raddr, rmask, saddr, af);
532 break;
533 case PF_POOL_RANDOM:
534 if (init_addr != NULL && PF_AZERO(init_addr, af)) {
535 switch (af) {
536 #ifdef INET
537 case AF_INET:
538 rpool->counter.addr32[0] = htonl(arc4random());
539 break;
540 #endif /* INET */
541 #ifdef INET6
542 case AF_INET6:
543 if (rmask->addr32[3] != 0xffffffff)
544 rpool->counter.addr32[3] =
545 htonl(arc4random());
546 else
547 break;
548 if (rmask->addr32[2] != 0xffffffff)
549 rpool->counter.addr32[2] =
550 htonl(arc4random());
551 else
552 break;
553 if (rmask->addr32[1] != 0xffffffff)
554 rpool->counter.addr32[1] =
555 htonl(arc4random());
556 else
557 break;
558 if (rmask->addr32[0] != 0xffffffff)
559 rpool->counter.addr32[0] =
560 htonl(arc4random());
561 break;
562 #endif /* INET6 */
563 }
564 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
565 PF_ACPY(init_addr, naddr, af);
566
567 } else {
568 PF_AINC(&rpool->counter, af);
569 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
570 }
571 break;
572 case PF_POOL_SRCHASH:
573 {
574 unsigned char hash[16];
575
576 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
577 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
578 break;
579 }
580 case PF_POOL_ROUNDROBIN:
581 {
582 struct pf_kpooladdr *acur = rpool->cur;
583
584 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
585 if (!pfr_pool_get(rpool->cur->addr.p.tbl,
586 &rpool->tblidx, &rpool->counter, af, NULL))
587 goto get_addr;
588 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
589 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
590 &rpool->tblidx, &rpool->counter, af, pf_islinklocal))
591 goto get_addr;
592 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
593 goto get_addr;
594
595 try_next:
596 if (TAILQ_NEXT(rpool->cur, entries) == NULL)
597 rpool->cur = TAILQ_FIRST(&rpool->list);
598 else
599 rpool->cur = TAILQ_NEXT(rpool->cur, entries);
600 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
601 if (pfr_pool_get(rpool->cur->addr.p.tbl,
602 &rpool->tblidx, &rpool->counter, af, NULL)) {
603 /* table contains no address of type 'af' */
604 if (rpool->cur != acur)
605 goto try_next;
606 reason = PFRES_MAPFAILED;
607 goto done_pool_mtx;
608 }
609 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
610 rpool->tblidx = -1;
611 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
612 &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) {
613 /* table contains no address of type 'af' */
614 if (rpool->cur != acur)
615 goto try_next;
616 reason = PFRES_MAPFAILED;
617 goto done_pool_mtx;
618 }
619 } else {
620 raddr = &rpool->cur->addr.v.a.addr;
621 rmask = &rpool->cur->addr.v.a.mask;
622 PF_ACPY(&rpool->counter, raddr, af);
623 }
624
625 get_addr:
626 PF_ACPY(naddr, &rpool->counter, af);
627 if (init_addr != NULL && PF_AZERO(init_addr, af))
628 PF_ACPY(init_addr, naddr, af);
629 PF_AINC(&rpool->counter, af);
630 break;
631 }
632 }
633
634 if (nkif)
635 *nkif = rpool->cur->kif;
636
637 done_pool_mtx:
638 mtx_unlock(&rpool->mtx);
639
640 if (reason) {
641 counter_u64_add(V_pf_status.counters[reason], 1);
642 }
643
644 return (reason);
645 }
646
647 u_short
pf_map_addr_sn(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_kpool * rpool)648 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
649 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
650 struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_kpool *rpool)
651 {
652 u_short reason = 0;
653
654 KASSERT(*sn == NULL, ("*sn not NULL"));
655
656 /*
657 * If this is a sticky-address rule, try to find an existing src_node.
658 * Request the sh to be unlocked if sn was not found, as we never
659 * insert a new sn when parsing the ruleset.
660 */
661 if (r->rdr.opts & PF_POOL_STICKYADDR &&
662 (r->rdr.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
663 *sn = pf_find_src_node(saddr, r, af, sh, false);
664
665 if (*sn != NULL) {
666 PF_SRC_NODE_LOCK_ASSERT(*sn);
667
668 /* If the supplied address is the same as the current one we've
669 * been asked before, so tell the caller that there's no other
670 * address to be had. */
671 if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
672 reason = PFRES_MAPFAILED;
673 goto done;
674 }
675
676 PF_ACPY(naddr, &(*sn)->raddr, af);
677 if (nkif)
678 *nkif = (*sn)->rkif;
679 if (V_pf_status.debug >= PF_DEBUG_NOISY) {
680 printf("pf_map_addr: src tracking maps ");
681 pf_print_host(saddr, 0, af);
682 printf(" to ");
683 pf_print_host(naddr, 0, af);
684 if (nkif)
685 printf("@%s", (*nkif)->pfik_name);
686 printf("\n");
687 }
688 goto done;
689 }
690
691 /*
692 * Source node has not been found. Find a new address and store it
693 * in variables given by the caller.
694 */
695 if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) {
696 /* pf_map_addr() sets reason counters on its own */
697 goto done;
698 }
699
700 if (V_pf_status.debug >= PF_DEBUG_NOISY &&
701 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
702 printf("pf_map_addr: selected address ");
703 pf_print_host(naddr, 0, af);
704 if (nkif)
705 printf("@%s", (*nkif)->pfik_name);
706 printf("\n");
707 }
708
709 done:
710 if ((*sn) != NULL)
711 PF_SRC_NODE_UNLOCK(*sn);
712
713 if (reason) {
714 counter_u64_add(V_pf_status.counters[reason], 1);
715 }
716
717 return (reason);
718 }
719
720 u_short
pf_get_translation(struct pf_pdesc * pd,int off,struct pf_state_key ** skp,struct pf_state_key ** nkp,struct pf_kanchor_stackframe * anchor_stack,struct pf_krule ** rp,struct pf_udp_mapping ** udp_mapping)721 pf_get_translation(struct pf_pdesc *pd, int off,
722 struct pf_state_key **skp, struct pf_state_key **nkp,
723 struct pf_kanchor_stackframe *anchor_stack, struct pf_krule **rp,
724 struct pf_udp_mapping **udp_mapping)
725 {
726 struct pf_krule *r = NULL;
727 struct pf_addr *naddr;
728 struct pf_ksrc_node *sn = NULL;
729 struct pf_srchash *sh = NULL;
730 uint16_t *nportp;
731 uint16_t low, high;
732 u_short reason;
733
734 PF_RULES_RASSERT();
735 KASSERT(*skp == NULL, ("*skp not NULL"));
736 KASSERT(*nkp == NULL, ("*nkp not NULL"));
737
738 *rp = NULL;
739
740 if (pd->dir == PF_OUT) {
741 r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack);
742 if (r == NULL)
743 r = pf_match_translation(pd, PF_RULESET_NAT, anchor_stack);
744 } else {
745 r = pf_match_translation(pd, PF_RULESET_RDR, anchor_stack);
746 if (r == NULL)
747 r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack);
748 }
749
750 if (r == NULL)
751 return (PFRES_MAX);
752
753 switch (r->action) {
754 case PF_NONAT:
755 case PF_NOBINAT:
756 case PF_NORDR:
757 return (PFRES_MAX);
758 }
759
760 if (pf_state_key_setup(pd, pd->nsport, pd->ndport, skp, nkp))
761 return (PFRES_MEMORY);
762
763 naddr = &(*nkp)->addr[1];
764 nportp = &(*nkp)->port[1];
765
766 switch (r->action) {
767 case PF_NAT:
768 if (pd->proto == IPPROTO_ICMP) {
769 low = 1;
770 high = 65535;
771 } else {
772 low = r->rdr.proxy_port[0];
773 high = r->rdr.proxy_port[1];
774 }
775 if (r->rdr.mape.offset > 0) {
776 if (pf_get_mape_sport(pd, r, naddr, nportp, &sn,
777 &sh, udp_mapping)) {
778 DPFPRINTF(PF_DEBUG_MISC,
779 ("pf: MAP-E port allocation (%u/%u/%u)"
780 " failed\n",
781 r->rdr.mape.offset,
782 r->rdr.mape.psidlen,
783 r->rdr.mape.psid));
784 reason = PFRES_MAPFAILED;
785 goto notrans;
786 }
787 } else if (pf_get_sport(pd, r, naddr, nportp, low, high, &sn,
788 &sh, &r->rdr, udp_mapping)) {
789 DPFPRINTF(PF_DEBUG_MISC,
790 ("pf: NAT proxy port allocation (%u-%u) failed\n",
791 r->rdr.proxy_port[0], r->rdr.proxy_port[1]));
792 reason = PFRES_MAPFAILED;
793 goto notrans;
794 }
795 break;
796 case PF_BINAT:
797 switch (pd->dir) {
798 case PF_OUT:
799 if (r->rdr.cur->addr.type == PF_ADDR_DYNIFTL){
800 switch (pd->af) {
801 #ifdef INET
802 case AF_INET:
803 if (r->rdr.cur->addr.p.dyn->
804 pfid_acnt4 < 1) {
805 reason = PFRES_MAPFAILED;
806 goto notrans;
807 }
808 PF_POOLMASK(naddr,
809 &r->rdr.cur->addr.p.dyn->
810 pfid_addr4,
811 &r->rdr.cur->addr.p.dyn->
812 pfid_mask4, &pd->nsaddr, AF_INET);
813 break;
814 #endif /* INET */
815 #ifdef INET6
816 case AF_INET6:
817 if (r->rdr.cur->addr.p.dyn->
818 pfid_acnt6 < 1) {
819 reason = PFRES_MAPFAILED;
820 goto notrans;
821 }
822 PF_POOLMASK(naddr,
823 &r->rdr.cur->addr.p.dyn->
824 pfid_addr6,
825 &r->rdr.cur->addr.p.dyn->
826 pfid_mask6, &pd->nsaddr, AF_INET6);
827 break;
828 #endif /* INET6 */
829 }
830 } else
831 PF_POOLMASK(naddr,
832 &r->rdr.cur->addr.v.a.addr,
833 &r->rdr.cur->addr.v.a.mask, &pd->nsaddr,
834 pd->af);
835 break;
836 case PF_IN:
837 if (r->src.addr.type == PF_ADDR_DYNIFTL) {
838 switch (pd->af) {
839 #ifdef INET
840 case AF_INET:
841 if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
842 reason = PFRES_MAPFAILED;
843 goto notrans;
844 }
845 PF_POOLMASK(naddr,
846 &r->src.addr.p.dyn->pfid_addr4,
847 &r->src.addr.p.dyn->pfid_mask4,
848 &pd->ndaddr, AF_INET);
849 break;
850 #endif /* INET */
851 #ifdef INET6
852 case AF_INET6:
853 if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
854 reason = PFRES_MAPFAILED;
855 goto notrans;
856 }
857 PF_POOLMASK(naddr,
858 &r->src.addr.p.dyn->pfid_addr6,
859 &r->src.addr.p.dyn->pfid_mask6,
860 &pd->ndaddr, AF_INET6);
861 break;
862 #endif /* INET6 */
863 }
864 } else
865 PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
866 &r->src.addr.v.a.mask, &pd->ndaddr, pd->af);
867 break;
868 }
869 break;
870 case PF_RDR: {
871 struct pf_state_key_cmp key;
872 int tries;
873 uint16_t cut, low, high, nport;
874
875 reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL,
876 NULL, &sn, &sh, &r->rdr);
877 if (reason != 0)
878 goto notrans;
879 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
880 PF_POOLMASK(naddr, naddr, &r->rdr.cur->addr.v.a.mask,
881 &pd->ndaddr, pd->af);
882
883 /* Do not change SCTP ports. */
884 if (pd->proto == IPPROTO_SCTP)
885 break;
886
887 if (r->rdr.proxy_port[1]) {
888 uint32_t tmp_nport;
889
890 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) %
891 (r->rdr.proxy_port[1] - r->rdr.proxy_port[0] +
892 1)) + r->rdr.proxy_port[0];
893
894 /* Wrap around if necessary. */
895 if (tmp_nport > 65535)
896 tmp_nport -= 65535;
897 nport = htons((uint16_t)tmp_nport);
898 } else if (r->rdr.proxy_port[0])
899 nport = htons(r->rdr.proxy_port[0]);
900 else
901 nport = pd->ndport;
902
903 /*
904 * Update the destination port.
905 */
906 *nportp = nport;
907
908 /*
909 * Do we have a source port conflict in the stack state? Try to
910 * modulate the source port if so. Note that this is racy since
911 * the state lookup may not find any matches here but will once
912 * pf_create_state() actually instantiates the state.
913 */
914 bzero(&key, sizeof(key));
915 key.af = pd->af;
916 key.proto = pd->proto;
917 key.port[0] = pd->nsport;
918 PF_ACPY(&key.addr[0], &pd->nsaddr, key.af);
919 key.port[1] = nport;
920 PF_ACPY(&key.addr[1], naddr, key.af);
921
922 if (!pf_find_state_all_exists(&key, PF_OUT))
923 break;
924
925 tries = 0;
926
927 low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
928 high = 65535;
929 cut = arc4random() % (1 + high - low) + low;
930 for (uint32_t tmp = cut;
931 tmp <= high && tmp <= UINT16_MAX &&
932 tries < V_pf_rdr_srcport_rewrite_tries;
933 tmp++, tries++) {
934 key.port[0] = htons(tmp);
935 if (!pf_find_state_all_exists(&key, PF_OUT)) {
936 /* Update the source port. */
937 (*nkp)->port[0] = htons(tmp);
938 goto out;
939 }
940 }
941 for (uint32_t tmp = cut - 1;
942 tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
943 tmp--, tries++) {
944 key.port[0] = htons(tmp);
945 if (!pf_find_state_all_exists(&key, PF_OUT)) {
946 /* Update the source port. */
947 (*nkp)->port[0] = htons(tmp);
948 goto out;
949 }
950 }
951
952 /*
953 * We failed to find a match. Push on ahead anyway, let
954 * pf_state_insert() be the arbiter of whether the state
955 * conflict is tolerable. In particular, with TCP connections
956 * the state may be reused if the TCP state is terminal.
957 */
958 DPFPRINTF(PF_DEBUG_MISC,
959 ("pf: RDR source port allocation failed\n"));
960 break;
961
962 out:
963 DPFPRINTF(PF_DEBUG_MISC,
964 ("pf: RDR source port allocation %u->%u\n",
965 ntohs(pd->nsport), ntohs((*nkp)->port[0])));
966 break;
967 }
968 default:
969 panic("%s: unknown action %u", __func__, r->action);
970 }
971
972 /* Return success only if translation really happened. */
973 if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
974 *rp = r;
975 return (PFRES_MATCH);
976 }
977
978 reason = PFRES_MAX;
979 notrans:
980 uma_zfree(V_pf_state_key_z, *nkp);
981 uma_zfree(V_pf_state_key_z, *skp);
982 *skp = *nkp = NULL;
983
984 return (reason);
985 }
986
/*
 * Compute the translated addresses and ports for an address-family
 * translating (af-to) rule and store them in `pd'.
 *
 * The new source address/port comes from the rule's nat pool via
 * pf_get_sport().  ICMP and ICMPv6 echo request/reply types held in the
 * port fields are converted to their counterpart in the new family.  The new
 * destination is derived from the rule's rdr pool when one is configured,
 * otherwise from the rule's destination (or nat) prefix.
 *
 * On success pd->nsaddr and pd->ndaddr hold addresses of family pd->naf;
 * pd->ndport / pd->nsport may be rewritten as described above.
 *
 * Returns 0 on success and -1 on failure.  Kernels built without both INET
 * and INET6 always return -1.  Panics when the rule has no nat pool.
 */
int
pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
	struct pf_addr		 ndaddr, nsaddr, naddr;
	u_int16_t		 nport = 0;
	int			 prefixlen = 96;
	struct pf_srchash	*sh = NULL;
	struct pf_ksrc_node	*sns = NULL;

	bzero(&nsaddr, sizeof(nsaddr));
	bzero(&ndaddr, sizeof(ndaddr));

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s %s, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr");
		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
		printf("\n");
	}

	if (TAILQ_EMPTY(&r->nat.list))
		panic("pf_get_transaddr_af: no nat pool for source address");

	/* get source address and port */
	if (pf_get_sport(pd, r, &nsaddr, &nport,
	    r->nat.proxy_port[0], r->nat.proxy_port[1], &sns, &sh, &r->nat,
	    NULL)) {
		/* Terminate the line like the other debug messages do. */
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: af-to NAT proxy port allocation (%u-%u) failed\n",
		    r->nat.proxy_port[0], r->nat.proxy_port[1]));
		return (-1);
	}

	/* Convert echo request/reply types between ICMPv6 and ICMP. */
	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
		if (pd->dir == PF_IN) {
			NTOHS(pd->ndport);
			if (pd->ndport == ICMP6_ECHO_REQUEST)
				pd->ndport = ICMP_ECHO;
			else if (pd->ndport == ICMP6_ECHO_REPLY)
				pd->ndport = ICMP_ECHOREPLY;
			HTONS(pd->ndport);
		} else {
			NTOHS(pd->nsport);
			if (pd->nsport == ICMP6_ECHO_REQUEST)
				pd->nsport = ICMP_ECHO;
			else if (pd->nsport == ICMP6_ECHO_REPLY)
				pd->nsport = ICMP_ECHOREPLY;
			HTONS(pd->nsport);
		}
	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
		if (pd->dir == PF_IN) {
			NTOHS(pd->ndport);
			if (pd->ndport == ICMP_ECHO)
				pd->ndport = ICMP6_ECHO_REQUEST;
			else if (pd->ndport == ICMP_ECHOREPLY)
				pd->ndport = ICMP6_ECHO_REPLY;
			HTONS(pd->ndport);
		} else {
			NTOHS(pd->nsport);
			if (pd->nsport == ICMP_ECHO)
				pd->nsport = ICMP6_ECHO_REQUEST;
			else if (pd->nsport == ICMP_ECHOREPLY)
				pd->nsport = ICMP6_ECHO_REPLY;
			HTONS(pd->nsport);
		}
	}

	/* get the destination address and port */
	if (! TAILQ_EMPTY(&r->rdr.list)) {
		/*
		 * Hand pf_map_addr_sn() a real hash-row pointer, as every
		 * other caller in this file does: for sticky-address rules
		 * it forwards `sh' to pf_find_src_node().
		 */
		if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL,
		    &sns, &sh, &r->rdr))
			return (-1);
		if (r->rdr.proxy_port[0])
			pd->ndport = htons(r->rdr.proxy_port[0]);

		if (pd->naf == AF_INET) {
			/* The prefix is the IPv4 rdr address */
			prefixlen = in_mask2len(
			    (struct in_addr *)&r->rdr.cur->addr.v.a.mask);
			inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		} else {
			/* The prefix is the IPv6 rdr address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL);
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		}
	} else {
		if (pd->naf == AF_INET) {
			/* The prefix is the IPv6 dst address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->dst.addr.v.a.mask, NULL);
			if (prefixlen < 32)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr,
			    prefixlen);
		} else {
			/*
			 * The prefix is the IPv6 nat address
			 * (that was stored in pd->nsaddr)
			 */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL);
			if (prefixlen > 96)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr,
			    prefixlen);
		}
	}

	PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
	PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s done, prefixlen %d, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    prefixlen);
		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
		printf("\n");
	}

	return (0);
#else
	return (-1);
#endif
}
1118