1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials provided
17 * with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37 */
38
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49
50 #include <crypto/siphash/siphash.h>
51
52 #include <net/if.h>
53 #include <net/if_var.h>
54 #include <net/vnet.h>
55 #include <net/pfvar.h>
56 #include <net/if_pflog.h>
57
58 #ifdef INET
59 #include <netinet/in_var.h>
60 #endif
61
62 #ifdef INET6
63 #include <netinet6/in6_var.h>
64 #endif
65
66
/*
 * Limit the amount of work we do to find a free source port for redirects that
 * introduce a state conflict.  The value is the maximum number of candidate
 * source ports probed by pf_get_translation() for one rdr match.
 */
#define V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;

/*
 * Print a debug message when the global pf debug level is at least 'n'.
 * 'x' is a complete, parenthesised printf() argument list.
 * NOTE(review): the expansion is a bare 'if' statement, so only use this
 * macro as a full statement inside braces (never as an unbraced if/else body).
 */
#define DPFPRINTF(n, x)	if (V_pf_status.debug >= (n)) printf x

/* Forward declarations of helpers private to this file. */
static uint64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
static struct pf_krule	*pf_match_translation(struct pf_pdesc *,
			    int, struct pf_kanchor_stackframe *);
static int		 pf_get_sport(struct pf_pdesc *, struct pf_krule *,
			    struct pf_addr *, uint16_t *, uint16_t, uint16_t,
			    struct pf_ksrc_node **, struct pf_srchash **,
			    struct pf_kpool *, struct pf_udp_mapping **,
			    pf_sn_types_t);
static bool		 pf_islinklocal(const sa_family_t, const struct pf_addr *);
86
87 static uint64_t
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)88 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
89 struct pf_poolhashkey *key, sa_family_t af)
90 {
91 SIPHASH_CTX ctx;
92 #ifdef INET6
93 union {
94 uint64_t hash64;
95 uint32_t hash32[2];
96 } h;
97 #endif
98 uint64_t res = 0;
99
100 _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, "");
101
102 switch (af) {
103 #ifdef INET
104 case AF_INET:
105 res = SipHash24(&ctx, (const uint8_t *)key,
106 &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
107 hash->addr32[0] = res;
108 break;
109 #endif /* INET */
110 #ifdef INET6
111 case AF_INET6:
112 res = SipHash24(&ctx, (const uint8_t *)key,
113 &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0]));
114 h.hash64 = res;
115 hash->addr32[0] = h.hash32[0];
116 hash->addr32[1] = h.hash32[1];
117 /*
118 * siphash isn't big enough, but flipping it around is
119 * good enough here.
120 */
121 hash->addr32[2] = ~h.hash32[1];
122 hash->addr32[3] = ~h.hash32[0];
123 break;
124 #endif /* INET6 */
125 }
126 return (res);
127 }
128
/*
 * Walk the active rules of translation ruleset 'rs_num' in the main ruleset
 * (descending into anchors via 'anchor_stack') and return the first
 * non-anchor rule that matches the packet described by 'pd'.
 *
 * Returns NULL when nothing matches, when the matching rule is one of the
 * "no nat" / "no rdr" / "no binat" actions, or when pf_tag_packet() fails.
 *
 * Side effects: bumps the evaluation counter of every rule visited and,
 * after the walk, applies the last tag and routing table id collected from
 * matching rules to the packet (this happens even if the result is NULL
 * because of a "no ..." rule).
 */
static struct pf_krule *
pf_match_translation(struct pf_pdesc *pd,
    int rs_num, struct pf_kanchor_stackframe *anchor_stack)
{
	struct pf_krule		*r, *rm = NULL;
	struct pf_kruleset	*ruleset = NULL;
	int			 tag = -1;
	int			 rtableid = -1;
	int			 asd = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r != NULL) {
		struct pf_rule_addr	*src = NULL, *dst = NULL;
		struct pf_addr_wrap	*xdst = NULL;

		/*
		 * For an inbound packet hitting a binat rule the roles are
		 * reversed: the packet source is compared against the rule's
		 * destination, and the packet destination against the
		 * current redirection pool address (xdst), if there is one.
		 */
		if (r->action == PF_BINAT && pd->dir == PF_IN) {
			src = &r->dst;
			if (r->rdr.cur != NULL)
				xdst = &r->rdr.cur->addr;
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		pf_counter_u64_add(&r->evaluations, 1);
		/*
		 * Each failed criterion advances 'r' either through the
		 * rule's precomputed skip pointer for that criterion or
		 * simply to the next rule in the list.
		 */
		if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP];
		else if (r->direction && r->direction != pd->dir)
			r = r->skip[PF_SKIP_DIR];
		else if (r->af && r->af != pd->af)
			r = r->skip[PF_SKIP_AF];
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO];
		else if (PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af,
		    src->neg, pd->kif, M_GETFIB(pd->m)))
			/* Pick the skip list matching the field 'src' aliases. */
			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
			    PF_SKIP_DST_ADDR];
		else if (src->port_op && !pf_match_port(src->port_op,
		    src->port[0], src->port[1], pd->nsport))
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT];
		else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL,
		    M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_DST_ADDR];
		else if (xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af,
		    0, NULL, M_GETFIB(pd->m)))
			r = TAILQ_NEXT(r, entries);
		else if (dst != NULL && dst->port_op &&
		    !pf_match_port(dst->port_op, dst->port[0],
		    dst->port[1], pd->ndport))
			r = r->skip[PF_SKIP_DST_PORT];
		else if (r->match_tag && !pf_match_tag(pd->m, r, &tag,
		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
		    &pd->hdr.tcp), r->os_fingerprint)))
			r = TAILQ_NEXT(r, entries);
		else {
			/* Every criterion matched. */
			if (r->tag)
				tag = r->tag;
			if (r->rtableid >= 0)
				rtableid = r->rtableid;
			if (r->anchor == NULL) {
				/*
				 * A real rule ends the search; "no ..."
				 * actions end it without a translation.
				 */
				rm = r;
				if (rm->action == PF_NONAT ||
				    rm->action == PF_NORDR ||
				    rm->action == PF_NOBINAT) {
					rm = NULL;
				}
				break;
			} else
				pf_step_into_anchor(anchor_stack, &asd,
				    &ruleset, rs_num, &r, NULL);
		}
		/* End of this rule list: resume in the enclosing anchor. */
		if (r == NULL)
			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
			    rs_num, &r, NULL, NULL);
	}

	/* Apply the collected tag; a tagging failure voids the match. */
	if (tag > 0 && pf_tag_packet(pd, tag))
		return (NULL);
	if (rtableid >= 0)
		M_SETFIB(pd->m, rtableid);

	return (rm);
}
217
218 static int
pf_get_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,uint16_t low,uint16_t high,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_kpool * rpool,struct pf_udp_mapping ** udp_mapping,pf_sn_types_t sn_type)219 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r,
220 struct pf_addr *naddr, uint16_t *nport, uint16_t low,
221 uint16_t high, struct pf_ksrc_node **sn,
222 struct pf_srchash **sh, struct pf_kpool *rpool,
223 struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type)
224 {
225 struct pf_state_key_cmp key;
226 struct pf_addr init_addr;
227
228 bzero(&init_addr, sizeof(init_addr));
229
230 if (udp_mapping) {
231 MPASS(*udp_mapping == NULL);
232 }
233
234 /*
235 * If we are UDP and have an existing mapping we can get source port
236 * from the mapping. In this case we have to look up the src_node as
237 * pf_map_addr would.
238 */
239 if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) {
240 struct pf_udp_endpoint_cmp udp_source;
241
242 bzero(&udp_source, sizeof(udp_source));
243 udp_source.af = pd->af;
244 PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af);
245 udp_source.port = pd->nsport;
246 if (udp_mapping) {
247 *udp_mapping = pf_udp_mapping_find(&udp_source);
248 if (*udp_mapping) {
249 PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af);
250 *nport = (*udp_mapping)->endpoints[1].port;
251 /* Try to find a src_node as per pf_map_addr(). */
252 if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR &&
253 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
254 *sn = pf_find_src_node(&pd->nsaddr, r,
255 pd->af, sh, sn_type, false);
256 if (*sn != NULL)
257 PF_SRC_NODE_UNLOCK(*sn);
258 return (0);
259 } else {
260 *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr,
261 pd->nsport, &init_addr, 0);
262 if (*udp_mapping == NULL)
263 return (1);
264 }
265 }
266 }
267
268 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr,
269 sn, sh, rpool, sn_type))
270 goto failed;
271
272 if (pd->proto == IPPROTO_ICMP) {
273 if (*nport == htons(ICMP_ECHO)) {
274 low = 1;
275 high = 65535;
276 } else
277 return (0); /* Don't try to modify non-echo ICMP */
278 }
279 #ifdef INET6
280 if (pd->proto == IPPROTO_ICMPV6) {
281 if (*nport == htons(ICMP6_ECHO_REQUEST)) {
282 low = 1;
283 high = 65535;
284 } else
285 return (0); /* Don't try to modify non-echo ICMP */
286 }
287 #endif /* INET6 */
288
289 bzero(&key, sizeof(key));
290 key.af = pd->naf;
291 key.proto = pd->proto;
292 key.port[0] = pd->ndport;
293 PF_ACPY(&key.addr[0], &pd->ndaddr, key.af);
294
295 do {
296 PF_ACPY(&key.addr[1], naddr, key.af);
297 if (udp_mapping && *udp_mapping)
298 PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af);
299
300 /*
301 * port search; start random, step;
302 * similar 2 portloop in in_pcbbind
303 */
304 if (pd->proto == IPPROTO_SCTP) {
305 key.port[1] = pd->nsport;
306 if (!pf_find_state_all_exists(&key, PF_IN)) {
307 *nport = pd->nsport;
308 return (0);
309 } else {
310 return (1); /* Fail mapping. */
311 }
312 } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
313 pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
314 /*
315 * XXX bug: icmp states don't use the id on both sides.
316 * (traceroute -I through nat)
317 */
318 key.port[1] = pd->nsport;
319 if (!pf_find_state_all_exists(&key, PF_IN)) {
320 *nport = pd->nsport;
321 return (0);
322 }
323 } else if (low == high) {
324 key.port[1] = htons(low);
325 if (!pf_find_state_all_exists(&key, PF_IN)) {
326 if (udp_mapping && *udp_mapping != NULL) {
327 (*udp_mapping)->endpoints[1].port = htons(low);
328 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
329 *nport = htons(low);
330 return (0);
331 }
332 } else {
333 *nport = htons(low);
334 return (0);
335 }
336 }
337 } else {
338 uint32_t tmp;
339 uint16_t cut;
340
341 if (low > high) {
342 tmp = low;
343 low = high;
344 high = tmp;
345 }
346 /* low < high */
347 cut = arc4random() % (1 + high - low) + low;
348 /* low <= cut <= high */
349 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
350 if (udp_mapping && *udp_mapping != NULL) {
351 (*udp_mapping)->endpoints[1].port = htons(tmp);
352 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
353 *nport = htons(tmp);
354 return (0);
355 }
356 } else {
357 key.port[1] = htons(tmp);
358 if (!pf_find_state_all_exists(&key, PF_IN)) {
359 *nport = htons(tmp);
360 return (0);
361 }
362 }
363 }
364 tmp = cut;
365 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
366 if (pd->proto == IPPROTO_UDP &&
367 (rpool->opts & PF_POOL_ENDPI &&
368 udp_mapping != NULL)) {
369 (*udp_mapping)->endpoints[1].port = htons(tmp);
370 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
371 *nport = htons(tmp);
372 return (0);
373 }
374 } else {
375 key.port[1] = htons(tmp);
376 if (!pf_find_state_all_exists(&key, PF_IN)) {
377 *nport = htons(tmp);
378 return (0);
379 }
380 }
381 }
382 }
383
384 switch (rpool->opts & PF_POOL_TYPEMASK) {
385 case PF_POOL_RANDOM:
386 case PF_POOL_ROUNDROBIN:
387 /*
388 * pick a different source address since we're out
389 * of free port choices for the current one.
390 */
391 (*sn) = NULL;
392 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL,
393 &init_addr, sn, sh, rpool, sn_type))
394 return (1);
395 break;
396 case PF_POOL_NONE:
397 case PF_POOL_SRCHASH:
398 case PF_POOL_BITMASK:
399 default:
400 return (1);
401 }
402 } while (! PF_AEQ(&init_addr, naddr, pd->naf) );
403
404 failed:
405 if (udp_mapping) {
406 uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
407 *udp_mapping = NULL;
408 }
409
410 return (1); /* none available */
411 }
412
413 static bool
pf_islinklocal(const sa_family_t af,const struct pf_addr * addr)414 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
415 {
416 if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
417 return (true);
418 return (false);
419 }
420
421 static int
pf_get_mape_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_udp_mapping ** udp_mapping)422 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r,
423 struct pf_addr *naddr, uint16_t *nport,
424 struct pf_ksrc_node **sn, struct pf_srchash **sh,
425 struct pf_udp_mapping **udp_mapping)
426 {
427 uint16_t psmask, low, highmask;
428 uint16_t i, ahigh, cut;
429 int ashift, psidshift;
430
431 ashift = 16 - r->rdr.mape.offset;
432 psidshift = ashift - r->rdr.mape.psidlen;
433 psmask = r->rdr.mape.psid & ((1U << r->rdr.mape.psidlen) - 1);
434 psmask = psmask << psidshift;
435 highmask = (1U << psidshift) - 1;
436
437 ahigh = (1U << r->rdr.mape.offset) - 1;
438 cut = arc4random() & ahigh;
439 if (cut == 0)
440 cut = 1;
441
442 for (i = cut; i <= ahigh; i++) {
443 low = (i << ashift) | psmask;
444 if (!pf_get_sport(pd, r,
445 naddr, nport, low, low | highmask, sn, sh, &r->rdr,
446 udp_mapping, PF_SN_NAT))
447 return (0);
448 }
449 for (i = cut - 1; i > 0; i--) {
450 low = (i << ashift) | psmask;
451 if (!pf_get_sport(pd, r,
452 naddr, nport, low, low | highmask, sn, sh, &r->rdr,
453 udp_mapping, PF_SN_NAT))
454 return (0);
455 }
456 return (1);
457 }
458
459 u_short
pf_map_addr(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_kpool * rpool)460 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
461 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
462 struct pf_kpool *rpool)
463 {
464 u_short reason = PFRES_MATCH;
465 struct pf_addr *raddr = NULL, *rmask = NULL;
466 uint64_t hashidx;
467 int cnt;
468
469 mtx_lock(&rpool->mtx);
470 /* Find the route using chosen algorithm. Store the found route
471 in src_node if it was given or found. */
472 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
473 reason = PFRES_MAPFAILED;
474 goto done_pool_mtx;
475 }
476 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
477 switch (af) {
478 #ifdef INET
479 case AF_INET:
480 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
481 !PF_POOL_DYNTYPE(rpool->opts)) {
482 reason = PFRES_MAPFAILED;
483 goto done_pool_mtx;
484 }
485 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
486 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
487 break;
488 #endif /* INET */
489 #ifdef INET6
490 case AF_INET6:
491 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
492 !PF_POOL_DYNTYPE(rpool->opts)) {
493 reason = PFRES_MAPFAILED;
494 goto done_pool_mtx;
495 }
496 raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
497 rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
498 break;
499 #endif /* INET6 */
500 }
501 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
502 if (!PF_POOL_DYNTYPE(rpool->opts)) {
503 reason = PFRES_MAPFAILED;
504 goto done_pool_mtx; /* unsupported */
505 }
506 } else {
507 raddr = &rpool->cur->addr.v.a.addr;
508 rmask = &rpool->cur->addr.v.a.mask;
509 }
510
511 switch (rpool->opts & PF_POOL_TYPEMASK) {
512 case PF_POOL_NONE:
513 PF_ACPY(naddr, raddr, af);
514 break;
515 case PF_POOL_BITMASK:
516 PF_POOLMASK(naddr, raddr, rmask, saddr, af);
517 break;
518 case PF_POOL_RANDOM:
519 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
520 cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
521 if (cnt == 0)
522 rpool->tblidx = 0;
523 else
524 rpool->tblidx = (int)arc4random_uniform(cnt);
525 memset(&rpool->counter, 0, sizeof(rpool->counter));
526 if (pfr_pool_get(rpool->cur->addr.p.tbl,
527 &rpool->tblidx, &rpool->counter, af, NULL)) {
528 reason = PFRES_MAPFAILED;
529 goto done_pool_mtx; /* unsupported */
530 }
531 PF_ACPY(naddr, &rpool->counter, af);
532 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
533 cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
534 if (cnt == 0)
535 rpool->tblidx = 0;
536 else
537 rpool->tblidx = (int)arc4random_uniform(cnt);
538 memset(&rpool->counter, 0, sizeof(rpool->counter));
539 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
540 &rpool->tblidx, &rpool->counter, af,
541 pf_islinklocal)) {
542 reason = PFRES_MAPFAILED;
543 goto done_pool_mtx; /* unsupported */
544 }
545 PF_ACPY(naddr, &rpool->counter, af);
546 } else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
547 switch (af) {
548 #ifdef INET
549 case AF_INET:
550 rpool->counter.addr32[0] = htonl(arc4random());
551 break;
552 #endif /* INET */
553 #ifdef INET6
554 case AF_INET6:
555 if (rmask->addr32[3] != 0xffffffff)
556 rpool->counter.addr32[3] =
557 htonl(arc4random());
558 else
559 break;
560 if (rmask->addr32[2] != 0xffffffff)
561 rpool->counter.addr32[2] =
562 htonl(arc4random());
563 else
564 break;
565 if (rmask->addr32[1] != 0xffffffff)
566 rpool->counter.addr32[1] =
567 htonl(arc4random());
568 else
569 break;
570 if (rmask->addr32[0] != 0xffffffff)
571 rpool->counter.addr32[0] =
572 htonl(arc4random());
573 break;
574 #endif /* INET6 */
575 }
576 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
577 PF_ACPY(init_addr, naddr, af);
578
579 } else {
580 PF_AINC(&rpool->counter, af);
581 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
582 }
583 break;
584 case PF_POOL_SRCHASH:
585 {
586 unsigned char hash[16];
587
588 hashidx =
589 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
590 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
591 cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
592 if (cnt == 0)
593 rpool->tblidx = 0;
594 else
595 rpool->tblidx = (int)(hashidx % cnt);
596 memset(&rpool->counter, 0, sizeof(rpool->counter));
597 if (pfr_pool_get(rpool->cur->addr.p.tbl,
598 &rpool->tblidx, &rpool->counter, af, NULL)) {
599 reason = PFRES_MAPFAILED;
600 goto done_pool_mtx; /* unsupported */
601 }
602 PF_ACPY(naddr, &rpool->counter, af);
603 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
604 cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
605 if (cnt == 0)
606 rpool->tblidx = 0;
607 else
608 rpool->tblidx = (int)(hashidx % cnt);
609 memset(&rpool->counter, 0, sizeof(rpool->counter));
610 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
611 &rpool->tblidx, &rpool->counter, af,
612 pf_islinklocal)) {
613 reason = PFRES_MAPFAILED;
614 goto done_pool_mtx; /* unsupported */
615 }
616 PF_ACPY(naddr, &rpool->counter, af);
617 } else {
618 PF_POOLMASK(naddr, raddr, rmask,
619 (struct pf_addr *)&hash, af);
620 }
621 break;
622 }
623 case PF_POOL_ROUNDROBIN:
624 {
625 struct pf_kpooladdr *acur = rpool->cur;
626
627 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
628 if (!pfr_pool_get(rpool->cur->addr.p.tbl,
629 &rpool->tblidx, &rpool->counter, af, NULL))
630 goto get_addr;
631 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
632 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
633 &rpool->tblidx, &rpool->counter, af, pf_islinklocal))
634 goto get_addr;
635 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
636 goto get_addr;
637
638 try_next:
639 if (TAILQ_NEXT(rpool->cur, entries) == NULL)
640 rpool->cur = TAILQ_FIRST(&rpool->list);
641 else
642 rpool->cur = TAILQ_NEXT(rpool->cur, entries);
643 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
644 if (pfr_pool_get(rpool->cur->addr.p.tbl,
645 &rpool->tblidx, &rpool->counter, af, NULL)) {
646 /* table contains no address of type 'af' */
647 if (rpool->cur != acur)
648 goto try_next;
649 reason = PFRES_MAPFAILED;
650 goto done_pool_mtx;
651 }
652 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
653 rpool->tblidx = -1;
654 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
655 &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) {
656 /* table contains no address of type 'af' */
657 if (rpool->cur != acur)
658 goto try_next;
659 reason = PFRES_MAPFAILED;
660 goto done_pool_mtx;
661 }
662 } else {
663 raddr = &rpool->cur->addr.v.a.addr;
664 rmask = &rpool->cur->addr.v.a.mask;
665 PF_ACPY(&rpool->counter, raddr, af);
666 }
667
668 get_addr:
669 PF_ACPY(naddr, &rpool->counter, af);
670 if (init_addr != NULL && PF_AZERO(init_addr, af))
671 PF_ACPY(init_addr, naddr, af);
672 PF_AINC(&rpool->counter, af);
673 break;
674 }
675 }
676
677 if (nkif)
678 *nkif = rpool->cur->kif;
679
680 done_pool_mtx:
681 mtx_unlock(&rpool->mtx);
682
683 if (reason) {
684 counter_u64_add(V_pf_status.counters[reason], 1);
685 }
686
687 return (reason);
688 }
689
690 u_short
pf_map_addr_sn(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_kpool * rpool,pf_sn_types_t sn_type)691 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
692 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
693 struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_kpool *rpool,
694 pf_sn_types_t sn_type)
695 {
696 u_short reason = 0;
697
698 KASSERT(*sn == NULL, ("*sn not NULL"));
699
700 /*
701 * If this is a sticky-address rule, try to find an existing src_node.
702 * Request the sh to be unlocked if sn was not found, as we never
703 * insert a new sn when parsing the ruleset.
704 */
705 if (rpool->opts & PF_POOL_STICKYADDR &&
706 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
707 *sn = pf_find_src_node(saddr, r, af, sh, sn_type, false);
708
709 if (*sn != NULL) {
710 PF_SRC_NODE_LOCK_ASSERT(*sn);
711
712 /* If the supplied address is the same as the current one we've
713 * been asked before, so tell the caller that there's no other
714 * address to be had. */
715 if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
716 reason = PFRES_MAPFAILED;
717 goto done;
718 }
719
720 PF_ACPY(naddr, &(*sn)->raddr, af);
721 if (nkif)
722 *nkif = (*sn)->rkif;
723 if (V_pf_status.debug >= PF_DEBUG_NOISY) {
724 printf("pf_map_addr: src tracking maps ");
725 pf_print_host(saddr, 0, af);
726 printf(" to ");
727 pf_print_host(naddr, 0, af);
728 if (nkif)
729 printf("@%s", (*nkif)->pfik_name);
730 printf("\n");
731 }
732 goto done;
733 }
734
735 /*
736 * Source node has not been found. Find a new address and store it
737 * in variables given by the caller.
738 */
739 if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) {
740 /* pf_map_addr() sets reason counters on its own */
741 goto done;
742 }
743
744 if (V_pf_status.debug >= PF_DEBUG_NOISY &&
745 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
746 printf("pf_map_addr: selected address ");
747 pf_print_host(naddr, 0, af);
748 if (nkif)
749 printf("@%s", (*nkif)->pfik_name);
750 printf("\n");
751 }
752
753 done:
754 if ((*sn) != NULL)
755 PF_SRC_NODE_UNLOCK(*sn);
756
757 if (reason) {
758 counter_u64_add(V_pf_status.counters[reason], 1);
759 }
760
761 return (reason);
762 }
763
/*
 * Find the nat/rdr/binat rule applying to the packet described by 'pd' and
 * compute the translated state key.
 *
 * pd		packet descriptor.
 * off		not referenced by this function.
 * skp, nkp	out: must both point to NULL on entry.  On PFRES_MATCH they
 *		hold the original and the translated state key; on every
 *		other return they are NULL again.
 * anchor_stack	scratch stack for the anchor traversal.
 * rp		out: the matching rule on PFRES_MATCH, otherwise NULL.
 * udp_mapping	passed to the NAT source port allocator.
 *
 * Returns:
 *   PFRES_MATCH	a translation was computed and differs from the
 *			original key;
 *   PFRES_MAX		no rule applies, the rule is a "no ..." rule, or the
 *			translation left the key unchanged;
 *   PFRES_MEMORY	state key allocation failed;
 *   PFRES_MAPFAILED (or the reason from pf_map_addr_sn())
 *			no address/port could be allocated.
 */
u_short
pf_get_translation(struct pf_pdesc *pd, int off,
    struct pf_state_key **skp, struct pf_state_key **nkp,
    struct pf_kanchor_stackframe *anchor_stack, struct pf_krule **rp,
    struct pf_udp_mapping **udp_mapping)
{
	struct pf_krule	*r = NULL;
	struct pf_addr	*naddr;
	struct pf_ksrc_node	*sn = NULL;
	struct pf_srchash	*sh = NULL;
	uint16_t	*nportp;
	uint16_t	 low, high;
	u_short		 reason;

	PF_RULES_RASSERT();
	KASSERT(*skp == NULL, ("*skp not NULL"));
	KASSERT(*nkp == NULL, ("*nkp not NULL"));

	*rp = NULL;

	/*
	 * Outbound packets consult binat before nat; inbound packets consult
	 * rdr before binat.
	 */
	if (pd->dir == PF_OUT) {
		r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack);
		if (r == NULL)
			r = pf_match_translation(pd, PF_RULESET_NAT, anchor_stack);
	} else {
		r = pf_match_translation(pd, PF_RULESET_RDR, anchor_stack);
		if (r == NULL)
			r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack);
	}

	if (r == NULL)
		return (PFRES_MAX);

	switch (r->action) {
	case PF_NONAT:
	case PF_NOBINAT:
	case PF_NORDR:
		return (PFRES_MAX);
	}

	if (pf_state_key_setup(pd, pd->nsport, pd->ndport, skp, nkp))
		return (PFRES_MEMORY);

	/* The translation is written into side 1 of the new key. */
	naddr = &(*nkp)->addr[1];
	nportp = &(*nkp)->port[1];

	switch (r->action) {
	case PF_NAT:
		/* ICMP always searches the whole id space. */
		if (pd->proto == IPPROTO_ICMP) {
			low = 1;
			high = 65535;
		} else {
			low  = r->rdr.proxy_port[0];
			high = r->rdr.proxy_port[1];
		}
		/* A non-zero MAP-E offset selects the port-set allocator. */
		if (r->rdr.mape.offset > 0) {
			if (pf_get_mape_sport(pd, r, naddr, nportp, &sn,
			    &sh, udp_mapping)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: MAP-E port allocation (%u/%u/%u)"
				    " failed\n",
				    r->rdr.mape.offset,
				    r->rdr.mape.psidlen,
				    r->rdr.mape.psid));
				reason = PFRES_MAPFAILED;
				goto notrans;
			}
		} else if (pf_get_sport(pd, r, naddr, nportp, low, high, &sn,
		    &sh, &r->rdr, udp_mapping, PF_SN_NAT)) {
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
			    r->rdr.proxy_port[0], r->rdr.proxy_port[1]));
			reason = PFRES_MAPFAILED;
			goto notrans;
		}
		break;
	case PF_BINAT:
		/*
		 * binat is a pure address rewrite: outbound the source is
		 * mapped through the pool address/mask, inbound the
		 * destination is mapped through the rule's source
		 * address/mask.  Dynamic interface addresses need at least
		 * one address of the packet's family.
		 */
		switch (pd->dir) {
		case PF_OUT:
			if (r->rdr.cur->addr.type == PF_ADDR_DYNIFTL){
				switch (pd->af) {
#ifdef INET
				case AF_INET:
					if (r->rdr.cur->addr.p.dyn->
					    pfid_acnt4 < 1) {
						reason = PFRES_MAPFAILED;
						goto notrans;
					}
					PF_POOLMASK(naddr,
					    &r->rdr.cur->addr.p.dyn->
					    pfid_addr4,
					    &r->rdr.cur->addr.p.dyn->
					    pfid_mask4, &pd->nsaddr, AF_INET);
					break;
#endif /* INET */
#ifdef INET6
				case AF_INET6:
					if (r->rdr.cur->addr.p.dyn->
					    pfid_acnt6 < 1) {
						reason = PFRES_MAPFAILED;
						goto notrans;
					}
					PF_POOLMASK(naddr,
					    &r->rdr.cur->addr.p.dyn->
					    pfid_addr6,
					    &r->rdr.cur->addr.p.dyn->
					    pfid_mask6, &pd->nsaddr, AF_INET6);
					break;
#endif /* INET6 */
				}
			} else
				PF_POOLMASK(naddr,
				    &r->rdr.cur->addr.v.a.addr,
				    &r->rdr.cur->addr.v.a.mask, &pd->nsaddr,
				    pd->af);
			break;
		case PF_IN:
			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
				switch (pd->af) {
#ifdef INET
				case AF_INET:
					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
						reason = PFRES_MAPFAILED;
						goto notrans;
					}
					PF_POOLMASK(naddr,
					    &r->src.addr.p.dyn->pfid_addr4,
					    &r->src.addr.p.dyn->pfid_mask4,
					    &pd->ndaddr, AF_INET);
					break;
#endif /* INET */
#ifdef INET6
				case AF_INET6:
					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
						reason = PFRES_MAPFAILED;
						goto notrans;
					}
					PF_POOLMASK(naddr,
					    &r->src.addr.p.dyn->pfid_addr6,
					    &r->src.addr.p.dyn->pfid_mask6,
					    &pd->ndaddr, AF_INET6);
					break;
#endif /* INET6 */
				}
			} else
				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
				    &r->src.addr.v.a.mask, &pd->ndaddr, pd->af);
			break;
		}
		break;
	case PF_RDR: {
		struct pf_state_key_cmp key;
		int tries;
		/* Note: 'low' and 'high' shadow the function-level ones. */
		uint16_t cut, low, high, nport;

		reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL,
		    NULL, &sn, &sh, &r->rdr, PF_SN_NAT);
		if (reason != 0)
			goto notrans;
		/* For bitmask pools keep the host bits of the destination. */
		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
			PF_POOLMASK(naddr, naddr, &r->rdr.cur->addr.v.a.mask,
			    &pd->ndaddr, pd->af);

		/* Do not change SCTP ports. */
		if (pd->proto == IPPROTO_SCTP)
			break;

		if (r->rdr.proxy_port[1]) {
			uint32_t	tmp_nport;

			/*
			 * Port range redirect: map the offset of the
			 * destination port within the rule's match range
			 * onto the proxy port range.
			 */
			tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) %
			    (r->rdr.proxy_port[1] - r->rdr.proxy_port[0] +
			    1)) + r->rdr.proxy_port[0];

			/* Wrap around if necessary. */
			if (tmp_nport > 65535)
				tmp_nport -= 65535;
			nport = htons((uint16_t)tmp_nport);
		} else if (r->rdr.proxy_port[0])
			nport = htons(r->rdr.proxy_port[0]);
		else
			nport = pd->ndport;

		/*
		 * Update the destination port.
		 */
		*nportp = nport;

		/*
		 * Do we have a source port conflict in the stack state? Try to
		 * modulate the source port if so. Note that this is racy since
		 * the state lookup may not find any matches here but will once
		 * pf_create_state() actually instantiates the state.
		 */
		bzero(&key, sizeof(key));
		key.af = pd->af;
		key.proto = pd->proto;
		key.port[0] = pd->nsport;
		PF_ACPY(&key.addr[0], &pd->nsaddr, key.af);
		key.port[1] = nport;
		PF_ACPY(&key.addr[1], naddr, key.af);

		if (!pf_find_state_all_exists(&key, PF_OUT))
			break;

		tries = 0;

		/*
		 * Probe upwards from a random port in [low, high], then
		 * downwards; both loops share the 'tries' budget.
		 */
		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
		high = 65535;
		cut = arc4random() % (1 + high - low) + low;
		for (uint32_t tmp = cut;
		    tmp <= high && tmp <= UINT16_MAX &&
		    tries < V_pf_rdr_srcport_rewrite_tries;
		    tmp++, tries++) {
			key.port[0] = htons(tmp);
			if (!pf_find_state_all_exists(&key, PF_OUT)) {
				/* Update the source port. */
				(*nkp)->port[0] = htons(tmp);
				goto out;
			}
		}
		for (uint32_t tmp = cut - 1;
		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
		    tmp--, tries++) {
			key.port[0] = htons(tmp);
			if (!pf_find_state_all_exists(&key, PF_OUT)) {
				/* Update the source port. */
				(*nkp)->port[0] = htons(tmp);
				goto out;
			}
		}

		/*
		 * We failed to find a match. Push on ahead anyway, let
		 * pf_state_insert() be the arbiter of whether the state
		 * conflict is tolerable. In particular, with TCP connections
		 * the state may be reused if the TCP state is terminal.
		 */
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: RDR source port allocation failed\n"));
		break;

out:
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: RDR source port allocation %u->%u\n",
		    ntohs(pd->nsport), ntohs((*nkp)->port[0])));
		break;
	}
	default:
		panic("%s: unknown action %u", __func__, r->action);
	}

	/* Return success only if translation really happened. */
	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
		*rp = r;
		return (PFRES_MATCH);
	}

	reason = PFRES_MAX;
notrans:
	/* No translation: give both state keys back and clear the outputs. */
	uma_zfree(V_pf_state_key_z, *nkp);
	uma_zfree(V_pf_state_key_z, *skp);
	*skp = *nkp = NULL;

	return (reason);
}
1030
/*
 * Compute the translated addresses for an address-family translating
 * (af-to) rule and store them in the packet descriptor.
 *
 * The new source address is taken from the rule's nat pool (which must not
 * be empty).  The new destination comes from the rdr pool when the rule has
 * one; otherwise it is derived from the original destination using the
 * prefix of the rule's destination (IPv6 to IPv4) or of the nat pool
 * address (IPv4 to IPv6).  ICMP echo types stored in pd->ndport are
 * converted between their ICMPv4 and ICMPv6 values.
 *
 * On success pd->nsaddr and pd->ndaddr (and possibly pd->ndport) are
 * rewritten for family pd->naf and 0 is returned.  Returns -1 on failure,
 * and always when the kernel lacks either INET or INET6.
 */
int
pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
	struct pf_addr	 ndaddr, nsaddr, naddr;
	u_int16_t	 nport = 0;
	int		 prefixlen = 96;
	struct pf_srchash	*sh = NULL;
	struct pf_ksrc_node	*sns = NULL;

	bzero(&nsaddr, sizeof(nsaddr));
	bzero(&ndaddr, sizeof(ndaddr));

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s %s, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr");
		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
		printf("\n");
	}

	if (TAILQ_EMPTY(&r->nat.list))
		panic("pf_get_transaddr_af: no nat pool for source address");

	/*
	 * get source address and port
	 *
	 * NOTE(review): the port returned in 'nport' is not read again in
	 * this function, so pd->nsport keeps its original value; confirm
	 * whether the allocated port is meant to be applied.
	 */
	if (pf_get_sport(pd, r, &nsaddr, &nport,
	    r->nat.proxy_port[0], r->nat.proxy_port[1], &sns, &sh, &r->nat,
	    NULL, PF_SN_NAT)) {
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: af-to NAT proxy port allocation (%u-%u) failed\n",
		    r->nat.proxy_port[0], r->nat.proxy_port[1]));
		return (-1);
	}

	/* Translate ICMP echo types between the two protocol versions. */
	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
		NTOHS(pd->ndport);
		if (pd->ndport == ICMP6_ECHO_REQUEST)
			pd->ndport = ICMP_ECHO;
		else if (pd->ndport == ICMP6_ECHO_REPLY)
			pd->ndport = ICMP_ECHOREPLY;
		HTONS(pd->ndport);
	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
		NTOHS(pd->ndport);
		if (pd->ndport == ICMP_ECHO)
			pd->ndport = ICMP6_ECHO_REQUEST;
		else if (pd->ndport == ICMP_ECHOREPLY)
			pd->ndport = ICMP6_ECHO_REPLY;
		HTONS(pd->ndport);
	}

	/* get the destination address and port */
	if (! TAILQ_EMPTY(&r->rdr.list)) {
		/*
		 * pf_get_sport() may have left a source node of the nat pool
		 * in 'sns', while pf_map_addr_sn() requires *sn to be NULL on
		 * entry.  The node was already unlocked, so simply drop the
		 * reference.
		 *
		 * NOTE(review): 'sh' is passed as NULL here; verify that the
		 * source node lookup tolerates that when the rdr pool uses
		 * sticky-address.
		 */
		sns = NULL;
		if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL,
		    &sns, NULL, &r->rdr, PF_SN_NAT))
			return (-1);
		if (r->rdr.proxy_port[0])
			pd->ndport = htons(r->rdr.proxy_port[0]);

		if (pd->naf == AF_INET) {
			/* The prefix is the IPv4 rdr address */
			prefixlen = in_mask2len(
			    (struct in_addr *)&r->rdr.cur->addr.v.a.mask);
			inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		} else {
			/* The prefix is the IPv6 rdr address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL);
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		}
	} else {
		if (pd->naf == AF_INET) {
			/* The prefix is the IPv6 dst address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->dst.addr.v.a.mask, NULL);
			/* Prefixes shorter than 32 bits fall back to /96. */
			if (prefixlen < 32)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr,
			    prefixlen);
		} else {
			/*
			 * The prefix is the IPv6 nat address
			 * (that was stored in pd->nsaddr)
			 */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL);
			/* Never use a prefix longer than /96. */
			if (prefixlen > 96)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr,
			    prefixlen);
		}
	}

	/* Commit the translated addresses to the packet descriptor. */
	PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
	PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s done, prefixlen %d, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    prefixlen);
		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
		printf("\n");
	}

	return (0);
#else
	return (-1);
#endif
}
1145