/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2008 Henning Brauer
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
 */

#include <sys/cdefs.h>
#include "opt_pf.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <crypto/siphash/siphash.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/vnet.h>
#include <net/pfvar.h>
#include <net/if_pflog.h>

#ifdef INET
#include <netinet/in_var.h>
#endif /* INET */

#ifdef INET6
#include <netinet6/in6_var.h>
#endif /* INET6 */


/*
 * Limit the amount of work we do to find a free source port for redirects that
 * introduce a state conflict.
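 * When a redirect produces a state key that collides with an existing state,
 * the PF_RDR case in pf_get_transaddr() probes at most this many alternate
 * source ports before giving up and letting pf_state_insert() arbitrate the
 * conflict.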
 */
#define V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;

static uint64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
static struct pf_krule	*pf_match_translation(int, struct pf_test_ctx *);
static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *,
			    struct pf_krule *);
static int		 pf_get_sport(struct pf_pdesc *, struct pf_krule *,
			    struct pf_addr *, uint16_t *, uint16_t, uint16_t,
			    struct pf_kpool *, struct pf_udp_mapping **,
			    pf_sn_types_t);
static bool		 pf_islinklocal(const sa_family_t, const struct pf_addr *);

static uint64_t
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	SIPHASH_CTX ctx;
#ifdef INET6
	union {
		uint64_t hash64;
		uint32_t hash32[2];
	} h;
#endif /* INET6 */
	uint64_t res = 0;

	_Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, "");

	switch (af) {
#ifdef INET
	case AF_INET:
		res = SipHash24(&ctx, (const uint8_t *)key,
		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
		hash->addr32[0] = res;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		res = SipHash24(&ctx, (const uint8_t *)key,
		    &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0]));
		h.hash64 = res;
		hash->addr32[0] = h.hash32[0];
		hash->addr32[1] = h.hash32[1];
		/*
		 * siphash isn't big enough, but flipping it around is
		 * good enough here.
		 */
		hash->addr32[2] = ~h.hash32[1];
		hash->addr32[3] = ~h.hash32[0];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
	return (res);
}

#define PF_TEST_ATTRIB(t, a)	\
	if (t) {		\
		r = a;		\
		continue;	\
	} else do {		\
	} while (0)

static enum pf_test_status
pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset)
{
	struct pf_krule	*r;
	struct pf_pdesc	*pd = ctx->pd;
	int		 rtableid = -1;

	r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
	while (r != NULL) {
		struct pf_rule_addr	*src = NULL, *dst = NULL;
		struct pf_addr_wrap	*xdst = NULL;

		if (r->action == PF_BINAT && pd->dir == PF_IN) {
			src = &r->dst;
			if (r->rdr.cur != NULL)
				xdst = &r->rdr.cur->addr;
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		pf_counter_u64_add(&r->evaluations, 1);
		PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot,
			r->skip[PF_SKIP_IFP]);
		PF_TEST_ATTRIB(r->direction && r->direction != pd->dir,
			r->skip[PF_SKIP_DIR]);
		PF_TEST_ATTRIB(r->af && r->af != pd->af,
			r->skip[PF_SKIP_AF]);
		PF_TEST_ATTRIB(r->proto && r->proto != pd->proto,
			r->skip[PF_SKIP_PROTO]);
		PF_TEST_ATTRIB(PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af,
		    src->neg, pd->kif, M_GETFIB(pd->m)),
			r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
			    PF_SKIP_DST_ADDR]);
		PF_TEST_ATTRIB(src->port_op && !pf_match_port(src->port_op,
		    src->port[0], src->port[1], pd->nsport),
			r->skip[src == &r->src ?
			    PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT]);
		PF_TEST_ATTRIB(dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL,
		    M_GETFIB(pd->m)),
			r->skip[PF_SKIP_DST_ADDR]);
		PF_TEST_ATTRIB(xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af,
		    0, NULL, M_GETFIB(pd->m)),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB(dst != NULL && dst->port_op &&
		    !pf_match_port(dst->port_op, dst->port[0],
		    dst->port[1], pd->ndport),
			r->skip[PF_SKIP_DST_PORT]);
		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag,
		    pd->pf_mtag ? pd->pf_mtag->tag : 0),
			TAILQ_NEXT(r, entries));
		PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
		    &pd->hdr.tcp), r->os_fingerprint)),
			TAILQ_NEXT(r, entries));
		if (r->tag)
			ctx->tag = r->tag;
		if (r->rtableid >= 0)
			rtableid = r->rtableid;
		if (r->anchor == NULL) {
			if (r->action == PF_NONAT ||
			    r->action == PF_NORDR ||
			    r->action == PF_NOBINAT) {
				*ctx->rm = NULL;
			} else {
				/*
				 * found matching r
				 */
				ctx->tr = r;
				/*
				 * anchor, with ruleset, where r belongs to
				 */
				*ctx->am = ctx->a;
				/*
				 * ruleset where r belongs to
				 */
				*ctx->rsm = ruleset;
				/*
				 * ruleset, where anchor belongs to.
				 */
				ctx->arsm = ctx->aruleset;
			}
			break;
		} else {
			ctx->a = r;			/* remember anchor */
			ctx->aruleset = ruleset;	/* and its ruleset */
			if (pf_step_into_translation_anchor(rs_num, ctx,
			    r) != PF_TEST_OK) {
				break;
			}
		}
		r = TAILQ_NEXT(r, entries);
	}

	if (ctx->tag > 0 && pf_tag_packet(pd, ctx->tag))
		return (PF_TEST_FAIL);
	if (rtableid >= 0)
		M_SETFIB(pd->m, rtableid);

	return (PF_TEST_OK);
}

static enum pf_test_status
pf_step_into_translation_anchor(int rs_num, struct pf_test_ctx *ctx, struct pf_krule *r)
{
	enum pf_test_status	rv;

	PF_RULES_RASSERT();

	if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
		printf("%s: anchor stack overflow on %s\n",
		    __func__, r->anchor->name);
		return (PF_TEST_FAIL);
	}

	ctx->depth++;

	if (r->anchor_wildcard) {
		struct pf_kanchor *child;
		rv = PF_TEST_OK;
		RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
			rv = pf_match_translation_rule(rs_num, ctx, &child->ruleset);
			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
				/*
				 * We either hit a rule with a quick action
				 * (more likely), or hit some runtime
				 * error (e.g. a pool_get() failure).
				 */
				break;
			}
		}
	} else {
		rv = pf_match_translation_rule(rs_num, ctx, &r->anchor->ruleset);
	}

	ctx->depth--;

	return (rv);
}

static struct pf_krule *
pf_match_translation(int rs_num, struct pf_test_ctx *ctx)
{
	enum pf_test_status rv;

	MPASS(ctx->depth == 0);
	rv = pf_match_translation_rule(rs_num, ctx, &pf_main_ruleset);
	MPASS(ctx->depth == 0);
	if (rv != PF_TEST_OK)
		return (NULL);

	return (ctx->tr);
}

static int
pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr,
    uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool *rpool,
    struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type)
{
	struct pf_state_key_cmp	key;
	struct pf_addr		init_addr;
	int			dir = (pd->dir == PF_IN) ?
	    PF_OUT : PF_IN;
	int			sidx = pd->sidx;
	int			didx = pd->didx;

	bzero(&init_addr, sizeof(init_addr));

	MPASS(udp_mapping == NULL || *udp_mapping == NULL);

	/*
	 * If we are UDP and have an existing mapping we can get the source
	 * port from the mapping. In this case we have to look up the src_node
	 * as pf_map_addr() would.
	 */
	if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) {
		struct pf_udp_endpoint_cmp udp_source;

		bzero(&udp_source, sizeof(udp_source));
		udp_source.af = pd->af;
		pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af);
		udp_source.port = pd->nsport;
		if (udp_mapping) {
			struct pf_ksrc_node	*sn = NULL;
			struct pf_srchash	*sh = NULL;
			*udp_mapping = pf_udp_mapping_find(&udp_source);
			if (*udp_mapping) {
				pf_addrcpy(naddr,
				    &(*udp_mapping)->endpoints[1].addr,
				    pd->af);
				*nport = (*udp_mapping)->endpoints[1].port;
				/*
				 * Try to find a src_node as per pf_map_addr().
				 * XXX: Why? This code seems to do nothing.
				 */
				if (rpool->opts & PF_POOL_STICKYADDR &&
				    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
					sn = pf_find_src_node(&pd->nsaddr, r,
					    pd->af, &sh, sn_type, false);
				if (sn != NULL)
					PF_SRC_NODE_UNLOCK(sn);
				return (0);
			} else {
				*udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr,
				    pd->nsport, &init_addr, 0);
				if (*udp_mapping == NULL)
					return (1);
			}
		}
	}

	if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, &(pd->naf), NULL,
	    &init_addr, rpool, sn_type))
		goto failed;

	if (pd->proto == IPPROTO_ICMP) {
		if (pd->ndport == htons(ICMP_ECHO)) {
			low = 1;
			high = 65535;
		} else {
			MPASS(udp_mapping == NULL || *udp_mapping == NULL);
			return (0);	/* Don't try to modify non-echo ICMP */
		}
	}
#ifdef INET6
	if (pd->proto == IPPROTO_ICMPV6) {
		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
			low = 1;
			high = 65535;
		} else {
			MPASS(udp_mapping == NULL || *udp_mapping == NULL);
			return (0);	/* Don't try to modify non-echo ICMP */
		}
	}
#endif /* INET6 */

	bzero(&key, sizeof(key));
	key.af = pd->naf;
	key.proto = pd->proto;

	do {
		pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
		pf_addrcpy(&key.addr[sidx], naddr, key.af);
		key.port[didx] = pd->ndport;

		if (udp_mapping && *udp_mapping)
			pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr,
			    pd->af);

		/*
		 * Port search: start at a random port and step from there,
		 * similar to the port loop in in_pcbbind().
		 */
		if (pd->proto == IPPROTO_SCTP) {
			key.port[sidx] = pd->nsport;
			MPASS(udp_mapping == NULL || *udp_mapping == NULL);
			if (!pf_find_state_all_exists(&key, dir)) {
				*nport = pd->nsport;
				return (0);
			} else {
				return (1);	/* Fail mapping. */
			}
		} else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
		    pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
			/*
			 * XXX bug: icmp states don't use the id on both sides.
			 * (traceroute -I through nat)
			 */
			key.port[sidx] = pd->nsport;
			if (!pf_find_state_all_exists(&key, dir)) {
				if (udp_mapping && *udp_mapping != NULL) {
					(*udp_mapping)->endpoints[1].port = pd->nsport;
					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
						*nport = pd->nsport;
						return (0);
					}
				} else {
					MPASS(udp_mapping == NULL ||
					    *udp_mapping == NULL);
					*nport = pd->nsport;
					return (0);
				}
			}
		} else if (low == high) {
			key.port[sidx] = htons(low);
			if (!pf_find_state_all_exists(&key, dir)) {
				if (udp_mapping && *udp_mapping != NULL) {
					(*udp_mapping)->endpoints[1].port = htons(low);
					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
						*nport = htons(low);
						return (0);
					}
				} else {
					MPASS(udp_mapping == NULL ||
					    *udp_mapping == NULL);
					*nport = htons(low);
					return (0);
				}
			}
		} else {
			uint32_t tmp;
			uint16_t cut;

			if (low > high) {
				tmp = low;
				low = high;
				high = tmp;
			}
			/* low < high */
			cut = arc4random() % (1 + high - low) + low;
			/* low <= cut <= high */
			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
				if (udp_mapping && *udp_mapping != NULL) {
					(*udp_mapping)->endpoints[1].port = htons(tmp);
					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
						*nport = htons(tmp);
						return (0);
					}
				} else {
					key.port[sidx] = htons(tmp);
					if (!pf_find_state_all_exists(&key, dir)) {
						*nport = htons(tmp);
						MPASS(udp_mapping == NULL ||
						    *udp_mapping == NULL);
						return (0);
					}
				}
			}
			tmp = cut;
			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
				if (pd->proto == IPPROTO_UDP &&
				    (rpool->opts & PF_POOL_ENDPI &&
				    udp_mapping != NULL)) {
					(*udp_mapping)->endpoints[1].port = htons(tmp);
					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
						*nport = htons(tmp);
						return (0);
					}
				} else {
					key.port[sidx] = htons(tmp);
					if (!pf_find_state_all_exists(&key, dir)) {
						MPASS(udp_mapping == NULL ||
						    *udp_mapping == NULL);
						*nport = htons(tmp);
						return (0);
					}
				}
			}
		}

		switch (rpool->opts & PF_POOL_TYPEMASK) {
		case PF_POOL_RANDOM:
		case PF_POOL_ROUNDROBIN:
			/*
			 * pick a different source address since we're out
			 * of free port choices for the current one.
			 */
			if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr,
			    &(pd->naf), NULL, &init_addr, rpool, sn_type))
				goto failed;
			break;
		case PF_POOL_NONE:
		case PF_POOL_SRCHASH:
		case PF_POOL_BITMASK:
		default:
			goto failed;
		}
	} while (!
	    PF_AEQ(&init_addr, naddr, pd->naf));

failed:
	if (udp_mapping) {
		uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
		*udp_mapping = NULL;
	}

	return (1);			/* none available */
}

static bool
pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
{
	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
		return (true);
	return (false);
}

static int
pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r,
    struct pf_addr *naddr, uint16_t *nport, struct pf_udp_mapping **udp_mapping,
    struct pf_kpool *rpool)
{
	uint16_t psmask, low, highmask;
	uint16_t i, ahigh, cut;
	int ashift, psidshift;

	ashift = 16 - rpool->mape.offset;
	psidshift = ashift - rpool->mape.psidlen;
	psmask = rpool->mape.psid & ((1U << rpool->mape.psidlen) - 1);
	psmask = psmask << psidshift;
	highmask = (1U << psidshift) - 1;

	ahigh = (1U << rpool->mape.offset) - 1;
	cut = arc4random() & ahigh;
	if (cut == 0)
		cut = 1;

	for (i = cut; i <= ahigh; i++) {
		low = (i << ashift) | psmask;
		if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask,
		    rpool, udp_mapping, PF_SN_NAT))
			return (0);
	}
	for (i = cut - 1; i > 0; i--) {
		low = (i << ashift) | psmask;
		if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask,
		    rpool, udp_mapping, PF_SN_NAT))
			return (0);
	}
	return (1);
}

static __inline u_short
pf_check_src_node_valid(struct pf_ksrc_node *sn, struct pf_kpool *rpool)
{
	struct pf_addr		*raddr, *rmask;
	struct pf_addr		*caddr;	/* cached redirection address */
	struct pf_kpooladdr	*pa;
	sa_family_t		 raf;
	sa_family_t		 caf;	/* cached redirection AF */
	u_short			 valid = 0;

	KASSERT(sn != NULL, ("sn is NULL"));
	KASSERT(rpool != NULL, ("rpool is NULL"));

	/* check if the cached entry is still valid */

	if (sn->type == PF_SN_LIMIT) {
		/* Always valid as it does not store a redirection address. */
		return (1);
	}

	mtx_lock(&rpool->mtx);
	caddr = &(sn->raddr);
	caf = sn->raf;

	TAILQ_FOREACH(pa, &rpool->list, entries) {
		if (PF_AZERO(caddr, caf)) {
			valid = 1;
			goto done;
		} else if (pa->addr.type == PF_ADDR_DYNIFTL) {
			if (pfr_kentry_byaddr(pa->addr.p.dyn->pfid_kt, caddr, caf, 0)) {
				valid = 1;
				goto done;
			}
		} else if (pa->addr.type == PF_ADDR_TABLE) {
			if (pfr_kentry_byaddr(pa->addr.p.tbl, caddr, caf, 0)) {
				valid = 1;
				goto done;
			}
		} else if (pa->addr.type != PF_ADDR_NOROUTE) {
			/* PF_ADDR_URPFFAILED, PF_ADDR_RANGE, PF_ADDR_ADDRMASK */
			raddr = &(pa->addr.v.a.addr);
			rmask = &(pa->addr.v.a.mask);
			raf = pa->af;
			if (raf == caf && pf_match_addr(0, raddr, rmask, caddr, caf)) {
				valid = 1;
				goto done;
			}
		}
		/* else PF_ADDR_NOROUTE */
	}

done:
	mtx_unlock(&rpool->mtx);

	return (valid);
}

u_short
pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pfi_kkif **nkif, sa_family_t *naf,
    struct pf_addr *init_addr, struct pf_kpool *rpool)
{
	u_short			 reason = PFRES_MATCH;
	struct pf_addr		*raddr = NULL, *rmask = NULL;
	struct pfr_ktable	*kt;
	uint64_t		 hashidx;
	int			 cnt;
	sa_family_t		 wanted_af;
	u_int8_t		 pool_type;
	bool			 prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH;

	KASSERT(saf != 0, ("%s: saf == 0", __func__));
	KASSERT(naf != NULL, ("%s: naf = NULL", __func__));
	KASSERT((*naf) != 0, ("%s: *naf = 0", __func__));

	/*
	 * The given (*naf) is a hint about the AF of the forwarded packet.
	 * It might be changed if prefer_ipv6_nexthop is enabled and
	 * the combination of nexthop AF and packet AF allows for it.
	 */
	wanted_af = (*naf);

	mtx_lock(&rpool->mtx);
	/* Find the route using the chosen algorithm. Store the found route
	   in src_node if it was given or found. */
	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
		reason = PFRES_MAPFAILED;
		goto done_pool_mtx;
	}
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		switch (wanted_af) {
#ifdef INET
		case AF_INET:
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    !PF_POOL_DYNTYPE(rpool->opts)) {
				reason = PFRES_MAPFAILED;
				goto done_pool_mtx;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    !PF_POOL_DYNTYPE(rpool->opts)) {
				reason = PFRES_MAPFAILED;
				goto done_pool_mtx;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
#endif /* INET6 */
		default:
			unhandled_af(wanted_af);
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		if (!PF_POOL_DYNTYPE(rpool->opts)) {
			reason = PFRES_MAPFAILED;
			goto done_pool_mtx; /* unsupported */
		}
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	/*
	 * For pools with a single host and the prefer-ipv6-nexthop option
	 * we can return a pool address of any AF, unless the forwarded
	 * packet is IPv6, in which case only an IPv6 pool address may be
	 * returned. Without prefer-ipv6-nexthop we can return a pool address
	 * only of the wanted AF, unless the pool address's AF is unknown,
	 * which happens when old ioctls have been used to set up the pool.
	 *
	 * Round-robin pools have their own logic for retrying next addresses.
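	 *
	 * Concretely, with prefer-ipv6-nexthop a forwarded IPv4 packet may be
	 * mapped to either an IPv4 or an IPv6 pool address, while a forwarded
	 * IPv6 packet can only ever be mapped to an IPv6 pool address.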
	 */
	pool_type = rpool->opts & PF_POOL_TYPEMASK;
	if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK ||
	    ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) &&
	    rpool->cur->addr.type != PF_ADDR_TABLE &&
	    rpool->cur->addr.type != PF_ADDR_DYNIFTL)) {
		if (prefer_ipv6_nexthop) {
			if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) {
				reason = PFRES_MAPFAILED;
				goto done_pool_mtx;
			}
			wanted_af = rpool->cur->af;
		} else {
			if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) {
				reason = PFRES_MAPFAILED;
				goto done_pool_mtx;
			}
		}
	}

	switch (pool_type) {
	case PF_POOL_NONE:
		pf_addrcpy(naddr, raddr, wanted_af);
		break;
	case PF_POOL_BITMASK:
		pf_poolmask(naddr, raddr, rmask, saddr, wanted_af);
		break;
	case PF_POOL_RANDOM:
		if (rpool->cur->addr.type == PF_ADDR_TABLE ||
		    rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (rpool->cur->addr.type == PF_ADDR_TABLE)
				kt = rpool->cur->addr.p.tbl;
			else
				kt = rpool->cur->addr.p.dyn->pfid_kt;
			kt = pfr_ktable_select_active(kt);
			if (kt == NULL) {
				reason = PFRES_MAPFAILED;
				goto done_pool_mtx; /* unsupported */
			}
			cnt = kt->pfrkt_cnt;
			if (cnt == 0)
				rpool->tblidx = 0;
			else
				rpool->tblidx = (int)arc4random_uniform(cnt);
			memset(&rpool->counter, 0, sizeof(rpool->counter));
			if (prefer_ipv6_nexthop)
				wanted_af = AF_INET6;
retry_other_af_random:
			if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
			    wanted_af, pf_islinklocal, false)) {
				/* Retry with IPv4 nexthop for IPv4 traffic */
				if (prefer_ipv6_nexthop &&
				    wanted_af == AF_INET6 &&
				    (*naf) == AF_INET) {
					wanted_af = AF_INET;
					goto retry_other_af_random;
				} else {
					/* no hosts in wanted AF */
					reason = PFRES_MAPFAILED;
					goto done_pool_mtx;
				}
			}
			pf_addrcpy(naddr, &rpool->counter, wanted_af);
		} else if (init_addr != NULL && PF_AZERO(init_addr,
		    wanted_af)) {
			switch (wanted_af) {
#ifdef INET
			case AF_INET:
				rpool->counter.addr32[0] = arc4random();
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				if (rmask->addr32[3] != 0xffffffff)
					rpool->counter.addr32[3] =
					    arc4random();
				else
					break;
				if (rmask->addr32[2] != 0xffffffff)
					rpool->counter.addr32[2] =
					    arc4random();
				else
					break;
				if (rmask->addr32[1] != 0xffffffff)
					rpool->counter.addr32[1] =
					    arc4random();
				else
					break;
				if (rmask->addr32[0] != 0xffffffff)
					rpool->counter.addr32[0] =
					    arc4random();
				break;
#endif /* INET6 */
			}
			pf_poolmask(naddr, raddr, rmask, &rpool->counter,
			    wanted_af);
			pf_addrcpy(init_addr, naddr, wanted_af);

		} else {
			pf_addr_inc(&rpool->counter, wanted_af);
			pf_poolmask(naddr, raddr, rmask, &rpool->counter,
			    wanted_af);
		}
		break;
	case PF_POOL_SRCHASH:
	    {
		unsigned char hash[16];

		hashidx =
		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key,
		    wanted_af);
		if (rpool->cur->addr.type == PF_ADDR_TABLE ||
		    rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (rpool->cur->addr.type == PF_ADDR_TABLE)
				kt = rpool->cur->addr.p.tbl;
			else
				kt = rpool->cur->addr.p.dyn->pfid_kt;
			kt = pfr_ktable_select_active(kt);
			if (kt == NULL) {
				reason = PFRES_MAPFAILED;
				goto done_pool_mtx; /* unsupported */
			}
			cnt = kt->pfrkt_cnt;
			if (cnt == 0)
				rpool->tblidx = 0;
			else
				rpool->tblidx = (int)(hashidx % cnt);
			memset(&rpool->counter,
			    0, sizeof(rpool->counter));
			if (prefer_ipv6_nexthop)
				wanted_af = AF_INET6;
retry_other_af_srchash:
			if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
			    wanted_af, pf_islinklocal, false)) {
				/* Retry with IPv4 nexthop for IPv4 traffic */
				if (prefer_ipv6_nexthop &&
				    wanted_af == AF_INET6 &&
				    (*naf) == AF_INET) {
					wanted_af = AF_INET;
					goto retry_other_af_srchash;
				} else {
					/* no hosts in wanted AF */
					reason = PFRES_MAPFAILED;
					goto done_pool_mtx;
				}
			}
			pf_addrcpy(naddr, &rpool->counter, wanted_af);
		} else {
			pf_poolmask(naddr, raddr, rmask,
			    (struct pf_addr *)&hash, wanted_af);
		}
		break;
	    }
	case PF_POOL_ROUNDROBIN:
	    {
		struct pf_kpooladdr *acur = rpool->cur;

retry_other_af_rr:
		if (prefer_ipv6_nexthop)
			wanted_af = rpool->ipv6_nexthop_af;
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter, wanted_af,
			    NULL, true))
				goto get_addr;
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter, wanted_af,
			    pf_islinklocal, true))
				goto get_addr;
		} else if (rpool->cur->af == wanted_af &&
		    pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af))
			goto get_addr;
		if (prefer_ipv6_nexthop &&
		    (*naf) == AF_INET && wanted_af == AF_INET6) {
			/* Reset table index when changing wanted AF. */
			rpool->tblidx = -1;
			rpool->ipv6_nexthop_af = AF_INET;
			goto retry_other_af_rr;
		}
try_next:
		/* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */
		rpool->ipv6_nexthop_af = AF_INET6;
		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
			rpool->cur = TAILQ_FIRST(&rpool->list);
		else
			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
try_next_ipv6_nexthop_rr:
		/*
		 * Reset table index when iterating pools or changing wanted AF.
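		 * (A negative index makes pfr_pool_get() restart its scan from
		 * the first table entry instead of continuing from the stored
		 * counter.)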
		 */
		rpool->tblidx = -1;
		if (prefer_ipv6_nexthop)
			wanted_af = rpool->ipv6_nexthop_af;
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter, wanted_af, NULL,
			    true))
				goto get_addr;
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal,
			    true))
				goto get_addr;
		} else {
			if (rpool->cur->af == wanted_af) {
				raddr = &rpool->cur->addr.v.a.addr;
				rmask = &rpool->cur->addr.v.a.mask;
				pf_addrcpy(&rpool->counter, raddr, wanted_af);
				goto get_addr;
			}
		}
		if (prefer_ipv6_nexthop &&
		    (*naf) == AF_INET && wanted_af == AF_INET6) {
			rpool->ipv6_nexthop_af = AF_INET;
			goto try_next_ipv6_nexthop_rr;
		}
		if (rpool->cur != acur)
			goto try_next;
		reason = PFRES_MAPFAILED;
		goto done_pool_mtx;
get_addr:
		pf_addrcpy(naddr, &rpool->counter, wanted_af);
		if (init_addr != NULL && PF_AZERO(init_addr, wanted_af))
			pf_addrcpy(init_addr, naddr, wanted_af);
		pf_addr_inc(&rpool->counter, wanted_af);
		break;
	    }
	}

	if (wanted_af == 0) {
		reason = PFRES_MAPFAILED;
		goto done_pool_mtx;
	}

	if (nkif)
		*nkif = rpool->cur->kif;

	(*naf) = wanted_af;

done_pool_mtx:
	mtx_unlock(&rpool->mtx);

	return (reason);
}

u_short
pf_map_addr_sn(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, sa_family_t *naf, struct pfi_kkif **nkif,
    struct pf_addr *init_addr, struct pf_kpool *rpool, pf_sn_types_t sn_type)
{
	struct pf_ksrc_node	*sn = NULL;
	struct pf_srchash	*sh = NULL;
	u_short			 reason = 0;

	/*
	 * If this is a sticky-address rule, try to find an existing src_node.
	 */
	if (rpool->opts & PF_POOL_STICKYADDR &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
		sn = pf_find_src_node(saddr, r, saf, &sh, sn_type, false);

	if (sn != NULL) {
		PF_SRC_NODE_LOCK_ASSERT(sn);
		/*
		 * Check if the source node's redirection address still exists
		 * in the pool from which the SN was created. If not, delete it.
		 * Similar to pf_kill_srcnodes(). Unlink the source node
		 * from the tree, unlink it from states, then free it. Do not
		 * overlap source node and state locks to avoid a LOR.
		 */
		if (!pf_check_src_node_valid(sn, rpool)) {
			pf_unlink_src_node(sn);
			PF_SRC_NODE_UNLOCK(sn);
			if (V_pf_status.debug >= PF_DEBUG_NOISY) {
				printf("%s: stale src tracking (%d) ",
				    __func__, sn_type);
				pf_print_host(saddr, 0, saf);
				printf(" to ");
				pf_print_host(&(sn->raddr), 0, sn->raf);
				if (nkif)
					printf("@%s", sn->rkif->pfik_name);
				printf("\n");
			}

			for (int i = 0; i <= V_pf_hashmask; i++) {
				struct pf_idhash *ih = &V_pf_idhash[i];
				struct pf_kstate *st;

				PF_HASHROW_LOCK(ih);
				LIST_FOREACH(st, &ih->states, entry) {
					if (st->sns[sn->type] == sn) {
						st->sns[sn->type] = NULL;
					}
				}
				PF_HASHROW_UNLOCK(ih);
			}
			pf_free_src_node(sn);
			counter_u64_add(
			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
			sn = NULL;
			goto map_addr;
		}

		(*naf) = sn->raf;

		/*
		 * If the supplied address is the same as the current one, we've
		 * been asked before, so tell the caller that there's no other
		 * address to be had.
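		 * With a sticky address the same redirection address would be
		 * returned on every retry, so fail instead of letting
		 * pf_get_sport() retry the same address indefinitely.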
		 */
		if (PF_AEQ(naddr, &(sn->raddr), *naf)) {
			printf("%s: no more addresses\n", __func__);
			reason = PFRES_MAPFAILED;
			goto done;
		}

		pf_addrcpy(naddr, &(sn->raddr), *naf);

		if (nkif)
			*nkif = sn->rkif;
		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
			printf("%s: src tracking maps ", __func__);
			pf_print_host(saddr, 0, saf);
			printf(" to ");
			pf_print_host(naddr, 0, *naf);
			if (nkif)
				printf("@%s", (*nkif)->pfik_name);
			printf("\n");
		}
		goto done;
	}

map_addr:
	/*
	 * The source node has not been found or is invalid. Find a new address
	 * and store it in the variables given by the caller.
	 */
	if ((reason = pf_map_addr(saf, r, saddr, naddr, nkif, naf, init_addr,
	    rpool)) != 0) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: pf_map_addr has failed\n", __func__);
		goto done;
	}

	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("%s: selected address ", __func__);
		pf_print_host(naddr, 0, *naf);
		if (nkif)
			printf("@%s", (*nkif)->pfik_name);
		printf("\n");
	}

done:
	if (sn != NULL)
		PF_SRC_NODE_UNLOCK(sn);

	return (reason);
}

u_short
pf_get_translation(struct pf_test_ctx *ctx)
{
	struct pf_krule	*r = NULL;
	u_short		 transerror;

	PF_RULES_RASSERT();
	KASSERT(ctx->sk == NULL, ("*skp not NULL"));
	KASSERT(ctx->nk == NULL, ("*nkp not NULL"));

	ctx->nr = NULL;

	if (ctx->pd->dir == PF_OUT) {
		r = pf_match_translation(PF_RULESET_BINAT, ctx);
		if (r == NULL)
			r = pf_match_translation(PF_RULESET_NAT, ctx);
	} else {
		r = pf_match_translation(PF_RULESET_RDR, ctx);
		if (r == NULL)
			r = pf_match_translation(PF_RULESET_BINAT, ctx);
	}

	if (r == NULL)
		return (PFRES_MAX);

	switch (r->action) {
	case PF_NONAT:
	case PF_NOBINAT:
	case PF_NORDR:
		return (PFRES_MAX);
	}

	transerror = pf_get_transaddr(ctx, r, r->action, &(r->rdr));
	if (transerror == PFRES_MATCH)
		ctx->nr = r;

	return (transerror);
}

u_short
pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
    uint8_t nat_action, struct pf_kpool *rpool)
{
	struct pf_pdesc	*pd = ctx->pd;
	struct pf_addr	*naddr;
	int		 idx;
	uint16_t	*nportp;
	uint16_t	 low, high;
	u_short		 reason;

	PF_RULES_RASSERT();
	KASSERT(r != NULL, ("r is NULL"));
	KASSERT(!(r->rule_flag & PFRULE_AFTO), ("AFTO rule"));

	if (ctx->sk == NULL && ctx->nk == NULL) {
		if (pf_state_key_setup(pd, pd->nsport, pd->ndport, &ctx->sk,
		    &ctx->nk))
			return (PFRES_MEMORY);
	}

	switch (nat_action) {
	case PF_NAT:
		idx = pd->sidx;
		break;
	case PF_BINAT:
		idx = 1;
		break;
	case PF_RDR:
		idx = pd->didx;
		break;
	}
	naddr = &ctx->nk->addr[idx];
	nportp = &ctx->nk->port[idx];

	switch (nat_action) {
	case PF_NAT:
		if (pd->proto == IPPROTO_ICMP) {
			low = 1;
			high = 65535;
		} else {
			low = rpool->proxy_port[0];
			high = rpool->proxy_port[1];
		}
		if (rpool->mape.offset > 0) {
			if (pf_get_mape_sport(pd, r, naddr, nportp,
			    &ctx->udp_mapping, rpool)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    "pf: MAP-E port allocation (%u/%u/%u)"
				    " failed",
				    rpool->mape.offset,
				    rpool->mape.psidlen,
				    rpool->mape.psid);
				reason = PFRES_MAPFAILED;
				goto notrans;
			}
		} else if (pf_get_sport(pd, r, naddr, nportp, low, high,
		    rpool, &ctx->udp_mapping, PF_SN_NAT)) {
			DPFPRINTF(PF_DEBUG_MISC,
			    "pf: NAT proxy port allocation (%u-%u) failed",
			    rpool->proxy_port[0], rpool->proxy_port[1]);
			reason = PFRES_MAPFAILED;
			goto notrans;
		}
		break;
	case PF_BINAT:
		switch (pd->dir) {
		case PF_OUT:
			if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
				switch (pd->af) {
#ifdef INET
				case AF_INET:
					if (rpool->cur->addr.p.dyn->
					    pfid_acnt4 < 1) {
						reason = PFRES_MAPFAILED;
						goto notrans;
					}
					pf_poolmask(naddr,
					    &rpool->cur->addr.p.dyn->pfid_addr4,
					    &rpool->cur->addr.p.dyn->pfid_mask4,
					    &pd->nsaddr, AF_INET);
					break;
#endif /* INET */
#ifdef INET6
				case AF_INET6:
					if (rpool->cur->addr.p.dyn->
					    pfid_acnt6 < 1) {
						reason = PFRES_MAPFAILED;
						goto notrans;
					}
					pf_poolmask(naddr,
					    &rpool->cur->addr.p.dyn->pfid_addr6,
					    &rpool->cur->addr.p.dyn->pfid_mask6,
					    &pd->nsaddr, AF_INET6);
					break;
#endif /* INET6 */
				}
			} else
				pf_poolmask(naddr,
				    &rpool->cur->addr.v.a.addr,
				    &rpool->cur->addr.v.a.mask, &pd->nsaddr,
				    pd->af);
			break;
		case PF_IN:
			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
				switch (pd->af) {
#ifdef INET
				case AF_INET:
					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
						reason = PFRES_MAPFAILED;
						goto notrans;
					}
					pf_poolmask(naddr,
					    &r->src.addr.p.dyn->pfid_addr4,
					    &r->src.addr.p.dyn->pfid_mask4,
					    &pd->ndaddr, AF_INET);
					break;
#endif /* INET */
#ifdef INET6
				case AF_INET6:
					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
						reason = PFRES_MAPFAILED;
						goto notrans;
					}
					pf_poolmask(naddr,
					    &r->src.addr.p.dyn->pfid_addr6,
					    &r->src.addr.p.dyn->pfid_mask6,
					    &pd->ndaddr, AF_INET6);
					break;
#endif /* INET6 */
				}
			} else
				pf_poolmask(naddr, &r->src.addr.v.a.addr,
				    &r->src.addr.v.a.mask, &pd->ndaddr, pd->af);
			break;
		}
		break;
	case PF_RDR: {
		struct pf_state_key_cmp key;
		int tries;
		uint16_t cut, low, high, nport;

		reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr,
		    &(pd->naf), NULL, NULL, rpool, PF_SN_NAT);

		if (reason != 0)
			goto notrans;
		if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
			pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask,
			    &pd->ndaddr, pd->af);

		/* Do not change SCTP ports. */
		if (pd->proto == IPPROTO_SCTP)
			break;

		if (rpool->proxy_port[1]) {
			uint32_t	tmp_nport;
			uint16_t	div;

			div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1;
			div = (div == 0) ? 1 : div;

			tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) +
			    rpool->proxy_port[0];

			/* Wrap around if necessary. */
			if (tmp_nport > 65535)
				tmp_nport -= 65535;
			nport = htons((uint16_t)tmp_nport);
		} else if (rpool->proxy_port[0])
			nport = htons(rpool->proxy_port[0]);
		else
			nport = pd->ndport;

		/*
		 * Update the destination port.
		 */
		*nportp = nport;

		/*
		 * Do we have a source port conflict in the stack state? Try to
		 * modulate the source port if so. Note that this is racy since
		 * the state lookup may not find any matches here but will once
		 * pf_create_state() actually instantiates the state.
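		 *
		 * The search below is bounded: it probes at most
		 * V_pf_rdr_srcport_rewrite_tries candidate source ports above
		 * and below a randomly chosen starting point.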
		 */
		bzero(&key, sizeof(key));
		key.af = pd->af;
		key.proto = pd->proto;
		key.port[0] = pd->nsport;
		pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af);
		key.port[1] = nport;
		pf_addrcpy(&key.addr[1], naddr, key.af);

		if (!pf_find_state_all_exists(&key, PF_OUT))
			break;

		tries = 0;

		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
		high = 65535;
		cut = arc4random() % (1 + high - low) + low;
		for (uint32_t tmp = cut;
		    tmp <= high && tmp <= UINT16_MAX &&
		    tries < V_pf_rdr_srcport_rewrite_tries;
		    tmp++, tries++) {
			key.port[0] = htons(tmp);
			if (!pf_find_state_all_exists(&key, PF_OUT)) {
				/* Update the source port. */
				ctx->nk->port[0] = htons(tmp);
				goto out;
			}
		}
		for (uint32_t tmp = cut - 1;
		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
		    tmp--, tries++) {
			key.port[0] = htons(tmp);
			if (!pf_find_state_all_exists(&key, PF_OUT)) {
				/* Update the source port. */
				ctx->nk->port[0] = htons(tmp);
				goto out;
			}
		}

		/*
		 * We failed to find a match. Push on ahead anyway, let
		 * pf_state_insert() be the arbiter of whether the state
		 * conflict is tolerable. In particular, with TCP connections
		 * the state may be reused if the TCP state is terminal.
		 */
		DPFPRINTF(PF_DEBUG_MISC,
		    "pf: RDR source port allocation failed");
		break;

out:
		DPFPRINTF(PF_DEBUG_MISC,
		    "pf: RDR source port allocation %u->%u",
		    ntohs(pd->nsport), ntohs(ctx->nk->port[0]));
		break;
	}
	default:
		panic("%s: unknown action %u", __func__, r->action);
	}

	/* Return success only if translation really happened. */
	if (bcmp(ctx->sk, ctx->nk, sizeof(struct pf_state_key_cmp))) {
		return (PFRES_MATCH);
	}

	reason = PFRES_MAX;
notrans:
	uma_zfree(V_pf_state_key_z, ctx->nk);
	uma_zfree(V_pf_state_key_z, ctx->sk);
	ctx->sk = ctx->nk = NULL;

	return (reason);
}

int
pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
	struct pf_addr	 ndaddr, nsaddr, naddr;
	u_int16_t	 nport = 0;
	int		 prefixlen = 96;

	bzero(&nsaddr, sizeof(nsaddr));
	bzero(&ndaddr, sizeof(ndaddr));

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s %s, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    TAILQ_EMPTY(&r->rdr.list) ?
"nat" : "rdr"); 1363 pf_print_host(&pd->nsaddr, pd->nsport, pd->af); 1364 printf(" -> "); 1365 pf_print_host(&pd->ndaddr, pd->ndport, pd->af); 1366 printf("\n"); 1367 } 1368 1369 if (TAILQ_EMPTY(&r->nat.list)) 1370 panic("pf_get_transaddr_af: no nat pool for source address"); 1371 1372 /* get source address and port */ 1373 if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0], 1374 r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) { 1375 DPFPRINTF(PF_DEBUG_MISC, 1376 "pf: af-to NAT proxy port allocation (%u-%u) failed", 1377 r->nat.proxy_port[0], r->nat.proxy_port[1]); 1378 return (-1); 1379 } 1380 1381 if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) { 1382 pd->ndport = ntohs(pd->ndport); 1383 if (pd->ndport == ICMP6_ECHO_REQUEST) 1384 pd->ndport = ICMP_ECHO; 1385 else if (pd->ndport == ICMP6_ECHO_REPLY) 1386 pd->ndport = ICMP_ECHOREPLY; 1387 pd->ndport = htons(pd->ndport); 1388 } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) { 1389 pd->nsport = ntohs(pd->nsport); 1390 if (pd->ndport == ICMP_ECHO) 1391 pd->ndport = ICMP6_ECHO_REQUEST; 1392 else if (pd->ndport == ICMP_ECHOREPLY) 1393 pd->ndport = ICMP6_ECHO_REPLY; 1394 pd->nsport = htons(pd->nsport); 1395 } 1396 1397 /* get the destination address and port */ 1398 if (! TAILQ_EMPTY(&r->rdr.list)) { 1399 if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, &(pd->naf), 1400 NULL, NULL, &r->rdr, PF_SN_NAT)) 1401 return (-1); 1402 if (r->rdr.proxy_port[0]) 1403 pd->ndport = htons(r->rdr.proxy_port[0]); 1404 1405 if (pd->naf == AF_INET) { 1406 /* The prefix is the IPv4 rdr address */ 1407 prefixlen = in_mask2len( 1408 (struct in_addr *)&r->rdr.cur->addr.v.a.mask); 1409 inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1410 prefixlen); 1411 } else { 1412 /* The prefix is the IPv6 rdr address */ 1413 prefixlen = in6_mask2len( 1414 (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL); 1415 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1416 prefixlen); 1417 } 1418 } else { 1419 if (pd->naf == AF_INET) { 1420 /* The prefix is the IPv6 dst address */ 1421 prefixlen = in6_mask2len( 1422 (struct in6_addr *)&r->dst.addr.v.a.mask, NULL); 1423 if (prefixlen < 32) 1424 prefixlen = 96; 1425 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr, 1426 prefixlen); 1427 } else { 1428 /* 1429 * The prefix is the IPv6 nat address 1430 * (that was stored in pd->nsaddr) 1431 */ 1432 prefixlen = in6_mask2len( 1433 (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL); 1434 if (prefixlen > 96) 1435 prefixlen = 96; 1436 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr, 1437 prefixlen); 1438 } 1439 } 1440 1441 pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); 1442 pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); 1443 1444 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1445 printf("pf: af-to %s done, prefixlen %d, ", 1446 pd->naf == AF_INET ? "inet" : "inet6", 1447 prefixlen); 1448 pf_print_host(&pd->nsaddr, pd->nsport, pd->naf); 1449 printf(" -> "); 1450 pf_print_host(&pd->ndaddr, pd->ndport, pd->naf); 1451 printf("\n"); 1452 } 1453 1454 return (0); 1455 #else 1456 return (-1); 1457 #endif 1458 } 1459