1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ 37 */ 38 39 #include <sys/cdefs.h> 40 #include "opt_pf.h" 41 #include "opt_inet.h" 42 #include "opt_inet6.h" 43 44 #include <sys/param.h> 45 #include <sys/lock.h> 46 #include <sys/mbuf.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <crypto/siphash/siphash.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/vnet.h> 55 #include <net/pfvar.h> 56 #include <net/if_pflog.h> 57 58 #ifdef INET 59 #include <netinet/in_var.h> 60 #endif /* INET */ 61 62 #ifdef INET6 63 #include <netinet6/in6_var.h> 64 #endif /* INET6 */ 65 66 67 /* 68 * Limit the amount of work we do to find a free source port for redirects that 69 * introduce a state conflict. 70 */ 71 #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries) 72 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; 73 74 static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, 75 struct pf_poolhashkey *, sa_family_t); 76 static struct pf_krule *pf_match_translation(int, struct pf_test_ctx *); 77 static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *, 78 struct pf_krule *); 79 static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, 80 struct pf_addr *, uint16_t *, uint16_t, uint16_t, 81 struct pf_kpool *, struct pf_udp_mapping **, 82 pf_sn_types_t); 83 static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); 84 85 static uint64_t 86 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 87 struct pf_poolhashkey *key, sa_family_t af) 88 { 89 SIPHASH_CTX ctx; 90 #ifdef INET6 91 union { 92 uint64_t hash64; 93 uint32_t hash32[2]; 94 } h; 95 #endif /* INET6 */ 96 uint64_t res = 0; 97 98 _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, ""); 99 100 switch (af) { 101 #ifdef INET 102 case AF_INET: 103 res = SipHash24(&ctx, (const uint8_t *)key, 104 &inaddr->addr32[0], sizeof(inaddr->addr32[0])); 105 hash->addr32[0] = res; 106 break; 107 #endif /* INET */ 108 #ifdef INET6 109 case AF_INET6: 110 res = SipHash24(&ctx, (const uint8_t *)key, 111 &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0])); 112 h.hash64 = res; 113 hash->addr32[0] = h.hash32[0]; 114 hash->addr32[1] = h.hash32[1]; 115 /* 116 * siphash isn't big enough, but flipping it around is 117 * good enough here. 118 */ 119 hash->addr32[2] = ~h.hash32[1]; 120 hash->addr32[3] = ~h.hash32[0]; 121 break; 122 #endif /* INET6 */ 123 default: 124 unhandled_af(af); 125 } 126 return (res); 127 } 128 129 #define PF_TEST_ATTRIB(t, a) \ 130 if (t) { \ 131 r = a; \ 132 continue; \ 133 } else do { \ 134 } while (0) 135 136 static enum pf_test_status 137 pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) 138 { 139 struct pf_krule *r; 140 struct pf_pdesc *pd = ctx->pd; 141 int rtableid = -1; 142 143 r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); 144 while (r != NULL) { 145 struct pf_rule_addr *src = NULL, *dst = NULL; 146 struct pf_addr_wrap *xdst = NULL; 147 148 if (r->action == PF_BINAT && pd->dir == PF_IN) { 149 src = &r->dst; 150 if (r->rdr.cur != NULL) 151 xdst = &r->rdr.cur->addr; 152 } else { 153 src = &r->src; 154 dst = &r->dst; 155 } 156 157 pf_counter_u64_add(&r->evaluations, 1); 158 PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot, 159 r->skip[PF_SKIP_IFP]); 160 PF_TEST_ATTRIB(r->direction && r->direction != pd->dir, 161 r->skip[PF_SKIP_DIR]); 162 PF_TEST_ATTRIB(r->af && r->af != pd->af, 163 r->skip[PF_SKIP_AF]); 164 PF_TEST_ATTRIB(r->proto && r->proto != pd->proto, 165 r->skip[PF_SKIP_PROTO]); 166 PF_TEST_ATTRIB(PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af, 167 src->neg, pd->kif, M_GETFIB(pd->m)), 168 r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : 169 PF_SKIP_DST_ADDR]); 170 PF_TEST_ATTRIB(src->port_op && !pf_match_port(src->port_op, 171 src->port[0], src->port[1], pd->nsport), 172 r->skip[src == &r->src ? PF_SKIP_SRC_PORT : 173 PF_SKIP_DST_PORT]); 174 PF_TEST_ATTRIB(dst != NULL && 175 PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL, 176 M_GETFIB(pd->m)), 177 r->skip[PF_SKIP_DST_ADDR]); 178 PF_TEST_ATTRIB(xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af, 179 0, NULL, M_GETFIB(pd->m)), 180 TAILQ_NEXT(r, entries)); 181 PF_TEST_ATTRIB(dst != NULL && dst->port_op && 182 !pf_match_port(dst->port_op, dst->port[0], 183 dst->port[1], pd->ndport), 184 r->skip[PF_SKIP_DST_PORT]); 185 PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag, 186 pd->pf_mtag ? pd->pf_mtag->tag : 0), 187 TAILQ_NEXT(r, entries)); 188 PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto != 189 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, 190 &pd->hdr.tcp), r->os_fingerprint)), 191 TAILQ_NEXT(r, entries)); 192 if (r->tag) 193 ctx->tag = r->tag; 194 if (r->rtableid >= 0) 195 rtableid = r->rtableid; 196 if (r->anchor == NULL) { 197 if (r->action == PF_NONAT || 198 r->action == PF_NORDR || 199 r->action == PF_NOBINAT) { 200 *ctx->rm = NULL; 201 } else { 202 /* 203 * found matching r 204 */ 205 ctx->tr = r; 206 /* 207 * anchor, with ruleset, where r belongs to 208 */ 209 *ctx->am = ctx->a; 210 /* 211 * ruleset where r belongs to 212 */ 213 *ctx->rsm = ruleset; 214 /* 215 * ruleset, where anchor belongs to. 216 */ 217 ctx->arsm = ctx->aruleset; 218 } 219 break; 220 } else { 221 ctx->a = r; /* remember anchor */ 222 ctx->aruleset = ruleset; /* and its ruleset */ 223 if (pf_step_into_translation_anchor(rs_num, ctx, 224 r) != PF_TEST_OK) { 225 break; 226 } 227 } 228 r = TAILQ_NEXT(r, entries); 229 } 230 231 if (ctx->tag > 0 && pf_tag_packet(pd, ctx->tag)) 232 return (PF_TEST_FAIL); 233 if (rtableid >= 0) 234 M_SETFIB(pd->m, rtableid); 235 236 return (PF_TEST_OK); 237 } 238 239 static enum pf_test_status 240 pf_step_into_translation_anchor(int rs_num, struct pf_test_ctx *ctx, struct pf_krule *r) 241 { 242 enum pf_test_status rv; 243 244 PF_RULES_RASSERT(); 245 246 if (ctx->depth >= PF_ANCHOR_STACK_MAX) { 247 printf("%s: anchor stack overflow on %s\n", 248 __func__, r->anchor->name); 249 return (PF_TEST_FAIL); 250 } 251 252 ctx->depth++; 253 254 if (r->anchor_wildcard) { 255 struct pf_kanchor *child; 256 rv = PF_TEST_OK; 257 RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) { 258 rv = pf_match_translation_rule(rs_num, ctx, &child->ruleset); 259 if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { 260 /* 261 * we either hit a rule qith quick action 262 * (more likely), or hit some runtime 263 * error (e.g. pool_get() faillure). 264 */ 265 break; 266 } 267 } 268 } else { 269 rv = pf_match_translation_rule(rs_num, ctx, &r->anchor->ruleset); 270 } 271 272 ctx->depth--; 273 274 return (rv); 275 } 276 277 static struct pf_krule * 278 pf_match_translation(int rs_num, struct pf_test_ctx *ctx) 279 { 280 enum pf_test_status rv; 281 282 MPASS(ctx->depth == 0); 283 rv = pf_match_translation_rule(rs_num, ctx, &pf_main_ruleset); 284 MPASS(ctx->depth == 0); 285 if (rv != PF_TEST_OK) 286 return (NULL); 287 288 return (ctx->tr); 289 } 290 291 static int 292 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, 293 uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool *rpool, 294 struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type) 295 { 296 struct pf_state_key_cmp key; 297 struct pf_addr init_addr; 298 int dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN; 299 int sidx = pd->sidx; 300 int didx = pd->didx; 301 302 bzero(&init_addr, sizeof(init_addr)); 303 304 MPASS(udp_mapping == NULL || 305 *udp_mapping == NULL); 306 307 /* 308 * If we are UDP and have an existing mapping we can get source port 309 * from the mapping. In this case we have to look up the src_node as 310 * pf_map_addr would. 311 */ 312 if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) { 313 struct pf_udp_endpoint_cmp udp_source; 314 315 bzero(&udp_source, sizeof(udp_source)); 316 udp_source.af = pd->af; 317 pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); 318 udp_source.port = pd->nsport; 319 if (udp_mapping) { 320 struct pf_ksrc_node *sn = NULL; 321 struct pf_srchash *sh = NULL; 322 *udp_mapping = pf_udp_mapping_find(&udp_source); 323 if (*udp_mapping) { 324 pf_addrcpy(naddr, 325 &(*udp_mapping)->endpoints[1].addr, 326 pd->af); 327 *nport = (*udp_mapping)->endpoints[1].port; 328 /* 329 * Try to find a src_node as per pf_map_addr(). 330 * XXX: Why? This code seems to do nothing. 331 */ 332 if (rpool->opts & PF_POOL_STICKYADDR && 333 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) 334 sn = pf_find_src_node(&pd->nsaddr, r, 335 pd->af, &sh, sn_type, false); 336 if (sn != NULL) 337 PF_SRC_NODE_UNLOCK(sn); 338 return (0); 339 } else { 340 *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr, 341 pd->nsport, &init_addr, 0); 342 if (*udp_mapping == NULL) 343 return (1); 344 } 345 } 346 } 347 348 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, &(pd->naf), NULL, 349 &init_addr, rpool, sn_type)) 350 goto failed; 351 352 if (pd->proto == IPPROTO_ICMP) { 353 if (pd->ndport == htons(ICMP_ECHO)) { 354 low = 1; 355 high = 65535; 356 } else { 357 MPASS(udp_mapping == NULL || 358 *udp_mapping == NULL); 359 return (0); /* Don't try to modify non-echo ICMP */ 360 } 361 } 362 #ifdef INET6 363 if (pd->proto == IPPROTO_ICMPV6) { 364 if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) { 365 low = 1; 366 high = 65535; 367 } else { 368 MPASS(udp_mapping == NULL || 369 *udp_mapping == NULL); 370 return (0); /* Don't try to modify non-echo ICMP */ 371 } 372 } 373 #endif /* INET6 */ 374 375 bzero(&key, sizeof(key)); 376 key.af = pd->naf; 377 key.proto = pd->proto; 378 379 do { 380 pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af); 381 pf_addrcpy(&key.addr[sidx], naddr, key.af); 382 key.port[didx] = pd->ndport; 383 384 if (udp_mapping && *udp_mapping) 385 pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr, 386 pd->af); 387 388 /* 389 * port search; start random, step; 390 * similar 2 portloop in in_pcbbind 391 */ 392 if (pd->proto == IPPROTO_SCTP) { 393 key.port[sidx] = pd->nsport; 394 MPASS(udp_mapping == NULL || 395 *udp_mapping == NULL); 396 if (!pf_find_state_all_exists(&key, dir)) { 397 *nport = pd->nsport; 398 return (0); 399 } else { 400 return (1); /* Fail mapping. */ 401 } 402 } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || 403 pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) { 404 /* 405 * XXX bug: icmp states don't use the id on both sides. 406 * (traceroute -I through nat) 407 */ 408 key.port[sidx] = pd->nsport; 409 if (!pf_find_state_all_exists(&key, dir)) { 410 MPASS(udp_mapping == NULL || 411 *udp_mapping == NULL); 412 *nport = pd->nsport; 413 return (0); 414 } 415 } else if (low == high) { 416 key.port[sidx] = htons(low); 417 if (!pf_find_state_all_exists(&key, dir)) { 418 if (udp_mapping && *udp_mapping != NULL) { 419 (*udp_mapping)->endpoints[1].port = htons(low); 420 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 421 *nport = htons(low); 422 return (0); 423 } 424 } else { 425 MPASS(udp_mapping == NULL || 426 *udp_mapping == NULL); 427 *nport = htons(low); 428 return (0); 429 } 430 } 431 } else { 432 uint32_t tmp; 433 uint16_t cut; 434 435 if (low > high) { 436 tmp = low; 437 low = high; 438 high = tmp; 439 } 440 /* low < high */ 441 cut = arc4random() % (1 + high - low) + low; 442 /* low <= cut <= high */ 443 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) { 444 if (udp_mapping && *udp_mapping != NULL) { 445 (*udp_mapping)->endpoints[sidx].port = htons(tmp); 446 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 447 *nport = htons(tmp); 448 return (0); 449 } 450 } else { 451 key.port[sidx] = htons(tmp); 452 if (!pf_find_state_all_exists(&key, dir)) { 453 *nport = htons(tmp); 454 MPASS(udp_mapping == NULL || 455 *udp_mapping == NULL); 456 return (0); 457 } 458 } 459 } 460 tmp = cut; 461 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) { 462 if (pd->proto == IPPROTO_UDP && 463 (rpool->opts & PF_POOL_ENDPI && 464 udp_mapping != NULL)) { 465 (*udp_mapping)->endpoints[1].port = htons(tmp); 466 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 467 *nport = htons(tmp); 468 return (0); 469 } 470 } else { 471 key.port[sidx] = htons(tmp); 472 if (!pf_find_state_all_exists(&key, dir)) { 473 MPASS(udp_mapping == NULL || 474 *udp_mapping == NULL); 475 *nport = htons(tmp); 476 return (0); 477 } 478 } 479 } 480 } 481 482 switch (rpool->opts & PF_POOL_TYPEMASK) { 483 case PF_POOL_RANDOM: 484 case PF_POOL_ROUNDROBIN: 485 /* 486 * pick a different source address since we're out 487 * of free port choices for the current one. 488 */ 489 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, 490 &(pd->naf), NULL, &init_addr, rpool, sn_type)) 491 goto failed; 492 break; 493 case PF_POOL_NONE: 494 case PF_POOL_SRCHASH: 495 case PF_POOL_BITMASK: 496 default: 497 goto failed; 498 } 499 } while (! PF_AEQ(&init_addr, naddr, pd->naf) ); 500 501 failed: 502 if (udp_mapping) { 503 uma_zfree(V_pf_udp_mapping_z, *udp_mapping); 504 *udp_mapping = NULL; 505 } 506 507 return (1); /* none available */ 508 } 509 510 static bool 511 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr) 512 { 513 if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6)) 514 return (true); 515 return (false); 516 } 517 518 static int 519 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, 520 struct pf_addr *naddr, uint16_t *nport, struct pf_udp_mapping **udp_mapping, 521 struct pf_kpool *rpool) 522 { 523 uint16_t psmask, low, highmask; 524 uint16_t i, ahigh, cut; 525 int ashift, psidshift; 526 527 ashift = 16 - rpool->mape.offset; 528 psidshift = ashift - rpool->mape.psidlen; 529 psmask = rpool->mape.psid & ((1U << rpool->mape.psidlen) - 1); 530 psmask = psmask << psidshift; 531 highmask = (1U << psidshift) - 1; 532 533 ahigh = (1U << rpool->mape.offset) - 1; 534 cut = arc4random() & ahigh; 535 if (cut == 0) 536 cut = 1; 537 538 for (i = cut; i <= ahigh; i++) { 539 low = (i << ashift) | psmask; 540 if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, 541 rpool, udp_mapping, PF_SN_NAT)) 542 return (0); 543 } 544 for (i = cut - 1; i > 0; i--) { 545 low = (i << ashift) | psmask; 546 if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, 547 rpool, udp_mapping, PF_SN_NAT)) 548 return (0); 549 } 550 return (1); 551 } 552 553 static __inline u_short 554 pf_check_src_node_valid(struct pf_ksrc_node *sn, struct pf_kpool *rpool) 555 { 556 struct pf_addr *raddr, *rmask; 557 struct pf_addr *caddr; /* cached redirection address */ 558 struct pf_kpooladdr *pa; 559 sa_family_t raf; 560 sa_family_t caf; /* cached redirection AF */ 561 u_short valid = 0; 562 563 KASSERT(sn != NULL, ("sn is NULL")); 564 KASSERT(rpool != NULL, ("rpool is NULL")); 565 566 /* check if the cached entry is still valid */ 567 568 if (sn->type == PF_SN_LIMIT) { 569 /* Always valid as it does not store redirection address */ 570 return (1); 571 } 572 573 mtx_lock(&rpool->mtx); 574 caddr = &(sn->raddr); 575 caf = sn->raf; 576 577 TAILQ_FOREACH(pa, &rpool->list, entries) { 578 if (PF_AZERO(caddr, caf)) { 579 valid = 1; 580 goto done; 581 } else if (pa->addr.type == PF_ADDR_DYNIFTL) { 582 if (pfr_kentry_byaddr(pa->addr.p.dyn->pfid_kt, caddr, caf, 0)) { 583 valid = 1; 584 goto done; 585 } 586 } else if (pa->addr.type == PF_ADDR_TABLE) { 587 if (pfr_kentry_byaddr(pa->addr.p.tbl, caddr, caf, 0)) { 588 valid = 1; 589 goto done; 590 } 591 } else if (pa->addr.type != PF_ADDR_NOROUTE) { 592 /* PF_ADDR_URPFFAILED, PF_ADDR_RANGE, PF_ADDR_ADDRMASK */ 593 raddr = &(pa->addr.v.a.addr); 594 rmask = &(pa->addr.v.a.mask); 595 raf = pa->af; 596 if (raf == caf && pf_match_addr(0, raddr, rmask, caddr, caf)) { 597 valid = 1; 598 goto done; 599 } 600 } 601 /* else PF_ADDR_NOROUTE */ 602 } 603 604 done: 605 mtx_unlock(&rpool->mtx); 606 607 return (valid); 608 } 609 610 u_short 611 pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, 612 struct pf_addr *naddr, struct pfi_kkif **nkif, sa_family_t *naf, 613 struct pf_addr *init_addr, struct pf_kpool *rpool) 614 { 615 u_short reason = PFRES_MATCH; 616 struct pf_addr *raddr = NULL, *rmask = NULL; 617 struct pfr_ktable *kt; 618 uint64_t hashidx; 619 int cnt; 620 sa_family_t wanted_af; 621 u_int8_t pool_type; 622 bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH; 623 624 KASSERT(saf != 0, ("%s: saf == 0", __func__)); 625 KASSERT(naf != NULL, ("%s: naf = NULL", __func__)); 626 KASSERT((*naf) != 0, ("%s: *naf = 0", __func__)); 627 628 /* 629 * Given (*naf) is a hint about AF of the forwarded packet. 630 * It might be changed if prefer_ipv6_nexthop is enabled and 631 * the combination of nexthop AF and packet AF allows for it. 632 */ 633 wanted_af = (*naf); 634 635 mtx_lock(&rpool->mtx); 636 /* Find the route using chosen algorithm. Store the found route 637 in src_node if it was given or found. */ 638 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) { 639 reason = PFRES_MAPFAILED; 640 goto done_pool_mtx; 641 } 642 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 643 switch (wanted_af) { 644 #ifdef INET 645 case AF_INET: 646 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && 647 !PF_POOL_DYNTYPE(rpool->opts)) { 648 reason = PFRES_MAPFAILED; 649 goto done_pool_mtx; 650 } 651 raddr = &rpool->cur->addr.p.dyn->pfid_addr4; 652 rmask = &rpool->cur->addr.p.dyn->pfid_mask4; 653 break; 654 #endif /* INET */ 655 #ifdef INET6 656 case AF_INET6: 657 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && 658 !PF_POOL_DYNTYPE(rpool->opts)) { 659 reason = PFRES_MAPFAILED; 660 goto done_pool_mtx; 661 } 662 raddr = &rpool->cur->addr.p.dyn->pfid_addr6; 663 rmask = &rpool->cur->addr.p.dyn->pfid_mask6; 664 break; 665 #endif /* INET6 */ 666 default: 667 unhandled_af(wanted_af); 668 } 669 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { 670 if (!PF_POOL_DYNTYPE(rpool->opts)) { 671 reason = PFRES_MAPFAILED; 672 goto done_pool_mtx; /* unsupported */ 673 } 674 } else { 675 raddr = &rpool->cur->addr.v.a.addr; 676 rmask = &rpool->cur->addr.v.a.mask; 677 } 678 679 /* 680 * For pools with a single host with the prefer-ipv6-nexthop option 681 * we can return pool address of any AF, unless the forwarded packet 682 * is IPv6, then we can return only if pool address is IPv6. 683 * For non-prefer-ipv6-nexthop we can return pool address only 684 * of wanted AF, unless the pool address'es AF is unknown, which 685 * happens in case old ioctls have been used to set up the pool. 686 * 687 * Round-robin pools have their own logic for retrying next addresses. 688 */ 689 pool_type = rpool->opts & PF_POOL_TYPEMASK; 690 if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK || 691 ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) && 692 rpool->cur->addr.type != PF_ADDR_TABLE && 693 rpool->cur->addr.type != PF_ADDR_DYNIFTL)) { 694 if (prefer_ipv6_nexthop) { 695 if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) { 696 reason = PFRES_MAPFAILED; 697 goto done_pool_mtx; 698 } 699 wanted_af = rpool->cur->af; 700 } else { 701 if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) { 702 reason = PFRES_MAPFAILED; 703 goto done_pool_mtx; 704 } 705 } 706 } 707 708 switch (pool_type) { 709 case PF_POOL_NONE: 710 pf_addrcpy(naddr, raddr, wanted_af); 711 break; 712 case PF_POOL_BITMASK: 713 pf_poolmask(naddr, raddr, rmask, saddr, wanted_af); 714 break; 715 case PF_POOL_RANDOM: 716 if (rpool->cur->addr.type == PF_ADDR_TABLE || 717 rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 718 if (rpool->cur->addr.type == PF_ADDR_TABLE) 719 kt = rpool->cur->addr.p.tbl; 720 else 721 kt = rpool->cur->addr.p.dyn->pfid_kt; 722 kt = pfr_ktable_select_active(kt); 723 if (kt == NULL) { 724 reason = PFRES_MAPFAILED; 725 goto done_pool_mtx; /* unsupported */ 726 } 727 cnt = kt->pfrkt_cnt; 728 if (cnt == 0) 729 rpool->tblidx = 0; 730 else 731 rpool->tblidx = (int)arc4random_uniform(cnt); 732 memset(&rpool->counter, 0, sizeof(rpool->counter)); 733 if (prefer_ipv6_nexthop) 734 wanted_af = AF_INET6; 735 retry_other_af_random: 736 if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, 737 wanted_af, pf_islinklocal, false)) { 738 /* Retry with IPv4 nexthop for IPv4 traffic */ 739 if (prefer_ipv6_nexthop && 740 wanted_af == AF_INET6 && 741 (*naf) == AF_INET) { 742 wanted_af = AF_INET; 743 goto retry_other_af_random; 744 } else { 745 /* no hosts in wanted AF */ 746 reason = PFRES_MAPFAILED; 747 goto done_pool_mtx; 748 } 749 } 750 pf_addrcpy(naddr, &rpool->counter, wanted_af); 751 } else if (init_addr != NULL && PF_AZERO(init_addr, 752 wanted_af)) { 753 switch (wanted_af) { 754 #ifdef INET 755 case AF_INET: 756 rpool->counter.addr32[0] = arc4random(); 757 break; 758 #endif /* INET */ 759 #ifdef INET6 760 case AF_INET6: 761 if (rmask->addr32[3] != 0xffffffff) 762 rpool->counter.addr32[3] = 763 arc4random(); 764 else 765 break; 766 if (rmask->addr32[2] != 0xffffffff) 767 rpool->counter.addr32[2] = 768 arc4random(); 769 else 770 break; 771 if (rmask->addr32[1] != 0xffffffff) 772 rpool->counter.addr32[1] = 773 arc4random(); 774 else 775 break; 776 if (rmask->addr32[0] != 0xffffffff) 777 rpool->counter.addr32[0] = 778 arc4random(); 779 break; 780 #endif /* INET6 */ 781 } 782 pf_poolmask(naddr, raddr, rmask, &rpool->counter, 783 wanted_af); 784 pf_addrcpy(init_addr, naddr, wanted_af); 785 786 } else { 787 pf_addr_inc(&rpool->counter, wanted_af); 788 pf_poolmask(naddr, raddr, rmask, &rpool->counter, 789 wanted_af); 790 } 791 break; 792 case PF_POOL_SRCHASH: 793 { 794 unsigned char hash[16]; 795 796 hashidx = 797 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, 798 wanted_af); 799 if (rpool->cur->addr.type == PF_ADDR_TABLE || 800 rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 801 if (rpool->cur->addr.type == PF_ADDR_TABLE) 802 kt = rpool->cur->addr.p.tbl; 803 else 804 kt = rpool->cur->addr.p.dyn->pfid_kt; 805 kt = pfr_ktable_select_active(kt); 806 if (kt == NULL) { 807 reason = PFRES_MAPFAILED; 808 goto done_pool_mtx; /* unsupported */ 809 } 810 cnt = kt->pfrkt_cnt; 811 if (cnt == 0) 812 rpool->tblidx = 0; 813 else 814 rpool->tblidx = (int)(hashidx % cnt); 815 memset(&rpool->counter, 0, sizeof(rpool->counter)); 816 if (prefer_ipv6_nexthop) 817 wanted_af = AF_INET6; 818 retry_other_af_srchash: 819 if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, 820 wanted_af, pf_islinklocal, false)) { 821 /* Retry with IPv4 nexthop for IPv4 traffic */ 822 if (prefer_ipv6_nexthop && 823 wanted_af == AF_INET6 && 824 (*naf) == AF_INET) { 825 wanted_af = AF_INET; 826 goto retry_other_af_srchash; 827 } else { 828 /* no hosts in wanted AF */ 829 reason = PFRES_MAPFAILED; 830 goto done_pool_mtx; 831 } 832 } 833 pf_addrcpy(naddr, &rpool->counter, wanted_af); 834 } else { 835 pf_poolmask(naddr, raddr, rmask, 836 (struct pf_addr *)&hash, wanted_af); 837 } 838 break; 839 } 840 case PF_POOL_ROUNDROBIN: 841 { 842 struct pf_kpooladdr *acur = rpool->cur; 843 844 retry_other_af_rr: 845 if (prefer_ipv6_nexthop) 846 wanted_af = rpool->ipv6_nexthop_af; 847 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 848 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 849 &rpool->tblidx, &rpool->counter, wanted_af, 850 NULL, true)) 851 goto get_addr; 852 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 853 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 854 &rpool->tblidx, &rpool->counter, wanted_af, 855 pf_islinklocal, true)) 856 goto get_addr; 857 } else if (rpool->cur->af == wanted_af && 858 pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af)) 859 goto get_addr; 860 if (prefer_ipv6_nexthop && 861 (*naf) == AF_INET && wanted_af == AF_INET6) { 862 /* Reset table index when changing wanted AF. */ 863 rpool->tblidx = -1; 864 rpool->ipv6_nexthop_af = AF_INET; 865 goto retry_other_af_rr; 866 } 867 try_next: 868 /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */ 869 rpool->ipv6_nexthop_af = AF_INET6; 870 if (TAILQ_NEXT(rpool->cur, entries) == NULL) 871 rpool->cur = TAILQ_FIRST(&rpool->list); 872 else 873 rpool->cur = TAILQ_NEXT(rpool->cur, entries); 874 try_next_ipv6_nexthop_rr: 875 /* Reset table index when iterating pools or changing wanted AF. */ 876 rpool->tblidx = -1; 877 if (prefer_ipv6_nexthop) 878 wanted_af = rpool->ipv6_nexthop_af; 879 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 880 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 881 &rpool->tblidx, &rpool->counter, wanted_af, NULL, 882 true)) 883 goto get_addr; 884 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 885 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 886 &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, 887 true)) 888 goto get_addr; 889 } else { 890 if (rpool->cur->af == wanted_af) { 891 raddr = &rpool->cur->addr.v.a.addr; 892 rmask = &rpool->cur->addr.v.a.mask; 893 pf_addrcpy(&rpool->counter, raddr, wanted_af); 894 goto get_addr; 895 } 896 } 897 if (prefer_ipv6_nexthop && 898 (*naf) == AF_INET && wanted_af == AF_INET6) { 899 rpool->ipv6_nexthop_af = AF_INET; 900 goto try_next_ipv6_nexthop_rr; 901 } 902 if (rpool->cur != acur) 903 goto try_next; 904 reason = PFRES_MAPFAILED; 905 goto done_pool_mtx; 906 get_addr: 907 pf_addrcpy(naddr, &rpool->counter, wanted_af); 908 if (init_addr != NULL && PF_AZERO(init_addr, wanted_af)) 909 pf_addrcpy(init_addr, naddr, wanted_af); 910 pf_addr_inc(&rpool->counter, wanted_af); 911 break; 912 } 913 } 914 915 if (wanted_af == 0) { 916 reason = PFRES_MAPFAILED; 917 goto done_pool_mtx; 918 } 919 920 if (nkif) 921 *nkif = rpool->cur->kif; 922 923 (*naf) = wanted_af; 924 925 done_pool_mtx: 926 mtx_unlock(&rpool->mtx); 927 928 return (reason); 929 } 930 931 u_short 932 pf_map_addr_sn(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, 933 struct pf_addr *naddr, sa_family_t *naf, struct pfi_kkif **nkif, 934 struct pf_addr *init_addr, struct pf_kpool *rpool, pf_sn_types_t sn_type) 935 { 936 struct pf_ksrc_node *sn = NULL; 937 struct pf_srchash *sh = NULL; 938 u_short reason = 0; 939 940 /* 941 * If this is a sticky-address rule, try to find an existing src_node. 942 */ 943 if (rpool->opts & PF_POOL_STICKYADDR && 944 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) 945 sn = pf_find_src_node(saddr, r, saf, &sh, sn_type, false); 946 947 if (sn != NULL) { 948 PF_SRC_NODE_LOCK_ASSERT(sn); 949 /* 950 * Check if source node's redirection address still exists 951 * in pool from which the SN was created. If not, delete it. 952 * Similar to pf_kill_srcnodes(). Unlink the source node 953 * from tree, unlink it from states, then free it. Do not 954 * overlap source node and state locks to avoid LOR. 955 */ 956 if (!pf_check_src_node_valid(sn, rpool)) { 957 pf_unlink_src_node(sn); 958 PF_SRC_NODE_UNLOCK(sn); 959 if (V_pf_status.debug >= PF_DEBUG_NOISY) { 960 printf("%s: stale src tracking (%d) ", 961 __func__, sn_type); 962 pf_print_host(saddr, 0, saf); 963 printf(" to "); 964 pf_print_host(&(sn->raddr), 0, sn->raf); 965 if (nkif) 966 printf("@%s", sn->rkif->pfik_name); 967 printf("\n"); 968 } 969 970 for (int i = 0; i <= V_pf_hashmask; i++) { 971 struct pf_idhash *ih = &V_pf_idhash[i]; 972 struct pf_kstate *st; 973 974 PF_HASHROW_LOCK(ih); 975 LIST_FOREACH(st, &ih->states, entry) { 976 if (st->sns[sn->type] == sn) { 977 st->sns[sn->type] = NULL; 978 } 979 } 980 PF_HASHROW_UNLOCK(ih); 981 } 982 pf_free_src_node(sn); 983 counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); 984 sn = NULL; 985 goto map_addr; 986 } 987 988 (*naf) = sn->raf; 989 990 /* If the supplied address is the same as the current one we've 991 * been asked before, so tell the caller that there's no other 992 * address to be had. */ 993 994 if (PF_AEQ(naddr, &(sn->raddr), *naf)) { 995 printf("%s: no more addresses\n", __func__); 996 reason = PFRES_MAPFAILED; 997 goto done; 998 } 999 1000 pf_addrcpy(naddr, &(sn->raddr), *naf); 1001 1002 if (nkif) 1003 *nkif = sn->rkif; 1004 if (V_pf_status.debug >= PF_DEBUG_NOISY) { 1005 printf("%s: src tracking maps ", __func__); 1006 pf_print_host(saddr, 0, saf); 1007 printf(" to "); 1008 pf_print_host(naddr, 0, *naf); 1009 if (nkif) 1010 printf("@%s", (*nkif)->pfik_name); 1011 printf("\n"); 1012 } 1013 goto done; 1014 } 1015 1016 map_addr: 1017 /* 1018 * Source node has not been found or is invalid. Find a new address 1019 * and store it in variables given by the caller. 1020 */ 1021 if ((reason = pf_map_addr(saf, r, saddr, naddr, nkif, naf, init_addr, 1022 rpool)) != 0) { 1023 if (V_pf_status.debug >= PF_DEBUG_MISC) 1024 printf("%s: pf_map_addr has failed\n", __func__); 1025 goto done; 1026 } 1027 1028 if (V_pf_status.debug >= PF_DEBUG_NOISY && 1029 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 1030 printf("%s: selected address ", __func__); 1031 pf_print_host(naddr, 0, *naf); 1032 if (nkif) 1033 printf("@%s", (*nkif)->pfik_name); 1034 printf("\n"); 1035 } 1036 1037 done: 1038 if (sn != NULL) 1039 PF_SRC_NODE_UNLOCK(sn); 1040 1041 return (reason); 1042 } 1043 1044 u_short 1045 pf_get_translation(struct pf_test_ctx *ctx) 1046 { 1047 struct pf_krule *r = NULL; 1048 u_short transerror; 1049 1050 PF_RULES_RASSERT(); 1051 KASSERT(ctx->sk == NULL, ("*skp not NULL")); 1052 KASSERT(ctx->nk == NULL, ("*nkp not NULL")); 1053 1054 ctx->nr = NULL; 1055 1056 if (ctx->pd->dir == PF_OUT) { 1057 r = pf_match_translation(PF_RULESET_BINAT, ctx); 1058 if (r == NULL) 1059 r = pf_match_translation(PF_RULESET_NAT, ctx); 1060 } else { 1061 r = pf_match_translation(PF_RULESET_RDR, ctx); 1062 if (r == NULL) 1063 r = pf_match_translation(PF_RULESET_BINAT, ctx); 1064 } 1065 1066 if (r == NULL) 1067 return (PFRES_MAX); 1068 1069 switch (r->action) { 1070 case PF_NONAT: 1071 case PF_NOBINAT: 1072 case PF_NORDR: 1073 return (PFRES_MAX); 1074 } 1075 1076 transerror = pf_get_transaddr(ctx, r, r->action, &(r->rdr)); 1077 if (transerror == PFRES_MATCH) 1078 ctx->nr = r; 1079 1080 return (transerror); 1081 } 1082 1083 u_short 1084 pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, 1085 uint8_t nat_action, struct pf_kpool *rpool) 1086 { 1087 struct pf_pdesc *pd = ctx->pd; 1088 struct pf_addr *naddr; 1089 int idx; 1090 uint16_t *nportp; 1091 uint16_t low, high; 1092 u_short reason; 1093 1094 PF_RULES_RASSERT(); 1095 KASSERT(r != NULL, ("r is NULL")); 1096 KASSERT(!(r->rule_flag & PFRULE_AFTO), ("AFTO rule")); 1097 1098 if (ctx->sk == NULL && ctx->nk == NULL) { 1099 if (pf_state_key_setup(pd, pd->nsport, pd->ndport, &ctx->sk, 1100 &ctx->nk)) 1101 return (PFRES_MEMORY); 1102 } 1103 1104 switch (nat_action) { 1105 case PF_NAT: 1106 idx = pd->sidx; 1107 break; 1108 case PF_BINAT: 1109 idx = 1; 1110 break; 1111 case PF_RDR: 1112 idx = pd->didx; 1113 break; 1114 } 1115 naddr = &ctx->nk->addr[idx]; 1116 nportp = &ctx->nk->port[idx]; 1117 1118 switch (nat_action) { 1119 case PF_NAT: 1120 if (pd->proto == IPPROTO_ICMP) { 1121 low = 1; 1122 high = 65535; 1123 } else { 1124 low = rpool->proxy_port[0]; 1125 high = rpool->proxy_port[1]; 1126 } 1127 if (rpool->mape.offset > 0) { 1128 if (pf_get_mape_sport(pd, r, naddr, nportp, 1129 &ctx->udp_mapping, rpool)) { 1130 DPFPRINTF(PF_DEBUG_MISC, 1131 "pf: MAP-E port allocation (%u/%u/%u)" 1132 " failed", 1133 rpool->mape.offset, 1134 rpool->mape.psidlen, 1135 rpool->mape.psid); 1136 reason = PFRES_MAPFAILED; 1137 goto notrans; 1138 } 1139 } else if (pf_get_sport(pd, r, naddr, nportp, low, high, 1140 rpool, &ctx->udp_mapping, PF_SN_NAT)) { 1141 DPFPRINTF(PF_DEBUG_MISC, 1142 "pf: NAT proxy port allocation (%u-%u) failed", 1143 rpool->proxy_port[0], rpool->proxy_port[1]); 1144 reason = PFRES_MAPFAILED; 1145 goto notrans; 1146 } 1147 break; 1148 case PF_BINAT: 1149 switch (pd->dir) { 1150 case PF_OUT: 1151 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL){ 1152 switch (pd->af) { 1153 #ifdef INET 1154 case AF_INET: 1155 if (rpool->cur->addr.p.dyn-> 1156 pfid_acnt4 < 1) { 1157 reason = PFRES_MAPFAILED; 1158 goto notrans; 1159 } 1160 pf_poolmask(naddr, 1161 &rpool->cur->addr.p.dyn->pfid_addr4, 1162 &rpool->cur->addr.p.dyn->pfid_mask4, 1163 &pd->nsaddr, AF_INET); 1164 break; 1165 #endif /* INET */ 1166 #ifdef INET6 1167 case AF_INET6: 1168 if (rpool->cur->addr.p.dyn-> 1169 pfid_acnt6 < 1) { 1170 reason = PFRES_MAPFAILED; 1171 goto notrans; 1172 } 1173 pf_poolmask(naddr, 1174 &rpool->cur->addr.p.dyn->pfid_addr6, 1175 &rpool->cur->addr.p.dyn->pfid_mask6, 1176 &pd->nsaddr, AF_INET6); 1177 break; 1178 #endif /* INET6 */ 1179 } 1180 } else 1181 pf_poolmask(naddr, 1182 &rpool->cur->addr.v.a.addr, 1183 &rpool->cur->addr.v.a.mask, &pd->nsaddr, 1184 pd->af); 1185 break; 1186 case PF_IN: 1187 if (r->src.addr.type == PF_ADDR_DYNIFTL) { 1188 switch (pd->af) { 1189 #ifdef INET 1190 case AF_INET: 1191 if (r->src.addr.p.dyn->pfid_acnt4 < 1) { 1192 reason = PFRES_MAPFAILED; 1193 goto notrans; 1194 } 1195 pf_poolmask(naddr, 1196 &r->src.addr.p.dyn->pfid_addr4, 1197 &r->src.addr.p.dyn->pfid_mask4, 1198 &pd->ndaddr, AF_INET); 1199 break; 1200 #endif /* INET */ 1201 #ifdef INET6 1202 case AF_INET6: 1203 if (r->src.addr.p.dyn->pfid_acnt6 < 1) { 1204 reason = PFRES_MAPFAILED; 1205 goto notrans; 1206 } 1207 pf_poolmask(naddr, 1208 &r->src.addr.p.dyn->pfid_addr6, 1209 &r->src.addr.p.dyn->pfid_mask6, 1210 &pd->ndaddr, AF_INET6); 1211 break; 1212 #endif /* INET6 */ 1213 } 1214 } else 1215 pf_poolmask(naddr, &r->src.addr.v.a.addr, 1216 &r->src.addr.v.a.mask, &pd->ndaddr, pd->af); 1217 break; 1218 } 1219 break; 1220 case PF_RDR: { 1221 struct pf_state_key_cmp key; 1222 int tries; 1223 uint16_t cut, low, high, nport; 1224 1225 reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, 1226 &(pd->naf), NULL, NULL, rpool, PF_SN_NAT); 1227 1228 if (reason != 0) 1229 goto notrans; 1230 if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 1231 pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask, 1232 &pd->ndaddr, pd->af); 1233 1234 /* Do not change SCTP ports. */ 1235 if (pd->proto == IPPROTO_SCTP) 1236 break; 1237 1238 if (rpool->proxy_port[1]) { 1239 uint32_t tmp_nport; 1240 uint16_t div; 1241 1242 div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1; 1243 div = (div == 0) ? 1 : div; 1244 1245 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) + 1246 rpool->proxy_port[0]; 1247 1248 /* Wrap around if necessary. */ 1249 if (tmp_nport > 65535) 1250 tmp_nport -= 65535; 1251 nport = htons((uint16_t)tmp_nport); 1252 } else if (rpool->proxy_port[0]) 1253 nport = htons(rpool->proxy_port[0]); 1254 else 1255 nport = pd->ndport; 1256 1257 /* 1258 * Update the destination port. 1259 */ 1260 *nportp = nport; 1261 1262 /* 1263 * Do we have a source port conflict in the stack state? Try to 1264 * modulate the source port if so. Note that this is racy since 1265 * the state lookup may not find any matches here but will once 1266 * pf_create_state() actually instantiates the state. 1267 */ 1268 bzero(&key, sizeof(key)); 1269 key.af = pd->af; 1270 key.proto = pd->proto; 1271 key.port[0] = pd->nsport; 1272 pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af); 1273 key.port[1] = nport; 1274 pf_addrcpy(&key.addr[1], naddr, key.af); 1275 1276 if (!pf_find_state_all_exists(&key, PF_OUT)) 1277 break; 1278 1279 tries = 0; 1280 1281 low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */ 1282 high = 65535; 1283 cut = arc4random() % (1 + high - low) + low; 1284 for (uint32_t tmp = cut; 1285 tmp <= high && tmp <= UINT16_MAX && 1286 tries < V_pf_rdr_srcport_rewrite_tries; 1287 tmp++, tries++) { 1288 key.port[0] = htons(tmp); 1289 if (!pf_find_state_all_exists(&key, PF_OUT)) { 1290 /* Update the source port. */ 1291 ctx->nk->port[0] = htons(tmp); 1292 goto out; 1293 } 1294 } 1295 for (uint32_t tmp = cut - 1; 1296 tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries; 1297 tmp--, tries++) { 1298 key.port[0] = htons(tmp); 1299 if (!pf_find_state_all_exists(&key, PF_OUT)) { 1300 /* Update the source port. */ 1301 ctx->nk->port[0] = htons(tmp); 1302 goto out; 1303 } 1304 } 1305 1306 /* 1307 * We failed to find a match. Push on ahead anyway, let 1308 * pf_state_insert() be the arbiter of whether the state 1309 * conflict is tolerable. In particular, with TCP connections 1310 * the state may be reused if the TCP state is terminal. 1311 */ 1312 DPFPRINTF(PF_DEBUG_MISC, 1313 "pf: RDR source port allocation failed"); 1314 break; 1315 1316 out: 1317 DPFPRINTF(PF_DEBUG_MISC, 1318 "pf: RDR source port allocation %u->%u", 1319 ntohs(pd->nsport), ntohs(ctx->nk->port[0])); 1320 break; 1321 } 1322 default: 1323 panic("%s: unknown action %u", __func__, r->action); 1324 } 1325 1326 /* Return success only if translation really happened. */ 1327 if (bcmp(ctx->sk, ctx->nk, sizeof(struct pf_state_key_cmp))) { 1328 return (PFRES_MATCH); 1329 } 1330 1331 reason = PFRES_MAX; 1332 notrans: 1333 uma_zfree(V_pf_state_key_z, ctx->nk); 1334 uma_zfree(V_pf_state_key_z, ctx->sk); 1335 ctx->sk = ctx->nk = NULL; 1336 1337 return (reason); 1338 } 1339 1340 int 1341 pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) 1342 { 1343 #if defined(INET) && defined(INET6) 1344 struct pf_addr ndaddr, nsaddr, naddr; 1345 u_int16_t nport = 0; 1346 int prefixlen = 96; 1347 1348 bzero(&nsaddr, sizeof(nsaddr)); 1349 bzero(&ndaddr, sizeof(ndaddr)); 1350 1351 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1352 printf("pf: af-to %s %s, ", 1353 pd->naf == AF_INET ? "inet" : "inet6", 1354 TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr"); 1355 pf_print_host(&pd->nsaddr, pd->nsport, pd->af); 1356 printf(" -> "); 1357 pf_print_host(&pd->ndaddr, pd->ndport, pd->af); 1358 printf("\n"); 1359 } 1360 1361 if (TAILQ_EMPTY(&r->nat.list)) 1362 panic("pf_get_transaddr_af: no nat pool for source address"); 1363 1364 /* get source address and port */ 1365 if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0], 1366 r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) { 1367 DPFPRINTF(PF_DEBUG_MISC, 1368 "pf: af-to NAT proxy port allocation (%u-%u) failed", 1369 r->nat.proxy_port[0], r->nat.proxy_port[1]); 1370 return (-1); 1371 } 1372 1373 if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) { 1374 pd->ndport = ntohs(pd->ndport); 1375 if (pd->ndport == ICMP6_ECHO_REQUEST) 1376 pd->ndport = ICMP_ECHO; 1377 else if (pd->ndport == ICMP6_ECHO_REPLY) 1378 pd->ndport = ICMP_ECHOREPLY; 1379 pd->ndport = htons(pd->ndport); 1380 } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) { 1381 pd->nsport = ntohs(pd->nsport); 1382 if (pd->ndport == ICMP_ECHO) 1383 pd->ndport = ICMP6_ECHO_REQUEST; 1384 else if (pd->ndport == ICMP_ECHOREPLY) 1385 pd->ndport = ICMP6_ECHO_REPLY; 1386 pd->nsport = htons(pd->nsport); 1387 } 1388 1389 /* get the destination address and port */ 1390 if (! TAILQ_EMPTY(&r->rdr.list)) { 1391 if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, &(pd->naf), 1392 NULL, NULL, &r->rdr, PF_SN_NAT)) 1393 return (-1); 1394 if (r->rdr.proxy_port[0]) 1395 pd->ndport = htons(r->rdr.proxy_port[0]); 1396 1397 if (pd->naf == AF_INET) { 1398 /* The prefix is the IPv4 rdr address */ 1399 prefixlen = in_mask2len( 1400 (struct in_addr *)&r->rdr.cur->addr.v.a.mask); 1401 inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1402 prefixlen); 1403 } else { 1404 /* The prefix is the IPv6 rdr address */ 1405 prefixlen = in6_mask2len( 1406 (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL); 1407 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1408 prefixlen); 1409 } 1410 } else { 1411 if (pd->naf == AF_INET) { 1412 /* The prefix is the IPv6 dst address */ 1413 prefixlen = in6_mask2len( 1414 (struct in6_addr *)&r->dst.addr.v.a.mask, NULL); 1415 if (prefixlen < 32) 1416 prefixlen = 96; 1417 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr, 1418 prefixlen); 1419 } else { 1420 /* 1421 * The prefix is the IPv6 nat address 1422 * (that was stored in pd->nsaddr) 1423 */ 1424 prefixlen = in6_mask2len( 1425 (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL); 1426 if (prefixlen > 96) 1427 prefixlen = 96; 1428 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr, 1429 prefixlen); 1430 } 1431 } 1432 1433 pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); 1434 pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); 1435 1436 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1437 printf("pf: af-to %s done, prefixlen %d, ", 1438 pd->naf == AF_INET ? "inet" : "inet6", 1439 prefixlen); 1440 pf_print_host(&pd->nsaddr, pd->nsport, pd->naf); 1441 printf(" -> "); 1442 pf_print_host(&pd->ndaddr, pd->ndport, pd->naf); 1443 printf("\n"); 1444 } 1445 1446 return (0); 1447 #else 1448 return (-1); 1449 #endif 1450 } 1451