1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ 37 */ 38 39 #include <sys/cdefs.h> 40 #include "opt_pf.h" 41 #include "opt_inet.h" 42 #include "opt_inet6.h" 43 44 #include <sys/param.h> 45 #include <sys/lock.h> 46 #include <sys/mbuf.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <crypto/siphash/siphash.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/vnet.h> 55 #include <net/pfvar.h> 56 #include <net/if_pflog.h> 57 58 #ifdef INET 59 #include <netinet/in_var.h> 60 #endif /* INET */ 61 62 #ifdef INET6 63 #include <netinet6/in6_var.h> 64 #endif /* INET6 */ 65 66 67 /* 68 * Limit the amount of work we do to find a free source port for redirects that 69 * introduce a state conflict. 70 */ 71 #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries) 72 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; 73 74 static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, 75 struct pf_poolhashkey *, sa_family_t); 76 struct pf_krule *pf_match_translation(int, struct pf_test_ctx *); 77 static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *, 78 struct pf_krule *); 79 static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, 80 struct pf_addr *, uint16_t *, uint16_t, uint16_t, 81 struct pf_kpool *, struct pf_udp_mapping **, 82 pf_sn_types_t); 83 static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); 84 85 static uint64_t 86 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 87 struct pf_poolhashkey *key, sa_family_t af) 88 { 89 SIPHASH_CTX ctx; 90 #ifdef INET6 91 union { 92 uint64_t hash64; 93 uint32_t hash32[2]; 94 } h; 95 #endif /* INET6 */ 96 uint64_t res = 0; 97 98 _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, ""); 99 100 switch (af) { 101 #ifdef INET 102 case AF_INET: 103 res = SipHash24(&ctx, (const uint8_t *)key, 104 &inaddr->addr32[0], sizeof(inaddr->addr32[0])); 105 hash->addr32[0] = res; 106 break; 107 #endif /* INET */ 108 #ifdef INET6 109 case AF_INET6: 110 res = SipHash24(&ctx, (const uint8_t *)key, 111 &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0])); 112 h.hash64 = res; 113 hash->addr32[0] = h.hash32[0]; 114 hash->addr32[1] = h.hash32[1]; 115 /* 116 * siphash isn't big enough, but flipping it around is 117 * good enough here. 118 */ 119 hash->addr32[2] = ~h.hash32[1]; 120 hash->addr32[3] = ~h.hash32[0]; 121 break; 122 #endif /* INET6 */ 123 default: 124 unhandled_af(af); 125 } 126 return (res); 127 } 128 129 #define PF_TEST_ATTRIB(t, a) \ 130 if (t) { \ 131 r = a; \ 132 continue; \ 133 } else do { \ 134 } while (0) 135 136 static enum pf_test_status 137 pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) 138 { 139 struct pf_krule *r; 140 struct pf_pdesc *pd = ctx->pd; 141 int rtableid = -1; 142 143 r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); 144 while (r != NULL) { 145 struct pf_rule_addr *src = NULL, *dst = NULL; 146 struct pf_addr_wrap *xdst = NULL; 147 148 if (r->action == PF_BINAT && pd->dir == PF_IN) { 149 src = &r->dst; 150 if (r->rdr.cur != NULL) 151 xdst = &r->rdr.cur->addr; 152 } else { 153 src = &r->src; 154 dst = &r->dst; 155 } 156 157 pf_counter_u64_add(&r->evaluations, 1); 158 PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot, 159 r->skip[PF_SKIP_IFP]); 160 PF_TEST_ATTRIB(r->direction && r->direction != pd->dir, 161 r->skip[PF_SKIP_DIR]); 162 PF_TEST_ATTRIB(r->af && r->af != pd->af, 163 r->skip[PF_SKIP_AF]); 164 PF_TEST_ATTRIB(r->proto && r->proto != pd->proto, 165 r->skip[PF_SKIP_PROTO]); 166 PF_TEST_ATTRIB(PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af, 167 src->neg, pd->kif, M_GETFIB(pd->m)), 168 r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : 169 PF_SKIP_DST_ADDR]); 170 PF_TEST_ATTRIB(src->port_op && !pf_match_port(src->port_op, 171 src->port[0], src->port[1], pd->nsport), 172 r->skip[src == &r->src ? PF_SKIP_SRC_PORT : 173 PF_SKIP_DST_PORT]); 174 PF_TEST_ATTRIB(dst != NULL && 175 PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL, 176 M_GETFIB(pd->m)), 177 r->skip[PF_SKIP_DST_ADDR]); 178 PF_TEST_ATTRIB(xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af, 179 0, NULL, M_GETFIB(pd->m)), 180 TAILQ_NEXT(r, entries)); 181 PF_TEST_ATTRIB(dst != NULL && dst->port_op && 182 !pf_match_port(dst->port_op, dst->port[0], 183 dst->port[1], pd->ndport), 184 r->skip[PF_SKIP_DST_PORT]); 185 PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag, 186 pd->pf_mtag ? pd->pf_mtag->tag : 0), 187 TAILQ_NEXT(r, entries)); 188 PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto != 189 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, 190 &pd->hdr.tcp), r->os_fingerprint)), 191 TAILQ_NEXT(r, entries)); 192 if (r->tag) 193 ctx->tag = r->tag; 194 if (r->rtableid >= 0) 195 rtableid = r->rtableid; 196 if (r->anchor == NULL) { 197 if (r->action == PF_NONAT || 198 r->action == PF_NORDR || 199 r->action == PF_NOBINAT) { 200 *ctx->rm = NULL; 201 } else { 202 /* 203 * found matching r 204 */ 205 ctx->tr = r; 206 /* 207 * anchor, with ruleset, where r belongs to 208 */ 209 *ctx->am = ctx->a; 210 /* 211 * ruleset where r belongs to 212 */ 213 *ctx->rsm = ruleset; 214 /* 215 * ruleset, where anchor belongs to. 216 */ 217 ctx->arsm = ctx->aruleset; 218 } 219 } else { 220 ctx->a = r; /* remember anchor */ 221 ctx->aruleset = ruleset; /* and its ruleset */ 222 if (pf_step_into_translation_anchor(rs_num, ctx, 223 r) != PF_TEST_OK) { 224 break; 225 } 226 } 227 r = TAILQ_NEXT(r, entries); 228 } 229 230 if (ctx->tag > 0 && pf_tag_packet(pd, ctx->tag)) 231 return (PF_TEST_FAIL); 232 if (rtableid >= 0) 233 M_SETFIB(pd->m, rtableid); 234 235 return (PF_TEST_OK); 236 } 237 238 static enum pf_test_status 239 pf_step_into_translation_anchor(int rs_num, struct pf_test_ctx *ctx, struct pf_krule *r) 240 { 241 enum pf_test_status rv; 242 243 PF_RULES_RASSERT(); 244 245 if (ctx->depth >= PF_ANCHOR_STACK_MAX) { 246 printf("%s: anchor stack overflow on %s\n", 247 __func__, r->anchor->name); 248 return (PF_TEST_FAIL); 249 } 250 251 ctx->depth++; 252 253 if (r->anchor_wildcard) { 254 struct pf_kanchor *child; 255 rv = PF_TEST_OK; 256 RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) { 257 rv = pf_match_translation_rule(rs_num, ctx, &child->ruleset); 258 if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { 259 /* 260 * we either hit a rule qith quick action 261 * (more likely), or hit some runtime 262 * error (e.g. pool_get() faillure). 263 */ 264 break; 265 } 266 } 267 } else { 268 rv = pf_match_translation_rule(rs_num, ctx, &r->anchor->ruleset); 269 } 270 271 ctx->depth--; 272 273 return (rv); 274 } 275 276 struct pf_krule * 277 pf_match_translation(int rs_num, struct pf_test_ctx *ctx) 278 { 279 enum pf_test_status rv; 280 281 MPASS(ctx->depth == 0); 282 rv = pf_match_translation_rule(rs_num, ctx, &pf_main_ruleset); 283 MPASS(ctx->depth == 0); 284 if (rv != PF_TEST_OK) 285 return (NULL); 286 287 return (ctx->tr); 288 } 289 290 static int 291 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, 292 uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool *rpool, 293 struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type) 294 { 295 struct pf_state_key_cmp key; 296 struct pf_addr init_addr; 297 int dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN; 298 int sidx = pd->sidx; 299 int didx = pd->didx; 300 301 bzero(&init_addr, sizeof(init_addr)); 302 303 if (udp_mapping) { 304 MPASS(*udp_mapping == NULL); 305 } 306 307 /* 308 * If we are UDP and have an existing mapping we can get source port 309 * from the mapping. In this case we have to look up the src_node as 310 * pf_map_addr would. 311 */ 312 if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) { 313 struct pf_udp_endpoint_cmp udp_source; 314 315 bzero(&udp_source, sizeof(udp_source)); 316 udp_source.af = pd->af; 317 pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); 318 udp_source.port = pd->nsport; 319 if (udp_mapping) { 320 struct pf_ksrc_node *sn = NULL; 321 struct pf_srchash *sh = NULL; 322 *udp_mapping = pf_udp_mapping_find(&udp_source); 323 if (*udp_mapping) { 324 pf_addrcpy(naddr, 325 &(*udp_mapping)->endpoints[1].addr, 326 pd->af); 327 *nport = (*udp_mapping)->endpoints[1].port; 328 /* 329 * Try to find a src_node as per pf_map_addr(). 330 * XXX: Why? This code seems to do nothing. 331 */ 332 if (rpool->opts & PF_POOL_STICKYADDR && 333 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) 334 sn = pf_find_src_node(&pd->nsaddr, r, 335 pd->af, &sh, sn_type, false); 336 if (sn != NULL) 337 PF_SRC_NODE_UNLOCK(sn); 338 return (0); 339 } else { 340 *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr, 341 pd->nsport, &init_addr, 0); 342 if (*udp_mapping == NULL) 343 return (1); 344 } 345 } 346 } 347 348 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr, 349 rpool, sn_type)) 350 goto failed; 351 352 if (pd->proto == IPPROTO_ICMP) { 353 if (pd->ndport == htons(ICMP_ECHO)) { 354 low = 1; 355 high = 65535; 356 } else 357 return (0); /* Don't try to modify non-echo ICMP */ 358 } 359 #ifdef INET6 360 if (pd->proto == IPPROTO_ICMPV6) { 361 if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) { 362 low = 1; 363 high = 65535; 364 } else 365 return (0); /* Don't try to modify non-echo ICMP */ 366 } 367 #endif /* INET6 */ 368 369 bzero(&key, sizeof(key)); 370 key.af = pd->naf; 371 key.proto = pd->proto; 372 373 do { 374 pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af); 375 pf_addrcpy(&key.addr[sidx], naddr, key.af); 376 key.port[didx] = pd->ndport; 377 378 if (udp_mapping && *udp_mapping) 379 pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr, 380 pd->af); 381 382 /* 383 * port search; start random, step; 384 * similar 2 portloop in in_pcbbind 385 */ 386 if (pd->proto == IPPROTO_SCTP) { 387 key.port[sidx] = pd->nsport; 388 if (!pf_find_state_all_exists(&key, dir)) { 389 *nport = pd->nsport; 390 return (0); 391 } else { 392 return (1); /* Fail mapping. */ 393 } 394 } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || 395 pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) { 396 /* 397 * XXX bug: icmp states don't use the id on both sides. 398 * (traceroute -I through nat) 399 */ 400 key.port[sidx] = pd->nsport; 401 if (!pf_find_state_all_exists(&key, dir)) { 402 *nport = pd->nsport; 403 return (0); 404 } 405 } else if (low == high) { 406 key.port[sidx] = htons(low); 407 if (!pf_find_state_all_exists(&key, dir)) { 408 if (udp_mapping && *udp_mapping != NULL) { 409 (*udp_mapping)->endpoints[1].port = htons(low); 410 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 411 *nport = htons(low); 412 return (0); 413 } 414 } else { 415 *nport = htons(low); 416 return (0); 417 } 418 } 419 } else { 420 uint32_t tmp; 421 uint16_t cut; 422 423 if (low > high) { 424 tmp = low; 425 low = high; 426 high = tmp; 427 } 428 /* low < high */ 429 cut = arc4random() % (1 + high - low) + low; 430 /* low <= cut <= high */ 431 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) { 432 if (udp_mapping && *udp_mapping != NULL) { 433 (*udp_mapping)->endpoints[sidx].port = htons(tmp); 434 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 435 *nport = htons(tmp); 436 return (0); 437 } 438 } else { 439 key.port[sidx] = htons(tmp); 440 if (!pf_find_state_all_exists(&key, dir)) { 441 *nport = htons(tmp); 442 return (0); 443 } 444 } 445 } 446 tmp = cut; 447 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) { 448 if (pd->proto == IPPROTO_UDP && 449 (rpool->opts & PF_POOL_ENDPI && 450 udp_mapping != NULL)) { 451 (*udp_mapping)->endpoints[1].port = htons(tmp); 452 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 453 *nport = htons(tmp); 454 return (0); 455 } 456 } else { 457 key.port[sidx] = htons(tmp); 458 if (!pf_find_state_all_exists(&key, dir)) { 459 *nport = htons(tmp); 460 return (0); 461 } 462 } 463 } 464 } 465 466 switch (rpool->opts & PF_POOL_TYPEMASK) { 467 case PF_POOL_RANDOM: 468 case PF_POOL_ROUNDROBIN: 469 /* 470 * pick a different source address since we're out 471 * of free port choices for the current one. 472 */ 473 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, 474 &init_addr, rpool, sn_type)) 475 return (1); 476 break; 477 case PF_POOL_NONE: 478 case PF_POOL_SRCHASH: 479 case PF_POOL_BITMASK: 480 default: 481 return (1); 482 } 483 } while (! PF_AEQ(&init_addr, naddr, pd->naf) ); 484 485 failed: 486 if (udp_mapping) { 487 uma_zfree(V_pf_udp_mapping_z, *udp_mapping); 488 *udp_mapping = NULL; 489 } 490 491 return (1); /* none available */ 492 } 493 494 static bool 495 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr) 496 { 497 if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6)) 498 return (true); 499 return (false); 500 } 501 502 static int 503 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, 504 struct pf_addr *naddr, uint16_t *nport, 505 struct pf_udp_mapping **udp_mapping, struct pf_kpool *rpool) 506 { 507 uint16_t psmask, low, highmask; 508 uint16_t i, ahigh, cut; 509 int ashift, psidshift; 510 511 ashift = 16 - rpool->mape.offset; 512 psidshift = ashift - rpool->mape.psidlen; 513 psmask = rpool->mape.psid & ((1U << rpool->mape.psidlen) - 1); 514 psmask = psmask << psidshift; 515 highmask = (1U << psidshift) - 1; 516 517 ahigh = (1U << rpool->mape.offset) - 1; 518 cut = arc4random() & ahigh; 519 if (cut == 0) 520 cut = 1; 521 522 for (i = cut; i <= ahigh; i++) { 523 low = (i << ashift) | psmask; 524 if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, 525 rpool, udp_mapping, PF_SN_NAT)) 526 return (0); 527 } 528 for (i = cut - 1; i > 0; i--) { 529 low = (i << ashift) | psmask; 530 if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, 531 rpool, udp_mapping, PF_SN_NAT)) 532 return (0); 533 } 534 return (1); 535 } 536 537 u_short 538 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, 539 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, 540 struct pf_kpool *rpool) 541 { 542 u_short reason = PFRES_MATCH; 543 struct pf_addr *raddr = NULL, *rmask = NULL; 544 struct pfr_ktable *kt; 545 uint64_t hashidx; 546 int cnt; 547 548 mtx_lock(&rpool->mtx); 549 /* Find the route using chosen algorithm. Store the found route 550 in src_node if it was given or found. */ 551 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) { 552 reason = PFRES_MAPFAILED; 553 goto done_pool_mtx; 554 } 555 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 556 switch (af) { 557 #ifdef INET 558 case AF_INET: 559 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && 560 !PF_POOL_DYNTYPE(rpool->opts)) { 561 reason = PFRES_MAPFAILED; 562 goto done_pool_mtx; 563 } 564 raddr = &rpool->cur->addr.p.dyn->pfid_addr4; 565 rmask = &rpool->cur->addr.p.dyn->pfid_mask4; 566 break; 567 #endif /* INET */ 568 #ifdef INET6 569 case AF_INET6: 570 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && 571 !PF_POOL_DYNTYPE(rpool->opts)) { 572 reason = PFRES_MAPFAILED; 573 goto done_pool_mtx; 574 } 575 raddr = &rpool->cur->addr.p.dyn->pfid_addr6; 576 rmask = &rpool->cur->addr.p.dyn->pfid_mask6; 577 break; 578 #endif /* INET6 */ 579 default: 580 unhandled_af(af); 581 } 582 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { 583 if (!PF_POOL_DYNTYPE(rpool->opts)) { 584 reason = PFRES_MAPFAILED; 585 goto done_pool_mtx; /* unsupported */ 586 } 587 } else { 588 raddr = &rpool->cur->addr.v.a.addr; 589 rmask = &rpool->cur->addr.v.a.mask; 590 } 591 592 switch (rpool->opts & PF_POOL_TYPEMASK) { 593 case PF_POOL_NONE: 594 pf_addrcpy(naddr, raddr, af); 595 break; 596 case PF_POOL_BITMASK: 597 pf_poolmask(naddr, raddr, rmask, saddr, af); 598 break; 599 case PF_POOL_RANDOM: 600 if (rpool->cur->addr.type == PF_ADDR_TABLE || 601 rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 602 if (rpool->cur->addr.type == PF_ADDR_TABLE) 603 kt = rpool->cur->addr.p.tbl; 604 else 605 kt = rpool->cur->addr.p.dyn->pfid_kt; 606 kt = pfr_ktable_select_active(kt); 607 if (kt == NULL) { 608 reason = PFRES_MAPFAILED; 609 goto done_pool_mtx; /* unsupported */ 610 } 611 cnt = kt->pfrkt_cnt; 612 if (cnt == 0) 613 rpool->tblidx = 0; 614 else 615 rpool->tblidx = (int)arc4random_uniform(cnt); 616 memset(&rpool->counter, 0, sizeof(rpool->counter)); 617 if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, 618 af, pf_islinklocal, false)) { 619 reason = PFRES_MAPFAILED; 620 goto done_pool_mtx; /* unsupported */ 621 } 622 pf_addrcpy(naddr, &rpool->counter, af); 623 } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { 624 switch (af) { 625 #ifdef INET 626 case AF_INET: 627 rpool->counter.addr32[0] = arc4random(); 628 break; 629 #endif /* INET */ 630 #ifdef INET6 631 case AF_INET6: 632 if (rmask->addr32[3] != 0xffffffff) 633 rpool->counter.addr32[3] = 634 arc4random(); 635 else 636 break; 637 if (rmask->addr32[2] != 0xffffffff) 638 rpool->counter.addr32[2] = 639 arc4random(); 640 else 641 break; 642 if (rmask->addr32[1] != 0xffffffff) 643 rpool->counter.addr32[1] = 644 arc4random(); 645 else 646 break; 647 if (rmask->addr32[0] != 0xffffffff) 648 rpool->counter.addr32[0] = 649 arc4random(); 650 break; 651 #endif /* INET6 */ 652 } 653 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); 654 pf_addrcpy(init_addr, naddr, af); 655 656 } else { 657 pf_addr_inc(&rpool->counter, af); 658 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); 659 } 660 break; 661 case PF_POOL_SRCHASH: 662 { 663 unsigned char hash[16]; 664 665 hashidx = 666 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 667 if (rpool->cur->addr.type == PF_ADDR_TABLE || 668 rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 669 if (rpool->cur->addr.type == PF_ADDR_TABLE) 670 kt = rpool->cur->addr.p.tbl; 671 else 672 kt = rpool->cur->addr.p.dyn->pfid_kt; 673 kt = pfr_ktable_select_active(kt); 674 if (kt == NULL) { 675 reason = PFRES_MAPFAILED; 676 goto done_pool_mtx; /* unsupported */ 677 } 678 cnt = kt->pfrkt_cnt; 679 if (cnt == 0) 680 rpool->tblidx = 0; 681 else 682 rpool->tblidx = (int)(hashidx % cnt); 683 memset(&rpool->counter, 0, sizeof(rpool->counter)); 684 if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, 685 af, pf_islinklocal, false)) { 686 reason = PFRES_MAPFAILED; 687 goto done_pool_mtx; /* unsupported */ 688 } 689 pf_addrcpy(naddr, &rpool->counter, af); 690 } else { 691 pf_poolmask(naddr, raddr, rmask, 692 (struct pf_addr *)&hash, af); 693 } 694 break; 695 } 696 case PF_POOL_ROUNDROBIN: 697 { 698 struct pf_kpooladdr *acur = rpool->cur; 699 700 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 701 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 702 &rpool->tblidx, &rpool->counter, af, NULL, true)) 703 goto get_addr; 704 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 705 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 706 &rpool->tblidx, &rpool->counter, af, pf_islinklocal, 707 true)) 708 goto get_addr; 709 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 710 goto get_addr; 711 712 try_next: 713 if (TAILQ_NEXT(rpool->cur, entries) == NULL) 714 rpool->cur = TAILQ_FIRST(&rpool->list); 715 else 716 rpool->cur = TAILQ_NEXT(rpool->cur, entries); 717 rpool->tblidx = -1; 718 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 719 if (pfr_pool_get(rpool->cur->addr.p.tbl, 720 &rpool->tblidx, &rpool->counter, af, NULL, true)) { 721 /* table contains no address of type 'af' */ 722 if (rpool->cur != acur) 723 goto try_next; 724 reason = PFRES_MAPFAILED; 725 goto done_pool_mtx; 726 } 727 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 728 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 729 &rpool->tblidx, &rpool->counter, af, pf_islinklocal, 730 true)) { 731 /* table contains no address of type 'af' */ 732 if (rpool->cur != acur) 733 goto try_next; 734 reason = PFRES_MAPFAILED; 735 goto done_pool_mtx; 736 } 737 } else { 738 raddr = &rpool->cur->addr.v.a.addr; 739 rmask = &rpool->cur->addr.v.a.mask; 740 pf_addrcpy(&rpool->counter, raddr, af); 741 } 742 743 get_addr: 744 pf_addrcpy(naddr, &rpool->counter, af); 745 if (init_addr != NULL && PF_AZERO(init_addr, af)) 746 pf_addrcpy(init_addr, naddr, af); 747 pf_addr_inc(&rpool->counter, af); 748 break; 749 } 750 } 751 752 if (nkif) 753 *nkif = rpool->cur->kif; 754 755 done_pool_mtx: 756 mtx_unlock(&rpool->mtx); 757 758 return (reason); 759 } 760 761 u_short 762 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, 763 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, 764 struct pf_kpool *rpool, pf_sn_types_t sn_type) 765 { 766 struct pf_ksrc_node *sn = NULL; 767 struct pf_srchash *sh = NULL; 768 u_short reason = 0; 769 770 /* 771 * If this is a sticky-address rule, try to find an existing src_node. 772 */ 773 if (rpool->opts & PF_POOL_STICKYADDR && 774 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) 775 sn = pf_find_src_node(saddr, r, af, &sh, sn_type, false); 776 777 if (sn != NULL) { 778 PF_SRC_NODE_LOCK_ASSERT(sn); 779 780 /* If the supplied address is the same as the current one we've 781 * been asked before, so tell the caller that there's no other 782 * address to be had. */ 783 if (PF_AEQ(naddr, &(sn->raddr), af)) { 784 reason = PFRES_MAPFAILED; 785 goto done; 786 } 787 788 pf_addrcpy(naddr, &(sn->raddr), af); 789 if (nkif) 790 *nkif = sn->rkif; 791 if (V_pf_status.debug >= PF_DEBUG_NOISY) { 792 printf("%s: src tracking maps ", __func__); 793 pf_print_host(saddr, 0, af); 794 printf(" to "); 795 pf_print_host(naddr, 0, af); 796 if (nkif) 797 printf("@%s", (*nkif)->pfik_name); 798 printf("\n"); 799 } 800 goto done; 801 } 802 803 /* 804 * Source node has not been found. Find a new address and store it 805 * in variables given by the caller. 806 */ 807 if ((reason = pf_map_addr(af, r, saddr, naddr, nkif, init_addr, 808 rpool)) != 0) { 809 if (V_pf_status.debug >= PF_DEBUG_MISC) 810 printf("%s: pf_map_addr has failed\n", __func__); 811 goto done; 812 } 813 814 if (V_pf_status.debug >= PF_DEBUG_NOISY && 815 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 816 printf("%s: selected address ", __func__); 817 pf_print_host(naddr, 0, af); 818 if (nkif) 819 printf("@%s", (*nkif)->pfik_name); 820 printf("\n"); 821 } 822 823 done: 824 if (sn != NULL) 825 PF_SRC_NODE_UNLOCK(sn); 826 827 return (reason); 828 } 829 830 u_short 831 pf_get_translation(struct pf_test_ctx *ctx) 832 { 833 struct pf_krule *r = NULL; 834 u_short transerror; 835 836 PF_RULES_RASSERT(); 837 KASSERT(ctx->sk == NULL, ("*skp not NULL")); 838 KASSERT(ctx->nk == NULL, ("*nkp not NULL")); 839 840 ctx->nr = NULL; 841 842 if (ctx->pd->dir == PF_OUT) { 843 r = pf_match_translation(PF_RULESET_BINAT, ctx); 844 if (r == NULL) 845 r = pf_match_translation(PF_RULESET_NAT, ctx); 846 } else { 847 r = pf_match_translation(PF_RULESET_RDR, ctx); 848 if (r == NULL) 849 r = pf_match_translation(PF_RULESET_BINAT, ctx); 850 } 851 852 if (r == NULL) 853 return (PFRES_MAX); 854 855 switch (r->action) { 856 case PF_NONAT: 857 case PF_NOBINAT: 858 case PF_NORDR: 859 return (PFRES_MAX); 860 } 861 862 transerror = pf_get_transaddr(ctx, r, r->action, &(r->rdr)); 863 if (transerror == PFRES_MATCH) 864 ctx->nr = r; 865 866 return (transerror); 867 } 868 869 u_short 870 pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, 871 uint8_t nat_action, struct pf_kpool *rpool) 872 { 873 struct pf_pdesc *pd = ctx->pd; 874 struct pf_addr *naddr; 875 uint16_t *nportp; 876 uint16_t low, high; 877 u_short reason; 878 879 PF_RULES_RASSERT(); 880 KASSERT(r != NULL, ("r is NULL")); 881 KASSERT(!(r->rule_flag & PFRULE_AFTO), ("AFTO rule")); 882 883 if (ctx->sk == NULL && ctx->nk == NULL) { 884 if (pf_state_key_setup(pd, pd->nsport, pd->ndport, &ctx->sk, 885 &ctx->nk)) 886 return (PFRES_MEMORY); 887 } 888 889 naddr = &ctx->nk->addr[1]; 890 nportp = &ctx->nk->port[1]; 891 892 switch (nat_action) { 893 case PF_NAT: 894 if (pd->proto == IPPROTO_ICMP) { 895 low = 1; 896 high = 65535; 897 } else { 898 low = rpool->proxy_port[0]; 899 high = rpool->proxy_port[1]; 900 } 901 if (rpool->mape.offset > 0) { 902 if (pf_get_mape_sport(pd, r, naddr, nportp, 903 &ctx->udp_mapping, rpool)) { 904 DPFPRINTF(PF_DEBUG_MISC, 905 "pf: MAP-E port allocation (%u/%u/%u)" 906 " failed", 907 rpool->mape.offset, 908 rpool->mape.psidlen, 909 rpool->mape.psid); 910 reason = PFRES_MAPFAILED; 911 goto notrans; 912 } 913 } else if (pf_get_sport(pd, r, naddr, nportp, low, high, 914 rpool, &ctx->udp_mapping, PF_SN_NAT)) { 915 DPFPRINTF(PF_DEBUG_MISC, 916 "pf: NAT proxy port allocation (%u-%u) failed", 917 rpool->proxy_port[0], rpool->proxy_port[1]); 918 reason = PFRES_MAPFAILED; 919 goto notrans; 920 } 921 break; 922 case PF_BINAT: 923 switch (pd->dir) { 924 case PF_OUT: 925 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL){ 926 switch (pd->af) { 927 #ifdef INET 928 case AF_INET: 929 if (rpool->cur->addr.p.dyn-> 930 pfid_acnt4 < 1) { 931 reason = PFRES_MAPFAILED; 932 goto notrans; 933 } 934 pf_poolmask(naddr, 935 &rpool->cur->addr.p.dyn->pfid_addr4, 936 &rpool->cur->addr.p.dyn->pfid_mask4, 937 &pd->nsaddr, AF_INET); 938 break; 939 #endif /* INET */ 940 #ifdef INET6 941 case AF_INET6: 942 if (rpool->cur->addr.p.dyn-> 943 pfid_acnt6 < 1) { 944 reason = PFRES_MAPFAILED; 945 goto notrans; 946 } 947 pf_poolmask(naddr, 948 &rpool->cur->addr.p.dyn->pfid_addr6, 949 &rpool->cur->addr.p.dyn->pfid_mask6, 950 &pd->nsaddr, AF_INET6); 951 break; 952 #endif /* INET6 */ 953 } 954 } else 955 pf_poolmask(naddr, 956 &rpool->cur->addr.v.a.addr, 957 &rpool->cur->addr.v.a.mask, &pd->nsaddr, 958 pd->af); 959 break; 960 case PF_IN: 961 if (r->src.addr.type == PF_ADDR_DYNIFTL) { 962 switch (pd->af) { 963 #ifdef INET 964 case AF_INET: 965 if (r->src.addr.p.dyn->pfid_acnt4 < 1) { 966 reason = PFRES_MAPFAILED; 967 goto notrans; 968 } 969 pf_poolmask(naddr, 970 &r->src.addr.p.dyn->pfid_addr4, 971 &r->src.addr.p.dyn->pfid_mask4, 972 &pd->ndaddr, AF_INET); 973 break; 974 #endif /* INET */ 975 #ifdef INET6 976 case AF_INET6: 977 if (r->src.addr.p.dyn->pfid_acnt6 < 1) { 978 reason = PFRES_MAPFAILED; 979 goto notrans; 980 } 981 pf_poolmask(naddr, 982 &r->src.addr.p.dyn->pfid_addr6, 983 &r->src.addr.p.dyn->pfid_mask6, 984 &pd->ndaddr, AF_INET6); 985 break; 986 #endif /* INET6 */ 987 } 988 } else 989 pf_poolmask(naddr, &r->src.addr.v.a.addr, 990 &r->src.addr.v.a.mask, &pd->ndaddr, pd->af); 991 break; 992 } 993 break; 994 case PF_RDR: { 995 struct pf_state_key_cmp key; 996 int tries; 997 uint16_t cut, low, high, nport; 998 999 reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL, 1000 NULL, rpool, PF_SN_NAT); 1001 if (reason != 0) 1002 goto notrans; 1003 if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 1004 pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask, 1005 &pd->ndaddr, pd->af); 1006 1007 /* Do not change SCTP ports. */ 1008 if (pd->proto == IPPROTO_SCTP) 1009 break; 1010 1011 if (rpool->proxy_port[1]) { 1012 uint32_t tmp_nport; 1013 uint16_t div; 1014 1015 div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1; 1016 div = (div == 0) ? 1 : div; 1017 1018 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) + 1019 rpool->proxy_port[0]; 1020 1021 /* Wrap around if necessary. */ 1022 if (tmp_nport > 65535) 1023 tmp_nport -= 65535; 1024 nport = htons((uint16_t)tmp_nport); 1025 } else if (rpool->proxy_port[0]) 1026 nport = htons(rpool->proxy_port[0]); 1027 else 1028 nport = pd->ndport; 1029 1030 /* 1031 * Update the destination port. 1032 */ 1033 *nportp = nport; 1034 1035 /* 1036 * Do we have a source port conflict in the stack state? Try to 1037 * modulate the source port if so. Note that this is racy since 1038 * the state lookup may not find any matches here but will once 1039 * pf_create_state() actually instantiates the state. 1040 */ 1041 bzero(&key, sizeof(key)); 1042 key.af = pd->af; 1043 key.proto = pd->proto; 1044 key.port[0] = pd->nsport; 1045 pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af); 1046 key.port[1] = nport; 1047 pf_addrcpy(&key.addr[1], naddr, key.af); 1048 1049 if (!pf_find_state_all_exists(&key, PF_OUT)) 1050 break; 1051 1052 tries = 0; 1053 1054 low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */ 1055 high = 65535; 1056 cut = arc4random() % (1 + high - low) + low; 1057 for (uint32_t tmp = cut; 1058 tmp <= high && tmp <= UINT16_MAX && 1059 tries < V_pf_rdr_srcport_rewrite_tries; 1060 tmp++, tries++) { 1061 key.port[0] = htons(tmp); 1062 if (!pf_find_state_all_exists(&key, PF_OUT)) { 1063 /* Update the source port. */ 1064 ctx->nk->port[0] = htons(tmp); 1065 goto out; 1066 } 1067 } 1068 for (uint32_t tmp = cut - 1; 1069 tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries; 1070 tmp--, tries++) { 1071 key.port[0] = htons(tmp); 1072 if (!pf_find_state_all_exists(&key, PF_OUT)) { 1073 /* Update the source port. */ 1074 ctx->nk->port[0] = htons(tmp); 1075 goto out; 1076 } 1077 } 1078 1079 /* 1080 * We failed to find a match. Push on ahead anyway, let 1081 * pf_state_insert() be the arbiter of whether the state 1082 * conflict is tolerable. In particular, with TCP connections 1083 * the state may be reused if the TCP state is terminal. 1084 */ 1085 DPFPRINTF(PF_DEBUG_MISC, 1086 "pf: RDR source port allocation failed"); 1087 break; 1088 1089 out: 1090 DPFPRINTF(PF_DEBUG_MISC, 1091 "pf: RDR source port allocation %u->%u", 1092 ntohs(pd->nsport), ntohs(ctx->nk->port[0])); 1093 break; 1094 } 1095 default: 1096 panic("%s: unknown action %u", __func__, r->action); 1097 } 1098 1099 /* Return success only if translation really happened. */ 1100 if (bcmp(ctx->sk, ctx->nk, sizeof(struct pf_state_key_cmp))) { 1101 return (PFRES_MATCH); 1102 } 1103 1104 reason = PFRES_MAX; 1105 notrans: 1106 uma_zfree(V_pf_state_key_z, ctx->nk); 1107 uma_zfree(V_pf_state_key_z, ctx->sk); 1108 ctx->sk = ctx->nk = NULL; 1109 1110 return (reason); 1111 } 1112 1113 int 1114 pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) 1115 { 1116 #if defined(INET) && defined(INET6) 1117 struct pf_addr ndaddr, nsaddr, naddr; 1118 u_int16_t nport = 0; 1119 int prefixlen = 96; 1120 1121 bzero(&nsaddr, sizeof(nsaddr)); 1122 bzero(&ndaddr, sizeof(ndaddr)); 1123 1124 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1125 printf("pf: af-to %s %s, ", 1126 pd->naf == AF_INET ? "inet" : "inet6", 1127 TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr"); 1128 pf_print_host(&pd->nsaddr, pd->nsport, pd->af); 1129 printf(" -> "); 1130 pf_print_host(&pd->ndaddr, pd->ndport, pd->af); 1131 printf("\n"); 1132 } 1133 1134 if (TAILQ_EMPTY(&r->nat.list)) 1135 panic("pf_get_transaddr_af: no nat pool for source address"); 1136 1137 /* get source address and port */ 1138 if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0], 1139 r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) { 1140 DPFPRINTF(PF_DEBUG_MISC, 1141 "pf: af-to NAT proxy port allocation (%u-%u) failed", 1142 r->nat.proxy_port[0], r->nat.proxy_port[1]); 1143 return (-1); 1144 } 1145 1146 if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) { 1147 pd->ndport = ntohs(pd->ndport); 1148 if (pd->ndport == ICMP6_ECHO_REQUEST) 1149 pd->ndport = ICMP_ECHO; 1150 else if (pd->ndport == ICMP6_ECHO_REPLY) 1151 pd->ndport = ICMP_ECHOREPLY; 1152 pd->ndport = htons(pd->ndport); 1153 } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) { 1154 pd->nsport = ntohs(pd->nsport); 1155 if (pd->ndport == ICMP_ECHO) 1156 pd->ndport = ICMP6_ECHO_REQUEST; 1157 else if (pd->ndport == ICMP_ECHOREPLY) 1158 pd->ndport = ICMP6_ECHO_REPLY; 1159 pd->nsport = htons(pd->nsport); 1160 } 1161 1162 /* get the destination address and port */ 1163 if (! TAILQ_EMPTY(&r->rdr.list)) { 1164 if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL, 1165 &r->rdr, PF_SN_NAT)) 1166 return (-1); 1167 if (r->rdr.proxy_port[0]) 1168 pd->ndport = htons(r->rdr.proxy_port[0]); 1169 1170 if (pd->naf == AF_INET) { 1171 /* The prefix is the IPv4 rdr address */ 1172 prefixlen = in_mask2len( 1173 (struct in_addr *)&r->rdr.cur->addr.v.a.mask); 1174 inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1175 prefixlen); 1176 } else { 1177 /* The prefix is the IPv6 rdr address */ 1178 prefixlen = in6_mask2len( 1179 (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL); 1180 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1181 prefixlen); 1182 } 1183 } else { 1184 if (pd->naf == AF_INET) { 1185 /* The prefix is the IPv6 dst address */ 1186 prefixlen = in6_mask2len( 1187 (struct in6_addr *)&r->dst.addr.v.a.mask, NULL); 1188 if (prefixlen < 32) 1189 prefixlen = 96; 1190 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr, 1191 prefixlen); 1192 } else { 1193 /* 1194 * The prefix is the IPv6 nat address 1195 * (that was stored in pd->nsaddr) 1196 */ 1197 prefixlen = in6_mask2len( 1198 (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL); 1199 if (prefixlen > 96) 1200 prefixlen = 96; 1201 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr, 1202 prefixlen); 1203 } 1204 } 1205 1206 pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); 1207 pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); 1208 1209 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1210 printf("pf: af-to %s done, prefixlen %d, ", 1211 pd->naf == AF_INET ? "inet" : "inet6", 1212 prefixlen); 1213 pf_print_host(&pd->nsaddr, pd->nsport, pd->naf); 1214 printf(" -> "); 1215 pf_print_host(&pd->ndaddr, pd->ndport, pd->naf); 1216 printf("\n"); 1217 } 1218 1219 return (0); 1220 #else 1221 return (-1); 1222 #endif 1223 } 1224