1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ 37 */ 38 39 #include <sys/cdefs.h> 40 #include "opt_pf.h" 41 #include "opt_inet.h" 42 #include "opt_inet6.h" 43 44 #include <sys/param.h> 45 #include <sys/lock.h> 46 #include <sys/mbuf.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <crypto/siphash/siphash.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/vnet.h> 55 #include <net/pfvar.h> 56 #include <net/if_pflog.h> 57 58 #ifdef INET 59 #include <netinet/in_var.h> 60 #endif /* INET */ 61 62 #ifdef INET6 63 #include <netinet6/in6_var.h> 64 #endif /* INET6 */ 65 66 67 /* 68 * Limit the amount of work we do to find a free source port for redirects that 69 * introduce a state conflict. 70 */ 71 #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries) 72 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; 73 74 #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x 75 76 static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, 77 struct pf_poolhashkey *, sa_family_t); 78 static struct pf_krule *pf_match_translation(struct pf_pdesc *, 79 int, struct pf_kanchor_stackframe *); 80 static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, 81 struct pf_addr *, uint16_t *, uint16_t, uint16_t, 82 struct pf_ksrc_node **, struct pf_srchash **, 83 struct pf_kpool *, struct pf_udp_mapping **, 84 pf_sn_types_t); 85 static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); 86 87 static uint64_t 88 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 89 struct pf_poolhashkey *key, sa_family_t af) 90 { 91 SIPHASH_CTX ctx; 92 #ifdef INET6 93 union { 94 uint64_t hash64; 95 uint32_t hash32[2]; 96 } h; 97 #endif /* INET6 */ 98 uint64_t res = 0; 99 100 _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, ""); 101 102 switch (af) { 103 #ifdef INET 104 case AF_INET: 105 res = SipHash24(&ctx, (const uint8_t *)key, 106 &inaddr->addr32[0], sizeof(inaddr->addr32[0])); 107 hash->addr32[0] = res; 108 break; 109 #endif /* INET */ 110 #ifdef INET6 111 case AF_INET6: 112 res = SipHash24(&ctx, (const uint8_t *)key, 113 &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0])); 114 h.hash64 = res; 115 hash->addr32[0] = h.hash32[0]; 116 hash->addr32[1] = h.hash32[1]; 117 /* 118 * siphash isn't big enough, but flipping it around is 119 * good enough here. 120 */ 121 hash->addr32[2] = ~h.hash32[1]; 122 hash->addr32[3] = ~h.hash32[0]; 123 break; 124 #endif /* INET6 */ 125 default: 126 unhandled_af(af); 127 } 128 return (res); 129 } 130 131 static struct pf_krule * 132 pf_match_translation(struct pf_pdesc *pd, 133 int rs_num, struct pf_kanchor_stackframe *anchor_stack) 134 { 135 struct pf_krule *r, *rm = NULL; 136 struct pf_kruleset *ruleset = NULL; 137 int tag = -1; 138 int rtableid = -1; 139 int asd = 0; 140 141 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); 142 while (r != NULL) { 143 struct pf_rule_addr *src = NULL, *dst = NULL; 144 struct pf_addr_wrap *xdst = NULL; 145 146 if (r->action == PF_BINAT && pd->dir == PF_IN) { 147 src = &r->dst; 148 if (r->rdr.cur != NULL) 149 xdst = &r->rdr.cur->addr; 150 } else { 151 src = &r->src; 152 dst = &r->dst; 153 } 154 155 pf_counter_u64_add(&r->evaluations, 1); 156 if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot) 157 r = r->skip[PF_SKIP_IFP]; 158 else if (r->direction && r->direction != pd->dir) 159 r = r->skip[PF_SKIP_DIR]; 160 else if (r->af && r->af != pd->af) 161 r = r->skip[PF_SKIP_AF]; 162 else if (r->proto && r->proto != pd->proto) 163 r = r->skip[PF_SKIP_PROTO]; 164 else if (PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af, 165 src->neg, pd->kif, M_GETFIB(pd->m))) 166 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : 167 PF_SKIP_DST_ADDR]; 168 else if (src->port_op && !pf_match_port(src->port_op, 169 src->port[0], src->port[1], pd->nsport)) 170 r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : 171 PF_SKIP_DST_PORT]; 172 else if (dst != NULL && 173 PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL, 174 M_GETFIB(pd->m))) 175 r = r->skip[PF_SKIP_DST_ADDR]; 176 else if (xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af, 177 0, NULL, M_GETFIB(pd->m))) 178 r = TAILQ_NEXT(r, entries); 179 else if (dst != NULL && dst->port_op && 180 !pf_match_port(dst->port_op, dst->port[0], 181 dst->port[1], pd->ndport)) 182 r = r->skip[PF_SKIP_DST_PORT]; 183 else if (r->match_tag && !pf_match_tag(pd->m, r, &tag, 184 pd->pf_mtag ? pd->pf_mtag->tag : 0)) 185 r = TAILQ_NEXT(r, entries); 186 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != 187 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, 188 &pd->hdr.tcp), r->os_fingerprint))) 189 r = TAILQ_NEXT(r, entries); 190 else { 191 if (r->tag) 192 tag = r->tag; 193 if (r->rtableid >= 0) 194 rtableid = r->rtableid; 195 if (r->anchor == NULL) { 196 rm = r; 197 if (rm->action == PF_NONAT || 198 rm->action == PF_NORDR || 199 rm->action == PF_NOBINAT) { 200 rm = NULL; 201 } 202 break; 203 } else 204 pf_step_into_anchor(anchor_stack, &asd, 205 &ruleset, rs_num, &r, NULL); 206 } 207 if (r == NULL) 208 pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, 209 rs_num, &r, NULL, NULL); 210 } 211 212 if (tag > 0 && pf_tag_packet(pd, tag)) 213 return (NULL); 214 if (rtableid >= 0) 215 M_SETFIB(pd->m, rtableid); 216 217 return (rm); 218 } 219 220 static int 221 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, 222 struct pf_addr *naddr, uint16_t *nport, uint16_t low, 223 uint16_t high, struct pf_ksrc_node **sn, 224 struct pf_srchash **sh, struct pf_kpool *rpool, 225 struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type) 226 { 227 struct pf_state_key_cmp key; 228 struct pf_addr init_addr; 229 int dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN; 230 int sidx = pd->sidx; 231 int didx = pd->didx; 232 233 bzero(&init_addr, sizeof(init_addr)); 234 235 if (udp_mapping) { 236 MPASS(*udp_mapping == NULL); 237 } 238 239 /* 240 * If we are UDP and have an existing mapping we can get source port 241 * from the mapping. In this case we have to look up the src_node as 242 * pf_map_addr would. 243 */ 244 if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) { 245 struct pf_udp_endpoint_cmp udp_source; 246 247 bzero(&udp_source, sizeof(udp_source)); 248 udp_source.af = pd->af; 249 PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af); 250 udp_source.port = pd->nsport; 251 if (udp_mapping) { 252 *udp_mapping = pf_udp_mapping_find(&udp_source); 253 if (*udp_mapping) { 254 PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); 255 *nport = (*udp_mapping)->endpoints[1].port; 256 /* Try to find a src_node as per pf_map_addr(). */ 257 if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && 258 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) 259 *sn = pf_find_src_node(&pd->nsaddr, r, 260 pd->af, sh, sn_type, false); 261 if (*sn != NULL) 262 PF_SRC_NODE_UNLOCK(*sn); 263 return (0); 264 } else { 265 *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr, 266 pd->nsport, &init_addr, 0); 267 if (*udp_mapping == NULL) 268 return (1); 269 } 270 } 271 } 272 273 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr, 274 sn, sh, rpool, sn_type)) 275 goto failed; 276 277 if (pd->proto == IPPROTO_ICMP) { 278 if (*nport == htons(ICMP_ECHO)) { 279 low = 1; 280 high = 65535; 281 } else 282 return (0); /* Don't try to modify non-echo ICMP */ 283 } 284 #ifdef INET6 285 if (pd->proto == IPPROTO_ICMPV6) { 286 if (*nport == htons(ICMP6_ECHO_REQUEST)) { 287 low = 1; 288 high = 65535; 289 } else 290 return (0); /* Don't try to modify non-echo ICMP */ 291 } 292 #endif /* INET6 */ 293 294 bzero(&key, sizeof(key)); 295 key.af = pd->naf; 296 key.proto = pd->proto; 297 298 do { 299 PF_ACPY(&key.addr[didx], &pd->ndaddr, key.af); 300 PF_ACPY(&key.addr[sidx], naddr, key.af); 301 key.port[didx] = pd->ndport; 302 303 if (udp_mapping && *udp_mapping) 304 PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af); 305 306 /* 307 * port search; start random, step; 308 * similar 2 portloop in in_pcbbind 309 */ 310 if (pd->proto == IPPROTO_SCTP) { 311 key.port[sidx] = pd->nsport; 312 if (!pf_find_state_all_exists(&key, dir)) { 313 *nport = pd->nsport; 314 return (0); 315 } else { 316 return (1); /* Fail mapping. */ 317 } 318 } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || 319 pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) { 320 /* 321 * XXX bug: icmp states don't use the id on both sides. 322 * (traceroute -I through nat) 323 */ 324 key.port[sidx] = pd->nsport; 325 if (!pf_find_state_all_exists(&key, dir)) { 326 *nport = pd->nsport; 327 return (0); 328 } 329 } else if (low == high) { 330 key.port[sidx] = htons(low); 331 if (!pf_find_state_all_exists(&key, dir)) { 332 if (udp_mapping && *udp_mapping != NULL) { 333 (*udp_mapping)->endpoints[1].port = htons(low); 334 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 335 *nport = htons(low); 336 return (0); 337 } 338 } else { 339 *nport = htons(low); 340 return (0); 341 } 342 } 343 } else { 344 uint32_t tmp; 345 uint16_t cut; 346 347 if (low > high) { 348 tmp = low; 349 low = high; 350 high = tmp; 351 } 352 /* low < high */ 353 cut = arc4random() % (1 + high - low) + low; 354 /* low <= cut <= high */ 355 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) { 356 if (udp_mapping && *udp_mapping != NULL) { 357 (*udp_mapping)->endpoints[sidx].port = htons(tmp); 358 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 359 *nport = htons(tmp); 360 return (0); 361 } 362 } else { 363 key.port[sidx] = htons(tmp); 364 if (!pf_find_state_all_exists(&key, dir)) { 365 *nport = htons(tmp); 366 return (0); 367 } 368 } 369 } 370 tmp = cut; 371 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) { 372 if (pd->proto == IPPROTO_UDP && 373 (rpool->opts & PF_POOL_ENDPI && 374 udp_mapping != NULL)) { 375 (*udp_mapping)->endpoints[1].port = htons(tmp); 376 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 377 *nport = htons(tmp); 378 return (0); 379 } 380 } else { 381 key.port[sidx] = htons(tmp); 382 if (!pf_find_state_all_exists(&key, dir)) { 383 *nport = htons(tmp); 384 return (0); 385 } 386 } 387 } 388 } 389 390 switch (rpool->opts & PF_POOL_TYPEMASK) { 391 case PF_POOL_RANDOM: 392 case PF_POOL_ROUNDROBIN: 393 /* 394 * pick a different source address since we're out 395 * of free port choices for the current one. 396 */ 397 (*sn) = NULL; 398 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, 399 &init_addr, sn, sh, rpool, sn_type)) 400 return (1); 401 break; 402 case PF_POOL_NONE: 403 case PF_POOL_SRCHASH: 404 case PF_POOL_BITMASK: 405 default: 406 return (1); 407 } 408 } while (! PF_AEQ(&init_addr, naddr, pd->naf) ); 409 410 failed: 411 if (udp_mapping) { 412 uma_zfree(V_pf_udp_mapping_z, *udp_mapping); 413 *udp_mapping = NULL; 414 } 415 416 return (1); /* none available */ 417 } 418 419 static bool 420 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr) 421 { 422 if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6)) 423 return (true); 424 return (false); 425 } 426 427 static int 428 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, 429 struct pf_addr *naddr, uint16_t *nport, 430 struct pf_ksrc_node **sn, struct pf_srchash **sh, 431 struct pf_udp_mapping **udp_mapping) 432 { 433 uint16_t psmask, low, highmask; 434 uint16_t i, ahigh, cut; 435 int ashift, psidshift; 436 437 ashift = 16 - r->rdr.mape.offset; 438 psidshift = ashift - r->rdr.mape.psidlen; 439 psmask = r->rdr.mape.psid & ((1U << r->rdr.mape.psidlen) - 1); 440 psmask = psmask << psidshift; 441 highmask = (1U << psidshift) - 1; 442 443 ahigh = (1U << r->rdr.mape.offset) - 1; 444 cut = arc4random() & ahigh; 445 if (cut == 0) 446 cut = 1; 447 448 for (i = cut; i <= ahigh; i++) { 449 low = (i << ashift) | psmask; 450 if (!pf_get_sport(pd, r, 451 naddr, nport, low, low | highmask, sn, sh, &r->rdr, 452 udp_mapping, PF_SN_NAT)) 453 return (0); 454 } 455 for (i = cut - 1; i > 0; i--) { 456 low = (i << ashift) | psmask; 457 if (!pf_get_sport(pd, r, 458 naddr, nport, low, low | highmask, sn, sh, &r->rdr, 459 udp_mapping, PF_SN_NAT)) 460 return (0); 461 } 462 return (1); 463 } 464 465 u_short 466 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, 467 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, 468 struct pf_kpool *rpool) 469 { 470 u_short reason = PFRES_MATCH; 471 struct pf_addr *raddr = NULL, *rmask = NULL; 472 uint64_t hashidx; 473 int cnt; 474 475 mtx_lock(&rpool->mtx); 476 /* Find the route using chosen algorithm. Store the found route 477 in src_node if it was given or found. */ 478 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) { 479 reason = PFRES_MAPFAILED; 480 goto done_pool_mtx; 481 } 482 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 483 switch (af) { 484 #ifdef INET 485 case AF_INET: 486 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && 487 !PF_POOL_DYNTYPE(rpool->opts)) { 488 reason = PFRES_MAPFAILED; 489 goto done_pool_mtx; 490 } 491 raddr = &rpool->cur->addr.p.dyn->pfid_addr4; 492 rmask = &rpool->cur->addr.p.dyn->pfid_mask4; 493 break; 494 #endif /* INET */ 495 #ifdef INET6 496 case AF_INET6: 497 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && 498 !PF_POOL_DYNTYPE(rpool->opts)) { 499 reason = PFRES_MAPFAILED; 500 goto done_pool_mtx; 501 } 502 raddr = &rpool->cur->addr.p.dyn->pfid_addr6; 503 rmask = &rpool->cur->addr.p.dyn->pfid_mask6; 504 break; 505 #endif /* INET6 */ 506 default: 507 unhandled_af(af); 508 } 509 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { 510 if (!PF_POOL_DYNTYPE(rpool->opts)) { 511 reason = PFRES_MAPFAILED; 512 goto done_pool_mtx; /* unsupported */ 513 } 514 } else { 515 raddr = &rpool->cur->addr.v.a.addr; 516 rmask = &rpool->cur->addr.v.a.mask; 517 } 518 519 switch (rpool->opts & PF_POOL_TYPEMASK) { 520 case PF_POOL_NONE: 521 PF_ACPY(naddr, raddr, af); 522 break; 523 case PF_POOL_BITMASK: 524 PF_POOLMASK(naddr, raddr, rmask, saddr, af); 525 break; 526 case PF_POOL_RANDOM: 527 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 528 cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; 529 if (cnt == 0) 530 rpool->tblidx = 0; 531 else 532 rpool->tblidx = (int)arc4random_uniform(cnt); 533 memset(&rpool->counter, 0, sizeof(rpool->counter)); 534 if (pfr_pool_get(rpool->cur->addr.p.tbl, 535 &rpool->tblidx, &rpool->counter, af, NULL)) { 536 reason = PFRES_MAPFAILED; 537 goto done_pool_mtx; /* unsupported */ 538 } 539 PF_ACPY(naddr, &rpool->counter, af); 540 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 541 cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; 542 if (cnt == 0) 543 rpool->tblidx = 0; 544 else 545 rpool->tblidx = (int)arc4random_uniform(cnt); 546 memset(&rpool->counter, 0, sizeof(rpool->counter)); 547 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 548 &rpool->tblidx, &rpool->counter, af, 549 pf_islinklocal)) { 550 reason = PFRES_MAPFAILED; 551 goto done_pool_mtx; /* unsupported */ 552 } 553 PF_ACPY(naddr, &rpool->counter, af); 554 } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { 555 switch (af) { 556 #ifdef INET 557 case AF_INET: 558 rpool->counter.addr32[0] = arc4random(); 559 break; 560 #endif /* INET */ 561 #ifdef INET6 562 case AF_INET6: 563 if (rmask->addr32[3] != 0xffffffff) 564 rpool->counter.addr32[3] = 565 arc4random(); 566 else 567 break; 568 if (rmask->addr32[2] != 0xffffffff) 569 rpool->counter.addr32[2] = 570 arc4random(); 571 else 572 break; 573 if (rmask->addr32[1] != 0xffffffff) 574 rpool->counter.addr32[1] = 575 arc4random(); 576 else 577 break; 578 if (rmask->addr32[0] != 0xffffffff) 579 rpool->counter.addr32[0] = 580 arc4random(); 581 break; 582 #endif /* INET6 */ 583 } 584 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 585 PF_ACPY(init_addr, naddr, af); 586 587 } else { 588 PF_AINC(&rpool->counter, af); 589 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 590 } 591 break; 592 case PF_POOL_SRCHASH: 593 { 594 unsigned char hash[16]; 595 596 hashidx = 597 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 598 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 599 cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; 600 if (cnt == 0) 601 rpool->tblidx = 0; 602 else 603 rpool->tblidx = (int)(hashidx % cnt); 604 memset(&rpool->counter, 0, sizeof(rpool->counter)); 605 if (pfr_pool_get(rpool->cur->addr.p.tbl, 606 &rpool->tblidx, &rpool->counter, af, NULL)) { 607 reason = PFRES_MAPFAILED; 608 goto done_pool_mtx; /* unsupported */ 609 } 610 PF_ACPY(naddr, &rpool->counter, af); 611 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 612 cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; 613 if (cnt == 0) 614 rpool->tblidx = 0; 615 else 616 rpool->tblidx = (int)(hashidx % cnt); 617 memset(&rpool->counter, 0, sizeof(rpool->counter)); 618 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 619 &rpool->tblidx, &rpool->counter, af, 620 pf_islinklocal)) { 621 reason = PFRES_MAPFAILED; 622 goto done_pool_mtx; /* unsupported */ 623 } 624 PF_ACPY(naddr, &rpool->counter, af); 625 } else { 626 PF_POOLMASK(naddr, raddr, rmask, 627 (struct pf_addr *)&hash, af); 628 } 629 break; 630 } 631 case PF_POOL_ROUNDROBIN: 632 { 633 struct pf_kpooladdr *acur = rpool->cur; 634 635 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 636 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 637 &rpool->tblidx, &rpool->counter, af, NULL)) 638 goto get_addr; 639 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 640 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 641 &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) 642 goto get_addr; 643 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 644 goto get_addr; 645 646 try_next: 647 if (TAILQ_NEXT(rpool->cur, entries) == NULL) 648 rpool->cur = TAILQ_FIRST(&rpool->list); 649 else 650 rpool->cur = TAILQ_NEXT(rpool->cur, entries); 651 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 652 if (pfr_pool_get(rpool->cur->addr.p.tbl, 653 &rpool->tblidx, &rpool->counter, af, NULL)) { 654 /* table contains no address of type 'af' */ 655 if (rpool->cur != acur) 656 goto try_next; 657 reason = PFRES_MAPFAILED; 658 goto done_pool_mtx; 659 } 660 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 661 rpool->tblidx = -1; 662 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 663 &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) { 664 /* table contains no address of type 'af' */ 665 if (rpool->cur != acur) 666 goto try_next; 667 reason = PFRES_MAPFAILED; 668 goto done_pool_mtx; 669 } 670 } else { 671 raddr = &rpool->cur->addr.v.a.addr; 672 rmask = &rpool->cur->addr.v.a.mask; 673 PF_ACPY(&rpool->counter, raddr, af); 674 } 675 676 get_addr: 677 PF_ACPY(naddr, &rpool->counter, af); 678 if (init_addr != NULL && PF_AZERO(init_addr, af)) 679 PF_ACPY(init_addr, naddr, af); 680 PF_AINC(&rpool->counter, af); 681 break; 682 } 683 } 684 685 if (nkif) 686 *nkif = rpool->cur->kif; 687 688 done_pool_mtx: 689 mtx_unlock(&rpool->mtx); 690 691 if (reason) { 692 counter_u64_add(V_pf_status.counters[reason], 1); 693 } 694 695 return (reason); 696 } 697 698 u_short 699 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, 700 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, 701 struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_kpool *rpool, 702 pf_sn_types_t sn_type) 703 { 704 u_short reason = 0; 705 706 KASSERT(*sn == NULL, ("*sn not NULL")); 707 708 /* 709 * If this is a sticky-address rule, try to find an existing src_node. 710 * Request the sh to be unlocked if sn was not found, as we never 711 * insert a new sn when parsing the ruleset. 712 */ 713 if (rpool->opts & PF_POOL_STICKYADDR && 714 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) 715 *sn = pf_find_src_node(saddr, r, af, sh, sn_type, false); 716 717 if (*sn != NULL) { 718 PF_SRC_NODE_LOCK_ASSERT(*sn); 719 720 /* If the supplied address is the same as the current one we've 721 * been asked before, so tell the caller that there's no other 722 * address to be had. */ 723 if (PF_AEQ(naddr, &(*sn)->raddr, af)) { 724 reason = PFRES_MAPFAILED; 725 goto done; 726 } 727 728 PF_ACPY(naddr, &(*sn)->raddr, af); 729 if (nkif) 730 *nkif = (*sn)->rkif; 731 if (V_pf_status.debug >= PF_DEBUG_NOISY) { 732 printf("pf_map_addr: src tracking maps "); 733 pf_print_host(saddr, 0, af); 734 printf(" to "); 735 pf_print_host(naddr, 0, af); 736 if (nkif) 737 printf("@%s", (*nkif)->pfik_name); 738 printf("\n"); 739 } 740 goto done; 741 } 742 743 /* 744 * Source node has not been found. Find a new address and store it 745 * in variables given by the caller. 746 */ 747 if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) { 748 /* pf_map_addr() sets reason counters on its own */ 749 goto done; 750 } 751 752 if (V_pf_status.debug >= PF_DEBUG_NOISY && 753 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 754 printf("pf_map_addr: selected address "); 755 pf_print_host(naddr, 0, af); 756 if (nkif) 757 printf("@%s", (*nkif)->pfik_name); 758 printf("\n"); 759 } 760 761 done: 762 if ((*sn) != NULL) 763 PF_SRC_NODE_UNLOCK(*sn); 764 765 if (reason) { 766 counter_u64_add(V_pf_status.counters[reason], 1); 767 } 768 769 return (reason); 770 } 771 772 u_short 773 pf_get_translation(struct pf_pdesc *pd, int off, 774 struct pf_state_key **skp, struct pf_state_key **nkp, 775 struct pf_kanchor_stackframe *anchor_stack, struct pf_krule **rp, 776 struct pf_udp_mapping **udp_mapping) 777 { 778 struct pf_krule *r = NULL; 779 struct pf_addr *naddr; 780 struct pf_ksrc_node *sn = NULL; 781 struct pf_srchash *sh = NULL; 782 uint16_t *nportp; 783 uint16_t low, high; 784 u_short reason; 785 786 PF_RULES_RASSERT(); 787 KASSERT(*skp == NULL, ("*skp not NULL")); 788 KASSERT(*nkp == NULL, ("*nkp not NULL")); 789 790 *rp = NULL; 791 792 if (pd->dir == PF_OUT) { 793 r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack); 794 if (r == NULL) 795 r = pf_match_translation(pd, PF_RULESET_NAT, anchor_stack); 796 } else { 797 r = pf_match_translation(pd, PF_RULESET_RDR, anchor_stack); 798 if (r == NULL) 799 r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack); 800 } 801 802 if (r == NULL) 803 return (PFRES_MAX); 804 805 switch (r->action) { 806 case PF_NONAT: 807 case PF_NOBINAT: 808 case PF_NORDR: 809 return (PFRES_MAX); 810 } 811 812 if (pf_state_key_setup(pd, pd->nsport, pd->ndport, skp, nkp)) 813 return (PFRES_MEMORY); 814 815 naddr = &(*nkp)->addr[1]; 816 nportp = &(*nkp)->port[1]; 817 818 switch (r->action) { 819 case PF_NAT: 820 if (pd->proto == IPPROTO_ICMP) { 821 low = 1; 822 high = 65535; 823 } else { 824 low = r->rdr.proxy_port[0]; 825 high = r->rdr.proxy_port[1]; 826 } 827 if (r->rdr.mape.offset > 0) { 828 if (pf_get_mape_sport(pd, r, naddr, nportp, &sn, 829 &sh, udp_mapping)) { 830 DPFPRINTF(PF_DEBUG_MISC, 831 ("pf: MAP-E port allocation (%u/%u/%u)" 832 " failed\n", 833 r->rdr.mape.offset, 834 r->rdr.mape.psidlen, 835 r->rdr.mape.psid)); 836 reason = PFRES_MAPFAILED; 837 goto notrans; 838 } 839 } else if (pf_get_sport(pd, r, naddr, nportp, low, high, &sn, 840 &sh, &r->rdr, udp_mapping, PF_SN_NAT)) { 841 DPFPRINTF(PF_DEBUG_MISC, 842 ("pf: NAT proxy port allocation (%u-%u) failed\n", 843 r->rdr.proxy_port[0], r->rdr.proxy_port[1])); 844 reason = PFRES_MAPFAILED; 845 goto notrans; 846 } 847 break; 848 case PF_BINAT: 849 switch (pd->dir) { 850 case PF_OUT: 851 if (r->rdr.cur->addr.type == PF_ADDR_DYNIFTL){ 852 switch (pd->af) { 853 #ifdef INET 854 case AF_INET: 855 if (r->rdr.cur->addr.p.dyn-> 856 pfid_acnt4 < 1) { 857 reason = PFRES_MAPFAILED; 858 goto notrans; 859 } 860 PF_POOLMASK(naddr, 861 &r->rdr.cur->addr.p.dyn-> 862 pfid_addr4, 863 &r->rdr.cur->addr.p.dyn-> 864 pfid_mask4, &pd->nsaddr, AF_INET); 865 break; 866 #endif /* INET */ 867 #ifdef INET6 868 case AF_INET6: 869 if (r->rdr.cur->addr.p.dyn-> 870 pfid_acnt6 < 1) { 871 reason = PFRES_MAPFAILED; 872 goto notrans; 873 } 874 PF_POOLMASK(naddr, 875 &r->rdr.cur->addr.p.dyn-> 876 pfid_addr6, 877 &r->rdr.cur->addr.p.dyn-> 878 pfid_mask6, &pd->nsaddr, AF_INET6); 879 break; 880 #endif /* INET6 */ 881 } 882 } else 883 PF_POOLMASK(naddr, 884 &r->rdr.cur->addr.v.a.addr, 885 &r->rdr.cur->addr.v.a.mask, &pd->nsaddr, 886 pd->af); 887 break; 888 case PF_IN: 889 if (r->src.addr.type == PF_ADDR_DYNIFTL) { 890 switch (pd->af) { 891 #ifdef INET 892 case AF_INET: 893 if (r->src.addr.p.dyn->pfid_acnt4 < 1) { 894 reason = PFRES_MAPFAILED; 895 goto notrans; 896 } 897 PF_POOLMASK(naddr, 898 &r->src.addr.p.dyn->pfid_addr4, 899 &r->src.addr.p.dyn->pfid_mask4, 900 &pd->ndaddr, AF_INET); 901 break; 902 #endif /* INET */ 903 #ifdef INET6 904 case AF_INET6: 905 if (r->src.addr.p.dyn->pfid_acnt6 < 1) { 906 reason = PFRES_MAPFAILED; 907 goto notrans; 908 } 909 PF_POOLMASK(naddr, 910 &r->src.addr.p.dyn->pfid_addr6, 911 &r->src.addr.p.dyn->pfid_mask6, 912 &pd->ndaddr, AF_INET6); 913 break; 914 #endif /* INET6 */ 915 } 916 } else 917 PF_POOLMASK(naddr, &r->src.addr.v.a.addr, 918 &r->src.addr.v.a.mask, &pd->ndaddr, pd->af); 919 break; 920 } 921 break; 922 case PF_RDR: { 923 struct pf_state_key_cmp key; 924 int tries; 925 uint16_t cut, low, high, nport; 926 927 reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL, 928 NULL, &sn, &sh, &r->rdr, PF_SN_NAT); 929 if (reason != 0) 930 goto notrans; 931 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 932 PF_POOLMASK(naddr, naddr, &r->rdr.cur->addr.v.a.mask, 933 &pd->ndaddr, pd->af); 934 935 /* Do not change SCTP ports. */ 936 if (pd->proto == IPPROTO_SCTP) 937 break; 938 939 if (r->rdr.proxy_port[1]) { 940 uint32_t tmp_nport; 941 942 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % 943 (r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 944 1)) + r->rdr.proxy_port[0]; 945 946 /* Wrap around if necessary. */ 947 if (tmp_nport > 65535) 948 tmp_nport -= 65535; 949 nport = htons((uint16_t)tmp_nport); 950 } else if (r->rdr.proxy_port[0]) 951 nport = htons(r->rdr.proxy_port[0]); 952 else 953 nport = pd->ndport; 954 955 /* 956 * Update the destination port. 957 */ 958 *nportp = nport; 959 960 /* 961 * Do we have a source port conflict in the stack state? Try to 962 * modulate the source port if so. Note that this is racy since 963 * the state lookup may not find any matches here but will once 964 * pf_create_state() actually instantiates the state. 965 */ 966 bzero(&key, sizeof(key)); 967 key.af = pd->af; 968 key.proto = pd->proto; 969 key.port[0] = pd->nsport; 970 PF_ACPY(&key.addr[0], &pd->nsaddr, key.af); 971 key.port[1] = nport; 972 PF_ACPY(&key.addr[1], naddr, key.af); 973 974 if (!pf_find_state_all_exists(&key, PF_OUT)) 975 break; 976 977 tries = 0; 978 979 low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */ 980 high = 65535; 981 cut = arc4random() % (1 + high - low) + low; 982 for (uint32_t tmp = cut; 983 tmp <= high && tmp <= UINT16_MAX && 984 tries < V_pf_rdr_srcport_rewrite_tries; 985 tmp++, tries++) { 986 key.port[0] = htons(tmp); 987 if (!pf_find_state_all_exists(&key, PF_OUT)) { 988 /* Update the source port. */ 989 (*nkp)->port[0] = htons(tmp); 990 goto out; 991 } 992 } 993 for (uint32_t tmp = cut - 1; 994 tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries; 995 tmp--, tries++) { 996 key.port[0] = htons(tmp); 997 if (!pf_find_state_all_exists(&key, PF_OUT)) { 998 /* Update the source port. */ 999 (*nkp)->port[0] = htons(tmp); 1000 goto out; 1001 } 1002 } 1003 1004 /* 1005 * We failed to find a match. Push on ahead anyway, let 1006 * pf_state_insert() be the arbiter of whether the state 1007 * conflict is tolerable. In particular, with TCP connections 1008 * the state may be reused if the TCP state is terminal. 1009 */ 1010 DPFPRINTF(PF_DEBUG_MISC, 1011 ("pf: RDR source port allocation failed\n")); 1012 break; 1013 1014 out: 1015 DPFPRINTF(PF_DEBUG_MISC, 1016 ("pf: RDR source port allocation %u->%u\n", 1017 ntohs(pd->nsport), ntohs((*nkp)->port[0]))); 1018 break; 1019 } 1020 default: 1021 panic("%s: unknown action %u", __func__, r->action); 1022 } 1023 1024 /* Return success only if translation really happened. */ 1025 if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) { 1026 *rp = r; 1027 return (PFRES_MATCH); 1028 } 1029 1030 reason = PFRES_MAX; 1031 notrans: 1032 uma_zfree(V_pf_state_key_z, *nkp); 1033 uma_zfree(V_pf_state_key_z, *skp); 1034 *skp = *nkp = NULL; 1035 1036 return (reason); 1037 } 1038 1039 int 1040 pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) 1041 { 1042 #if defined(INET) && defined(INET6) 1043 struct pf_addr ndaddr, nsaddr, naddr; 1044 u_int16_t nport = 0; 1045 int prefixlen = 96; 1046 struct pf_srchash *sh = NULL; 1047 struct pf_ksrc_node *sns = NULL; 1048 1049 bzero(&nsaddr, sizeof(nsaddr)); 1050 bzero(&ndaddr, sizeof(ndaddr)); 1051 1052 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1053 printf("pf: af-to %s %s, ", 1054 pd->naf == AF_INET ? "inet" : "inet6", 1055 TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr"); 1056 pf_print_host(&pd->nsaddr, pd->nsport, pd->af); 1057 printf(" -> "); 1058 pf_print_host(&pd->ndaddr, pd->ndport, pd->af); 1059 printf("\n"); 1060 } 1061 1062 if (TAILQ_EMPTY(&r->nat.list)) 1063 panic("pf_get_transaddr_af: no nat pool for source address"); 1064 1065 /* get source address and port */ 1066 if (pf_get_sport(pd, r, &nsaddr, &nport, 1067 r->nat.proxy_port[0], r->nat.proxy_port[1], &sns, &sh, &r->nat, 1068 NULL, PF_SN_NAT)) { 1069 DPFPRINTF(PF_DEBUG_MISC, 1070 ("pf: af-to NAT proxy port allocation (%u-%u) failed", 1071 r->nat.proxy_port[0], r->nat.proxy_port[1])); 1072 return (-1); 1073 } 1074 1075 if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) { 1076 pd->ndport = ntohs(pd->ndport); 1077 if (pd->ndport == ICMP6_ECHO_REQUEST) 1078 pd->ndport = ICMP_ECHO; 1079 else if (pd->ndport == ICMP6_ECHO_REPLY) 1080 pd->ndport = ICMP_ECHOREPLY; 1081 pd->ndport = htons(pd->ndport); 1082 } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) { 1083 pd->nsport = ntohs(pd->nsport); 1084 if (pd->ndport == ICMP_ECHO) 1085 pd->ndport = ICMP6_ECHO_REQUEST; 1086 else if (pd->ndport == ICMP_ECHOREPLY) 1087 pd->ndport = ICMP6_ECHO_REPLY; 1088 pd->nsport = htons(pd->nsport); 1089 } 1090 1091 /* get the destination address and port */ 1092 if (! TAILQ_EMPTY(&r->rdr.list)) { 1093 if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL, 1094 &sns, NULL, &r->rdr, PF_SN_NAT)) 1095 return (-1); 1096 if (r->rdr.proxy_port[0]) 1097 pd->ndport = htons(r->rdr.proxy_port[0]); 1098 1099 if (pd->naf == AF_INET) { 1100 /* The prefix is the IPv4 rdr address */ 1101 prefixlen = in_mask2len( 1102 (struct in_addr *)&r->rdr.cur->addr.v.a.mask); 1103 inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1104 prefixlen); 1105 } else { 1106 /* The prefix is the IPv6 rdr address */ 1107 prefixlen = in6_mask2len( 1108 (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL); 1109 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1110 prefixlen); 1111 } 1112 } else { 1113 if (pd->naf == AF_INET) { 1114 /* The prefix is the IPv6 dst address */ 1115 prefixlen = in6_mask2len( 1116 (struct in6_addr *)&r->dst.addr.v.a.mask, NULL); 1117 if (prefixlen < 32) 1118 prefixlen = 96; 1119 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr, 1120 prefixlen); 1121 } else { 1122 /* 1123 * The prefix is the IPv6 nat address 1124 * (that was stored in pd->nsaddr) 1125 */ 1126 prefixlen = in6_mask2len( 1127 (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL); 1128 if (prefixlen > 96) 1129 prefixlen = 96; 1130 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr, 1131 prefixlen); 1132 } 1133 } 1134 1135 PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf); 1136 PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf); 1137 1138 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1139 printf("pf: af-to %s done, prefixlen %d, ", 1140 pd->naf == AF_INET ? "inet" : "inet6", 1141 prefixlen); 1142 pf_print_host(&pd->nsaddr, pd->nsport, pd->naf); 1143 printf(" -> "); 1144 pf_print_host(&pd->ndaddr, pd->ndport, pd->naf); 1145 printf("\n"); 1146 } 1147 1148 return (0); 1149 #else 1150 return (-1); 1151 #endif 1152 } 1153