1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ 37 */ 38 39 #include <sys/cdefs.h> 40 #include "opt_pf.h" 41 #include "opt_inet.h" 42 #include "opt_inet6.h" 43 44 #include <sys/param.h> 45 #include <sys/lock.h> 46 #include <sys/mbuf.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <crypto/siphash/siphash.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/vnet.h> 55 #include <net/pfvar.h> 56 #include <net/if_pflog.h> 57 58 #ifdef INET 59 #include <netinet/in_var.h> 60 #endif 61 62 #ifdef INET6 63 #include <netinet6/in6_var.h> 64 #endif 65 66 67 /* 68 * Limit the amount of work we do to find a free source port for redirects that 69 * introduce a state conflict. 70 */ 71 #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries) 72 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; 73 74 #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x 75 76 static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, 77 struct pf_poolhashkey *, sa_family_t); 78 static struct pf_krule *pf_match_translation(struct pf_pdesc *, 79 int, struct pf_kanchor_stackframe *); 80 static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, 81 struct pf_addr *, uint16_t *, uint16_t, uint16_t, 82 struct pf_ksrc_node **, struct pf_srchash **, 83 struct pf_kpool *, struct pf_udp_mapping **, 84 pf_sn_types_t); 85 static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); 86 87 static uint64_t 88 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 89 struct pf_poolhashkey *key, sa_family_t af) 90 { 91 SIPHASH_CTX ctx; 92 #ifdef INET6 93 union { 94 uint64_t hash64; 95 uint32_t hash32[2]; 96 } h; 97 #endif 98 uint64_t res = 0; 99 100 _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, ""); 101 102 switch (af) { 103 #ifdef INET 104 case AF_INET: 105 res = SipHash24(&ctx, (const uint8_t *)key, 106 &inaddr->addr32[0], sizeof(inaddr->addr32[0])); 107 hash->addr32[0] = res; 108 break; 109 #endif /* INET */ 110 #ifdef INET6 111 case AF_INET6: 112 res = SipHash24(&ctx, (const uint8_t *)key, 113 &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0])); 114 h.hash64 = res; 115 hash->addr32[0] = h.hash32[0]; 116 hash->addr32[1] = h.hash32[1]; 117 /* 118 * siphash isn't big enough, but flipping it around is 119 * good enough here. 120 */ 121 hash->addr32[2] = ~h.hash32[1]; 122 hash->addr32[3] = ~h.hash32[0]; 123 break; 124 #endif /* INET6 */ 125 } 126 return (res); 127 } 128 129 static struct pf_krule * 130 pf_match_translation(struct pf_pdesc *pd, 131 int rs_num, struct pf_kanchor_stackframe *anchor_stack) 132 { 133 struct pf_krule *r, *rm = NULL; 134 struct pf_kruleset *ruleset = NULL; 135 int tag = -1; 136 int rtableid = -1; 137 int asd = 0; 138 139 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); 140 while (r != NULL) { 141 struct pf_rule_addr *src = NULL, *dst = NULL; 142 struct pf_addr_wrap *xdst = NULL; 143 144 if (r->action == PF_BINAT && pd->dir == PF_IN) { 145 src = &r->dst; 146 if (r->rdr.cur != NULL) 147 xdst = &r->rdr.cur->addr; 148 } else { 149 src = &r->src; 150 dst = &r->dst; 151 } 152 153 pf_counter_u64_add(&r->evaluations, 1); 154 if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot) 155 r = r->skip[PF_SKIP_IFP]; 156 else if (r->direction && r->direction != pd->dir) 157 r = r->skip[PF_SKIP_DIR]; 158 else if (r->af && r->af != pd->af) 159 r = r->skip[PF_SKIP_AF]; 160 else if (r->proto && r->proto != pd->proto) 161 r = r->skip[PF_SKIP_PROTO]; 162 else if (PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af, 163 src->neg, pd->kif, M_GETFIB(pd->m))) 164 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : 165 PF_SKIP_DST_ADDR]; 166 else if (src->port_op && !pf_match_port(src->port_op, 167 src->port[0], src->port[1], pd->nsport)) 168 r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : 169 PF_SKIP_DST_PORT]; 170 else if (dst != NULL && 171 PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL, 172 M_GETFIB(pd->m))) 173 r = r->skip[PF_SKIP_DST_ADDR]; 174 else if (xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af, 175 0, NULL, M_GETFIB(pd->m))) 176 r = TAILQ_NEXT(r, entries); 177 else if (dst != NULL && dst->port_op && 178 !pf_match_port(dst->port_op, dst->port[0], 179 dst->port[1], pd->ndport)) 180 r = r->skip[PF_SKIP_DST_PORT]; 181 else if (r->match_tag && !pf_match_tag(pd->m, r, &tag, 182 pd->pf_mtag ? pd->pf_mtag->tag : 0)) 183 r = TAILQ_NEXT(r, entries); 184 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != 185 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, 186 &pd->hdr.tcp), r->os_fingerprint))) 187 r = TAILQ_NEXT(r, entries); 188 else { 189 if (r->tag) 190 tag = r->tag; 191 if (r->rtableid >= 0) 192 rtableid = r->rtableid; 193 if (r->anchor == NULL) { 194 rm = r; 195 if (rm->action == PF_NONAT || 196 rm->action == PF_NORDR || 197 rm->action == PF_NOBINAT) { 198 rm = NULL; 199 } 200 break; 201 } else 202 pf_step_into_anchor(anchor_stack, &asd, 203 &ruleset, rs_num, &r, NULL); 204 } 205 if (r == NULL) 206 pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, 207 rs_num, &r, NULL, NULL); 208 } 209 210 if (tag > 0 && pf_tag_packet(pd, tag)) 211 return (NULL); 212 if (rtableid >= 0) 213 M_SETFIB(pd->m, rtableid); 214 215 return (rm); 216 } 217 218 static int 219 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, 220 struct pf_addr *naddr, uint16_t *nport, uint16_t low, 221 uint16_t high, struct pf_ksrc_node **sn, 222 struct pf_srchash **sh, struct pf_kpool *rpool, 223 struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type) 224 { 225 struct pf_state_key_cmp key; 226 struct pf_addr init_addr; 227 228 bzero(&init_addr, sizeof(init_addr)); 229 230 if (udp_mapping) { 231 MPASS(*udp_mapping == NULL); 232 } 233 234 /* 235 * If we are UDP and have an existing mapping we can get source port 236 * from the mapping. In this case we have to look up the src_node as 237 * pf_map_addr would. 238 */ 239 if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) { 240 struct pf_udp_endpoint_cmp udp_source; 241 242 bzero(&udp_source, sizeof(udp_source)); 243 udp_source.af = pd->af; 244 PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af); 245 udp_source.port = pd->nsport; 246 if (udp_mapping) { 247 *udp_mapping = pf_udp_mapping_find(&udp_source); 248 if (*udp_mapping) { 249 PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); 250 *nport = (*udp_mapping)->endpoints[1].port; 251 /* Try to find a src_node as per pf_map_addr(). */ 252 if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && 253 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) 254 *sn = pf_find_src_node(&pd->nsaddr, r, 255 pd->af, sh, sn_type, false); 256 if (*sn != NULL) 257 PF_SRC_NODE_UNLOCK(*sn); 258 return (0); 259 } else { 260 *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr, 261 pd->nsport, &init_addr, 0); 262 if (*udp_mapping == NULL) 263 return (1); 264 } 265 } 266 } 267 268 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr, 269 sn, sh, rpool, sn_type)) 270 goto failed; 271 272 if (pd->proto == IPPROTO_ICMP) { 273 if (*nport == htons(ICMP_ECHO)) { 274 low = 1; 275 high = 65535; 276 } else 277 return (0); /* Don't try to modify non-echo ICMP */ 278 } 279 #ifdef INET6 280 if (pd->proto == IPPROTO_ICMPV6) { 281 if (*nport == htons(ICMP6_ECHO_REQUEST)) { 282 low = 1; 283 high = 65535; 284 } else 285 return (0); /* Don't try to modify non-echo ICMP */ 286 } 287 #endif /* INET6 */ 288 289 bzero(&key, sizeof(key)); 290 key.af = pd->naf; 291 key.proto = pd->proto; 292 key.port[0] = pd->ndport; 293 PF_ACPY(&key.addr[0], &pd->ndaddr, key.af); 294 295 do { 296 PF_ACPY(&key.addr[1], naddr, key.af); 297 if (udp_mapping && *udp_mapping) 298 PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af); 299 300 /* 301 * port search; start random, step; 302 * similar 2 portloop in in_pcbbind 303 */ 304 if (pd->proto == IPPROTO_SCTP) { 305 key.port[1] = pd->nsport; 306 if (!pf_find_state_all_exists(&key, PF_IN)) { 307 *nport = pd->nsport; 308 return (0); 309 } else { 310 return (1); /* Fail mapping. */ 311 } 312 } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || 313 pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) { 314 /* 315 * XXX bug: icmp states don't use the id on both sides. 316 * (traceroute -I through nat) 317 */ 318 key.port[1] = pd->nsport; 319 if (!pf_find_state_all_exists(&key, PF_IN)) { 320 *nport = pd->nsport; 321 return (0); 322 } 323 } else if (low == high) { 324 key.port[1] = htons(low); 325 if (!pf_find_state_all_exists(&key, PF_IN)) { 326 if (udp_mapping && *udp_mapping != NULL) { 327 (*udp_mapping)->endpoints[1].port = htons(low); 328 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 329 *nport = htons(low); 330 return (0); 331 } 332 } else { 333 *nport = htons(low); 334 return (0); 335 } 336 } 337 } else { 338 uint32_t tmp; 339 uint16_t cut; 340 341 if (low > high) { 342 tmp = low; 343 low = high; 344 high = tmp; 345 } 346 /* low < high */ 347 cut = arc4random() % (1 + high - low) + low; 348 /* low <= cut <= high */ 349 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) { 350 if (udp_mapping && *udp_mapping != NULL) { 351 (*udp_mapping)->endpoints[1].port = htons(tmp); 352 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 353 *nport = htons(tmp); 354 return (0); 355 } 356 } else { 357 key.port[1] = htons(tmp); 358 if (!pf_find_state_all_exists(&key, PF_IN)) { 359 *nport = htons(tmp); 360 return (0); 361 } 362 } 363 } 364 tmp = cut; 365 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) { 366 if (pd->proto == IPPROTO_UDP && 367 (rpool->opts & PF_POOL_ENDPI && 368 udp_mapping != NULL)) { 369 (*udp_mapping)->endpoints[1].port = htons(tmp); 370 if (pf_udp_mapping_insert(*udp_mapping) == 0) { 371 *nport = htons(tmp); 372 return (0); 373 } 374 } else { 375 key.port[1] = htons(tmp); 376 if (!pf_find_state_all_exists(&key, PF_IN)) { 377 *nport = htons(tmp); 378 return (0); 379 } 380 } 381 } 382 } 383 384 switch (rpool->opts & PF_POOL_TYPEMASK) { 385 case PF_POOL_RANDOM: 386 case PF_POOL_ROUNDROBIN: 387 /* 388 * pick a different source address since we're out 389 * of free port choices for the current one. 390 */ 391 (*sn) = NULL; 392 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, 393 &init_addr, sn, sh, rpool, sn_type)) 394 return (1); 395 break; 396 case PF_POOL_NONE: 397 case PF_POOL_SRCHASH: 398 case PF_POOL_BITMASK: 399 default: 400 return (1); 401 } 402 } while (! PF_AEQ(&init_addr, naddr, pd->naf) ); 403 404 failed: 405 if (udp_mapping) { 406 uma_zfree(V_pf_udp_mapping_z, *udp_mapping); 407 *udp_mapping = NULL; 408 } 409 410 return (1); /* none available */ 411 } 412 413 static bool 414 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr) 415 { 416 if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6)) 417 return (true); 418 return (false); 419 } 420 421 static int 422 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, 423 struct pf_addr *naddr, uint16_t *nport, 424 struct pf_ksrc_node **sn, struct pf_srchash **sh, 425 struct pf_udp_mapping **udp_mapping) 426 { 427 uint16_t psmask, low, highmask; 428 uint16_t i, ahigh, cut; 429 int ashift, psidshift; 430 431 ashift = 16 - r->rdr.mape.offset; 432 psidshift = ashift - r->rdr.mape.psidlen; 433 psmask = r->rdr.mape.psid & ((1U << r->rdr.mape.psidlen) - 1); 434 psmask = psmask << psidshift; 435 highmask = (1U << psidshift) - 1; 436 437 ahigh = (1U << r->rdr.mape.offset) - 1; 438 cut = arc4random() & ahigh; 439 if (cut == 0) 440 cut = 1; 441 442 for (i = cut; i <= ahigh; i++) { 443 low = (i << ashift) | psmask; 444 if (!pf_get_sport(pd, r, 445 naddr, nport, low, low | highmask, sn, sh, &r->rdr, 446 udp_mapping, PF_SN_NAT)) 447 return (0); 448 } 449 for (i = cut - 1; i > 0; i--) { 450 low = (i << ashift) | psmask; 451 if (!pf_get_sport(pd, r, 452 naddr, nport, low, low | highmask, sn, sh, &r->rdr, 453 udp_mapping, PF_SN_NAT)) 454 return (0); 455 } 456 return (1); 457 } 458 459 u_short 460 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, 461 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, 462 struct pf_kpool *rpool) 463 { 464 u_short reason = PFRES_MATCH; 465 struct pf_addr *raddr = NULL, *rmask = NULL; 466 uint64_t hashidx; 467 int cnt; 468 469 mtx_lock(&rpool->mtx); 470 /* Find the route using chosen algorithm. Store the found route 471 in src_node if it was given or found. */ 472 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) { 473 reason = PFRES_MAPFAILED; 474 goto done_pool_mtx; 475 } 476 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 477 switch (af) { 478 #ifdef INET 479 case AF_INET: 480 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && 481 !PF_POOL_DYNTYPE(rpool->opts)) { 482 reason = PFRES_MAPFAILED; 483 goto done_pool_mtx; 484 } 485 raddr = &rpool->cur->addr.p.dyn->pfid_addr4; 486 rmask = &rpool->cur->addr.p.dyn->pfid_mask4; 487 break; 488 #endif /* INET */ 489 #ifdef INET6 490 case AF_INET6: 491 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && 492 !PF_POOL_DYNTYPE(rpool->opts)) { 493 reason = PFRES_MAPFAILED; 494 goto done_pool_mtx; 495 } 496 raddr = &rpool->cur->addr.p.dyn->pfid_addr6; 497 rmask = &rpool->cur->addr.p.dyn->pfid_mask6; 498 break; 499 #endif /* INET6 */ 500 } 501 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { 502 if (!PF_POOL_DYNTYPE(rpool->opts)) { 503 reason = PFRES_MAPFAILED; 504 goto done_pool_mtx; /* unsupported */ 505 } 506 } else { 507 raddr = &rpool->cur->addr.v.a.addr; 508 rmask = &rpool->cur->addr.v.a.mask; 509 } 510 511 switch (rpool->opts & PF_POOL_TYPEMASK) { 512 case PF_POOL_NONE: 513 PF_ACPY(naddr, raddr, af); 514 break; 515 case PF_POOL_BITMASK: 516 PF_POOLMASK(naddr, raddr, rmask, saddr, af); 517 break; 518 case PF_POOL_RANDOM: 519 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 520 cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; 521 rpool->tblidx = (int)arc4random_uniform(cnt); 522 memset(&rpool->counter, 0, sizeof(rpool->counter)); 523 if (pfr_pool_get(rpool->cur->addr.p.tbl, 524 &rpool->tblidx, &rpool->counter, af, NULL)) { 525 reason = PFRES_MAPFAILED; 526 goto done_pool_mtx; /* unsupported */ 527 } 528 PF_ACPY(naddr, &rpool->counter, af); 529 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 530 cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; 531 rpool->tblidx = (int)arc4random_uniform(cnt); 532 memset(&rpool->counter, 0, sizeof(rpool->counter)); 533 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 534 &rpool->tblidx, &rpool->counter, af, 535 pf_islinklocal)) { 536 reason = PFRES_MAPFAILED; 537 goto done_pool_mtx; /* unsupported */ 538 } 539 PF_ACPY(naddr, &rpool->counter, af); 540 } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { 541 switch (af) { 542 #ifdef INET 543 case AF_INET: 544 rpool->counter.addr32[0] = htonl(arc4random()); 545 break; 546 #endif /* INET */ 547 #ifdef INET6 548 case AF_INET6: 549 if (rmask->addr32[3] != 0xffffffff) 550 rpool->counter.addr32[3] = 551 htonl(arc4random()); 552 else 553 break; 554 if (rmask->addr32[2] != 0xffffffff) 555 rpool->counter.addr32[2] = 556 htonl(arc4random()); 557 else 558 break; 559 if (rmask->addr32[1] != 0xffffffff) 560 rpool->counter.addr32[1] = 561 htonl(arc4random()); 562 else 563 break; 564 if (rmask->addr32[0] != 0xffffffff) 565 rpool->counter.addr32[0] = 566 htonl(arc4random()); 567 break; 568 #endif /* INET6 */ 569 } 570 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 571 PF_ACPY(init_addr, naddr, af); 572 573 } else { 574 PF_AINC(&rpool->counter, af); 575 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 576 } 577 break; 578 case PF_POOL_SRCHASH: 579 { 580 unsigned char hash[16]; 581 582 hashidx = 583 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 584 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 585 cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; 586 rpool->tblidx = (int)(hashidx % cnt); 587 memset(&rpool->counter, 0, sizeof(rpool->counter)); 588 if (pfr_pool_get(rpool->cur->addr.p.tbl, 589 &rpool->tblidx, &rpool->counter, af, NULL)) { 590 reason = PFRES_MAPFAILED; 591 goto done_pool_mtx; /* unsupported */ 592 } 593 PF_ACPY(naddr, &rpool->counter, af); 594 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 595 cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; 596 rpool->tblidx = (int)(hashidx % cnt); 597 memset(&rpool->counter, 0, sizeof(rpool->counter)); 598 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 599 &rpool->tblidx, &rpool->counter, af, 600 pf_islinklocal)) { 601 reason = PFRES_MAPFAILED; 602 goto done_pool_mtx; /* unsupported */ 603 } 604 PF_ACPY(naddr, &rpool->counter, af); 605 } else { 606 PF_POOLMASK(naddr, raddr, rmask, 607 (struct pf_addr *)&hash, af); 608 } 609 break; 610 } 611 case PF_POOL_ROUNDROBIN: 612 { 613 struct pf_kpooladdr *acur = rpool->cur; 614 615 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 616 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 617 &rpool->tblidx, &rpool->counter, af, NULL)) 618 goto get_addr; 619 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 620 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 621 &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) 622 goto get_addr; 623 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 624 goto get_addr; 625 626 try_next: 627 if (TAILQ_NEXT(rpool->cur, entries) == NULL) 628 rpool->cur = TAILQ_FIRST(&rpool->list); 629 else 630 rpool->cur = TAILQ_NEXT(rpool->cur, entries); 631 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 632 if (pfr_pool_get(rpool->cur->addr.p.tbl, 633 &rpool->tblidx, &rpool->counter, af, NULL)) { 634 /* table contains no address of type 'af' */ 635 if (rpool->cur != acur) 636 goto try_next; 637 reason = PFRES_MAPFAILED; 638 goto done_pool_mtx; 639 } 640 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 641 rpool->tblidx = -1; 642 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 643 &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) { 644 /* table contains no address of type 'af' */ 645 if (rpool->cur != acur) 646 goto try_next; 647 reason = PFRES_MAPFAILED; 648 goto done_pool_mtx; 649 } 650 } else { 651 raddr = &rpool->cur->addr.v.a.addr; 652 rmask = &rpool->cur->addr.v.a.mask; 653 PF_ACPY(&rpool->counter, raddr, af); 654 } 655 656 get_addr: 657 PF_ACPY(naddr, &rpool->counter, af); 658 if (init_addr != NULL && PF_AZERO(init_addr, af)) 659 PF_ACPY(init_addr, naddr, af); 660 PF_AINC(&rpool->counter, af); 661 break; 662 } 663 } 664 665 if (nkif) 666 *nkif = rpool->cur->kif; 667 668 done_pool_mtx: 669 mtx_unlock(&rpool->mtx); 670 671 if (reason) { 672 counter_u64_add(V_pf_status.counters[reason], 1); 673 } 674 675 return (reason); 676 } 677 678 u_short 679 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, 680 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, 681 struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_kpool *rpool, 682 pf_sn_types_t sn_type) 683 { 684 u_short reason = 0; 685 686 KASSERT(*sn == NULL, ("*sn not NULL")); 687 688 /* 689 * If this is a sticky-address rule, try to find an existing src_node. 690 * Request the sh to be unlocked if sn was not found, as we never 691 * insert a new sn when parsing the ruleset. 692 */ 693 if (rpool->opts & PF_POOL_STICKYADDR && 694 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) 695 *sn = pf_find_src_node(saddr, r, af, sh, sn_type, false); 696 697 if (*sn != NULL) { 698 PF_SRC_NODE_LOCK_ASSERT(*sn); 699 700 /* If the supplied address is the same as the current one we've 701 * been asked before, so tell the caller that there's no other 702 * address to be had. */ 703 if (PF_AEQ(naddr, &(*sn)->raddr, af)) { 704 reason = PFRES_MAPFAILED; 705 goto done; 706 } 707 708 PF_ACPY(naddr, &(*sn)->raddr, af); 709 if (nkif) 710 *nkif = (*sn)->rkif; 711 if (V_pf_status.debug >= PF_DEBUG_NOISY) { 712 printf("pf_map_addr: src tracking maps "); 713 pf_print_host(saddr, 0, af); 714 printf(" to "); 715 pf_print_host(naddr, 0, af); 716 if (nkif) 717 printf("@%s", (*nkif)->pfik_name); 718 printf("\n"); 719 } 720 goto done; 721 } 722 723 /* 724 * Source node has not been found. Find a new address and store it 725 * in variables given by the caller. 726 */ 727 if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) { 728 /* pf_map_addr() sets reason counters on its own */ 729 goto done; 730 } 731 732 if (V_pf_status.debug >= PF_DEBUG_NOISY && 733 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 734 printf("pf_map_addr: selected address "); 735 pf_print_host(naddr, 0, af); 736 if (nkif) 737 printf("@%s", (*nkif)->pfik_name); 738 printf("\n"); 739 } 740 741 done: 742 if ((*sn) != NULL) 743 PF_SRC_NODE_UNLOCK(*sn); 744 745 if (reason) { 746 counter_u64_add(V_pf_status.counters[reason], 1); 747 } 748 749 return (reason); 750 } 751 752 u_short 753 pf_get_translation(struct pf_pdesc *pd, int off, 754 struct pf_state_key **skp, struct pf_state_key **nkp, 755 struct pf_kanchor_stackframe *anchor_stack, struct pf_krule **rp, 756 struct pf_udp_mapping **udp_mapping) 757 { 758 struct pf_krule *r = NULL; 759 struct pf_addr *naddr; 760 struct pf_ksrc_node *sn = NULL; 761 struct pf_srchash *sh = NULL; 762 uint16_t *nportp; 763 uint16_t low, high; 764 u_short reason; 765 766 PF_RULES_RASSERT(); 767 KASSERT(*skp == NULL, ("*skp not NULL")); 768 KASSERT(*nkp == NULL, ("*nkp not NULL")); 769 770 *rp = NULL; 771 772 if (pd->dir == PF_OUT) { 773 r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack); 774 if (r == NULL) 775 r = pf_match_translation(pd, PF_RULESET_NAT, anchor_stack); 776 } else { 777 r = pf_match_translation(pd, PF_RULESET_RDR, anchor_stack); 778 if (r == NULL) 779 r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack); 780 } 781 782 if (r == NULL) 783 return (PFRES_MAX); 784 785 switch (r->action) { 786 case PF_NONAT: 787 case PF_NOBINAT: 788 case PF_NORDR: 789 return (PFRES_MAX); 790 } 791 792 if (pf_state_key_setup(pd, pd->nsport, pd->ndport, skp, nkp)) 793 return (PFRES_MEMORY); 794 795 naddr = &(*nkp)->addr[1]; 796 nportp = &(*nkp)->port[1]; 797 798 switch (r->action) { 799 case PF_NAT: 800 if (pd->proto == IPPROTO_ICMP) { 801 low = 1; 802 high = 65535; 803 } else { 804 low = r->rdr.proxy_port[0]; 805 high = r->rdr.proxy_port[1]; 806 } 807 if (r->rdr.mape.offset > 0) { 808 if (pf_get_mape_sport(pd, r, naddr, nportp, &sn, 809 &sh, udp_mapping)) { 810 DPFPRINTF(PF_DEBUG_MISC, 811 ("pf: MAP-E port allocation (%u/%u/%u)" 812 " failed\n", 813 r->rdr.mape.offset, 814 r->rdr.mape.psidlen, 815 r->rdr.mape.psid)); 816 reason = PFRES_MAPFAILED; 817 goto notrans; 818 } 819 } else if (pf_get_sport(pd, r, naddr, nportp, low, high, &sn, 820 &sh, &r->rdr, udp_mapping, PF_SN_NAT)) { 821 DPFPRINTF(PF_DEBUG_MISC, 822 ("pf: NAT proxy port allocation (%u-%u) failed\n", 823 r->rdr.proxy_port[0], r->rdr.proxy_port[1])); 824 reason = PFRES_MAPFAILED; 825 goto notrans; 826 } 827 break; 828 case PF_BINAT: 829 switch (pd->dir) { 830 case PF_OUT: 831 if (r->rdr.cur->addr.type == PF_ADDR_DYNIFTL){ 832 switch (pd->af) { 833 #ifdef INET 834 case AF_INET: 835 if (r->rdr.cur->addr.p.dyn-> 836 pfid_acnt4 < 1) { 837 reason = PFRES_MAPFAILED; 838 goto notrans; 839 } 840 PF_POOLMASK(naddr, 841 &r->rdr.cur->addr.p.dyn-> 842 pfid_addr4, 843 &r->rdr.cur->addr.p.dyn-> 844 pfid_mask4, &pd->nsaddr, AF_INET); 845 break; 846 #endif /* INET */ 847 #ifdef INET6 848 case AF_INET6: 849 if (r->rdr.cur->addr.p.dyn-> 850 pfid_acnt6 < 1) { 851 reason = PFRES_MAPFAILED; 852 goto notrans; 853 } 854 PF_POOLMASK(naddr, 855 &r->rdr.cur->addr.p.dyn-> 856 pfid_addr6, 857 &r->rdr.cur->addr.p.dyn-> 858 pfid_mask6, &pd->nsaddr, AF_INET6); 859 break; 860 #endif /* INET6 */ 861 } 862 } else 863 PF_POOLMASK(naddr, 864 &r->rdr.cur->addr.v.a.addr, 865 &r->rdr.cur->addr.v.a.mask, &pd->nsaddr, 866 pd->af); 867 break; 868 case PF_IN: 869 if (r->src.addr.type == PF_ADDR_DYNIFTL) { 870 switch (pd->af) { 871 #ifdef INET 872 case AF_INET: 873 if (r->src.addr.p.dyn->pfid_acnt4 < 1) { 874 reason = PFRES_MAPFAILED; 875 goto notrans; 876 } 877 PF_POOLMASK(naddr, 878 &r->src.addr.p.dyn->pfid_addr4, 879 &r->src.addr.p.dyn->pfid_mask4, 880 &pd->ndaddr, AF_INET); 881 break; 882 #endif /* INET */ 883 #ifdef INET6 884 case AF_INET6: 885 if (r->src.addr.p.dyn->pfid_acnt6 < 1) { 886 reason = PFRES_MAPFAILED; 887 goto notrans; 888 } 889 PF_POOLMASK(naddr, 890 &r->src.addr.p.dyn->pfid_addr6, 891 &r->src.addr.p.dyn->pfid_mask6, 892 &pd->ndaddr, AF_INET6); 893 break; 894 #endif /* INET6 */ 895 } 896 } else 897 PF_POOLMASK(naddr, &r->src.addr.v.a.addr, 898 &r->src.addr.v.a.mask, &pd->ndaddr, pd->af); 899 break; 900 } 901 break; 902 case PF_RDR: { 903 struct pf_state_key_cmp key; 904 int tries; 905 uint16_t cut, low, high, nport; 906 907 reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL, 908 NULL, &sn, &sh, &r->rdr, PF_SN_NAT); 909 if (reason != 0) 910 goto notrans; 911 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 912 PF_POOLMASK(naddr, naddr, &r->rdr.cur->addr.v.a.mask, 913 &pd->ndaddr, pd->af); 914 915 /* Do not change SCTP ports. */ 916 if (pd->proto == IPPROTO_SCTP) 917 break; 918 919 if (r->rdr.proxy_port[1]) { 920 uint32_t tmp_nport; 921 922 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % 923 (r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 924 1)) + r->rdr.proxy_port[0]; 925 926 /* Wrap around if necessary. */ 927 if (tmp_nport > 65535) 928 tmp_nport -= 65535; 929 nport = htons((uint16_t)tmp_nport); 930 } else if (r->rdr.proxy_port[0]) 931 nport = htons(r->rdr.proxy_port[0]); 932 else 933 nport = pd->ndport; 934 935 /* 936 * Update the destination port. 937 */ 938 *nportp = nport; 939 940 /* 941 * Do we have a source port conflict in the stack state? Try to 942 * modulate the source port if so. Note that this is racy since 943 * the state lookup may not find any matches here but will once 944 * pf_create_state() actually instantiates the state. 945 */ 946 bzero(&key, sizeof(key)); 947 key.af = pd->af; 948 key.proto = pd->proto; 949 key.port[0] = pd->nsport; 950 PF_ACPY(&key.addr[0], &pd->nsaddr, key.af); 951 key.port[1] = nport; 952 PF_ACPY(&key.addr[1], naddr, key.af); 953 954 if (!pf_find_state_all_exists(&key, PF_OUT)) 955 break; 956 957 tries = 0; 958 959 low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */ 960 high = 65535; 961 cut = arc4random() % (1 + high - low) + low; 962 for (uint32_t tmp = cut; 963 tmp <= high && tmp <= UINT16_MAX && 964 tries < V_pf_rdr_srcport_rewrite_tries; 965 tmp++, tries++) { 966 key.port[0] = htons(tmp); 967 if (!pf_find_state_all_exists(&key, PF_OUT)) { 968 /* Update the source port. */ 969 (*nkp)->port[0] = htons(tmp); 970 goto out; 971 } 972 } 973 for (uint32_t tmp = cut - 1; 974 tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries; 975 tmp--, tries++) { 976 key.port[0] = htons(tmp); 977 if (!pf_find_state_all_exists(&key, PF_OUT)) { 978 /* Update the source port. */ 979 (*nkp)->port[0] = htons(tmp); 980 goto out; 981 } 982 } 983 984 /* 985 * We failed to find a match. Push on ahead anyway, let 986 * pf_state_insert() be the arbiter of whether the state 987 * conflict is tolerable. In particular, with TCP connections 988 * the state may be reused if the TCP state is terminal. 989 */ 990 DPFPRINTF(PF_DEBUG_MISC, 991 ("pf: RDR source port allocation failed\n")); 992 break; 993 994 out: 995 DPFPRINTF(PF_DEBUG_MISC, 996 ("pf: RDR source port allocation %u->%u\n", 997 ntohs(pd->nsport), ntohs((*nkp)->port[0]))); 998 break; 999 } 1000 default: 1001 panic("%s: unknown action %u", __func__, r->action); 1002 } 1003 1004 /* Return success only if translation really happened. */ 1005 if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) { 1006 *rp = r; 1007 return (PFRES_MATCH); 1008 } 1009 1010 reason = PFRES_MAX; 1011 notrans: 1012 uma_zfree(V_pf_state_key_z, *nkp); 1013 uma_zfree(V_pf_state_key_z, *skp); 1014 *skp = *nkp = NULL; 1015 1016 return (reason); 1017 } 1018 1019 int 1020 pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) 1021 { 1022 #if defined(INET) && defined(INET6) 1023 struct pf_addr ndaddr, nsaddr, naddr; 1024 u_int16_t nport = 0; 1025 int prefixlen = 96; 1026 struct pf_srchash *sh = NULL; 1027 struct pf_ksrc_node *sns = NULL; 1028 1029 bzero(&nsaddr, sizeof(nsaddr)); 1030 bzero(&ndaddr, sizeof(ndaddr)); 1031 1032 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1033 printf("pf: af-to %s %s, ", 1034 pd->naf == AF_INET ? "inet" : "inet6", 1035 TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr"); 1036 pf_print_host(&pd->nsaddr, pd->nsport, pd->af); 1037 printf(" -> "); 1038 pf_print_host(&pd->ndaddr, pd->ndport, pd->af); 1039 printf("\n"); 1040 } 1041 1042 if (TAILQ_EMPTY(&r->nat.list)) 1043 panic("pf_get_transaddr_af: no nat pool for source address"); 1044 1045 /* get source address and port */ 1046 if (pf_get_sport(pd, r, &nsaddr, &nport, 1047 r->nat.proxy_port[0], r->nat.proxy_port[1], &sns, &sh, &r->nat, 1048 NULL, PF_SN_NAT)) { 1049 DPFPRINTF(PF_DEBUG_MISC, 1050 ("pf: af-to NAT proxy port allocation (%u-%u) failed", 1051 r->nat.proxy_port[0], r->nat.proxy_port[1])); 1052 return (-1); 1053 } 1054 1055 if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) { 1056 if (pd->dir == PF_IN) { 1057 NTOHS(pd->ndport); 1058 if (pd->ndport == ICMP6_ECHO_REQUEST) 1059 pd->ndport = ICMP_ECHO; 1060 else if (pd->ndport == ICMP6_ECHO_REPLY) 1061 pd->ndport = ICMP_ECHOREPLY; 1062 HTONS(pd->ndport); 1063 } else { 1064 NTOHS(pd->nsport); 1065 if (pd->nsport == ICMP6_ECHO_REQUEST) 1066 pd->nsport = ICMP_ECHO; 1067 else if (pd->nsport == ICMP6_ECHO_REPLY) 1068 pd->nsport = ICMP_ECHOREPLY; 1069 HTONS(pd->nsport); 1070 } 1071 } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) { 1072 if (pd->dir == PF_IN) { 1073 NTOHS(pd->ndport); 1074 if (pd->ndport == ICMP_ECHO) 1075 pd->ndport = ICMP6_ECHO_REQUEST; 1076 else if (pd->ndport == ICMP_ECHOREPLY) 1077 pd->ndport = ICMP6_ECHO_REPLY; 1078 HTONS(pd->ndport); 1079 } else { 1080 NTOHS(pd->nsport); 1081 if (pd->nsport == ICMP_ECHO) 1082 pd->nsport = ICMP6_ECHO_REQUEST; 1083 else if (pd->nsport == ICMP_ECHOREPLY) 1084 pd->nsport = ICMP6_ECHO_REPLY; 1085 HTONS(pd->nsport); 1086 } 1087 } 1088 1089 /* get the destination address and port */ 1090 if (! TAILQ_EMPTY(&r->rdr.list)) { 1091 if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL, 1092 &sns, NULL, &r->rdr, PF_SN_NAT)) 1093 return (-1); 1094 if (r->rdr.proxy_port[0]) 1095 pd->ndport = htons(r->rdr.proxy_port[0]); 1096 1097 if (pd->naf == AF_INET) { 1098 /* The prefix is the IPv4 rdr address */ 1099 prefixlen = in_mask2len( 1100 (struct in_addr *)&r->rdr.cur->addr.v.a.mask); 1101 inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1102 prefixlen); 1103 } else { 1104 /* The prefix is the IPv6 rdr address */ 1105 prefixlen = in6_mask2len( 1106 (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL); 1107 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr, 1108 prefixlen); 1109 } 1110 } else { 1111 if (pd->naf == AF_INET) { 1112 /* The prefix is the IPv6 dst address */ 1113 prefixlen = in6_mask2len( 1114 (struct in6_addr *)&r->dst.addr.v.a.mask, NULL); 1115 if (prefixlen < 32) 1116 prefixlen = 96; 1117 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr, 1118 prefixlen); 1119 } else { 1120 /* 1121 * The prefix is the IPv6 nat address 1122 * (that was stored in pd->nsaddr) 1123 */ 1124 prefixlen = in6_mask2len( 1125 (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL); 1126 if (prefixlen > 96) 1127 prefixlen = 96; 1128 inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr, 1129 prefixlen); 1130 } 1131 } 1132 1133 PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf); 1134 PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf); 1135 1136 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1137 printf("pf: af-to %s done, prefixlen %d, ", 1138 pd->naf == AF_INET ? "inet" : "inet6", 1139 prefixlen); 1140 pf_print_host(&pd->nsaddr, pd->nsport, pd->naf); 1141 printf(" -> "); 1142 pf_print_host(&pd->ndaddr, pd->ndport, pd->naf); 1143 printf("\n"); 1144 } 1145 1146 return (0); 1147 #else 1148 return (-1); 1149 #endif 1150 } 1151