/*-
 * Copyright (c) 2015-2016 Yandex LLC
 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
 * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/queue.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_pflog.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip_fw_nat64.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
#include <netpfil/ipfw/nat64/nat64lsn.h>
#include <netpfil/ipfw/nat64/nat64_translate.h>
#include <netpfil/pf/pf.h>

MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

static void nat64lsn_periodic(void *data);
#define	PERIODIC_DELAY	4
static uint8_t nat64lsn_proto_map[256];
uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];

#define	NAT64_FLAG_FIN		0x01	/* FIN was seen */
#define	NAT64_FLAG_SYN		0x02	/* First syn in->out */
#define	NAT64_FLAG_ESTAB	0x04	/* Packet with Ack */
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

#define	NAT64_FLAG_RDR		0x80	/* Port redirect */

#define	NAT64_LOOKUP(chain, cmd)	\
	(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)

/*
 * Delayed job queue, used to create new hosts
 * and new portgroups
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,
	JTYPE_NEWPORTGROUP,
	JTYPE_DELPORTGROUP,
};

struct nat64lsn_job_item {
	TAILQ_ENTRY(nat64lsn_job_item)	next;
	enum nat64lsn_jtype	jtype;
	struct nat64lsn_host	*nh;
	struct nat64lsn_portgroup	*pg;
	void			*spare_idx;
	struct in6_addr		haddr;
	uint8_t			nat_proto;
	uint8_t			done;
	int			needs_idx;
	int			delcount;
	unsigned int		fhash;	/* Flow hash */
	uint32_t		aaddr;	/* Last used address (net) */
	struct mbuf		*m;
	struct ipfw_flow_id	f_id;
	uint64_t		delmask[NAT64LSN_PGPTRNMASK];
};
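/*
 * Rough life cycle of a job item, as a sketch derived from the code
 * below: the IPv6 fast path allocates one in nat64lsn_create_job(),
 * nat64lsn_enqueue_job() queues it under jmtx and arms jcallout, and
 * nat64lsn_do_request() later preallocates the objects, attaches them
 * under the IPFW write lock and reinjects any mbuf held in ji->m.
 */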
static struct mtx jmtx;
#define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define	JQUEUE_LOCK()		mtx_lock(&jmtx)
#define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)

static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_head *jhead, int jlen);

static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, int jtype);
static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
    int needs_idx);
static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **pm);
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **pm);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **pm);

static int alloc_portgroup(struct nat64lsn_job_item *ji);
static void destroy_portgroup(struct nat64lsn_portgroup *pg);
static void destroy_host6(struct nat64lsn_host *nh);
static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);

static int attach_portgroup(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);


/* XXX tmp */
static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_pgidx_zone;

static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
    struct nat64lsn_host *nh);

#define	I6_hash(x)		(djb_hash((const unsigned char *)(x), 16))
#define	I6_first(_ph, h)	(_ph)[h]
#define	I6_next(x)		(x)->next
#define	I6_val(x)		(&(x)->addr)
#define	I6_cmp(a, b)		IN6_ARE_ADDR_EQUAL(a, b)
#define	I6_lock(a, b)
#define	I6_unlock(a, b)

#define	I6HASH_FIND(_cfg, _res, _a) \
	CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)
#define	I6HASH_INSERT(_cfg, _i)	\
	CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)
#define	I6HASH_REMOVE(_cfg, _res, _tmp, _a)	\
	CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)

#define	I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg)	\
	CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)

#define	HASH_IN4(x)	djb_hash((const unsigned char *)(x), 8)

static unsigned
djb_hash(const unsigned char *h, const int len)
{
	unsigned int result = 0;
	int i;

	for (i = 0; i < len; i++)
		result = 33 * result ^ h[i];

	return (result);
}
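/*
 * djb_hash() above is Bernstein's times-33 hash with XOR folding; it
 * serves both the 16-byte IPv6 host addresses (I6_hash) and the 8-byte
 * IPv4 state keys (HASH_IN4).
 */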
/*
static size_t
bitmask_size(size_t num, int *level)
{
	size_t x;
	int c;

	for (c = 0, x = num; num > 1; num /= 64, c++)
		;

	return (x);
}

static void
bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
{
	size_t x, z;

	memset(pmask, 0xFF, bufsize);
	for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
		;
	pmask[x] &= ~(uint64_t)0x01;
}
*/

static void
nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
    uint32_t n, uint32_t sn)
{

	memset(plog, 0, sizeof(*plog));
	plog->length = PFLOG_REAL_HDRLEN;
	plog->af = family;
	plog->action = PF_NAT;
	plog->dir = PF_IN;
	plog->rulenr = htonl(n);
	plog->subrulenr = htonl(sn);
	plog->ruleset[0] = '\0';
	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
}
/*
 * Inspects ICMP packets to see if the message contains a different
 * packet header, in which case @addr and @port need to be altered.
 */
static int
inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
    uint16_t *port)
{
	struct ip *ip;
	struct tcphdr *tcp;
	struct udphdr *udp;
	struct icmphdr *icmp;
	int off;
	uint8_t proto;

	ip = mtod(*m, struct ip *);	/* Outer IP header */
	off = (ip->ip_hl << 2) + ICMP_MINLEN;
	if ((*m)->m_len < off)
		*m = m_pullup(*m, off);
	if (*m == NULL)
		return (ENOMEM);

	ip = mtod(*m, struct ip *);	/* Outer IP header */
	icmp = L3HDR(ip, struct icmphdr *);
	switch (icmp->icmp_type) {
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
		/* Use icmp ID as distinguisher */
		*port = ntohs(*((uint16_t *)(icmp + 1)));
		return (0);
	case ICMP_UNREACH:
	case ICMP_TIMXCEED:
		break;
	default:
		return (EOPNOTSUPP);
	}
	/*
	 * ICMP_UNREACH and ICMP_TIMXCEED contain the IP header plus
	 * 64 bits of the ULP header.
	 */
	if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
		return (EINVAL);
	if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
		*m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);
	if (*m == NULL)
		return (ENOMEM);
	ip = mtodo(*m, off);	/* Inner IP header */
	proto = ip->ip_p;
	off += ip->ip_hl << 2;	/* Skip inner IP header */
	*addr = ntohl(ip->ip_src.s_addr);
	if ((*m)->m_len < off + ICMP_MINLEN)
		*m = m_pullup(*m, off + ICMP_MINLEN);
	if (*m == NULL)
		return (ENOMEM);
	switch (proto) {
	case IPPROTO_TCP:
		tcp = mtodo(*m, off);
		*nat_proto = NAT_PROTO_TCP;
		*port = ntohs(tcp->th_sport);
		return (0);
	case IPPROTO_UDP:
		udp = mtodo(*m, off);
		*nat_proto = NAT_PROTO_UDP;
		*port = ntohs(udp->uh_sport);
		return (0);
	case IPPROTO_ICMP:
		/*
		 * We will translate only ICMP errors for our ICMP
		 * echo requests.
		 */
		icmp = mtodo(*m, off);
		if (icmp->icmp_type != ICMP_ECHO)
			return (EOPNOTSUPP);
		*port = ntohs(*((uint16_t *)(icmp + 1)));
		return (0);
	}
	return (EOPNOTSUPP);
}

static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	uint8_t result;

	result = flags & (TH_FIN|TH_SYN);
	result |= (flags & TH_RST) >> 2;	/* Treat RST as FIN */
	result |= (flags & TH_ACK) >> 2;	/* Treat ACK as estab */

	return (result);
}
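/*
 * With the standard flag values from <netinet/tcp.h> (TH_FIN 0x01,
 * TH_SYN 0x02, TH_RST 0x04, TH_ACK 0x10), the two shifts above map
 * TH_RST onto NAT64_FLAG_FIN (0x01) and TH_ACK onto NAT64_FLAG_ESTAB
 * (0x04), so a reset ages a state on the same schedule as a FIN.
 */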
static NAT64NOINLINE int
nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
    struct mbuf **pm)
{
	struct pfloghdr loghdr, *logdata;
	struct in6_addr src6;
	struct nat64lsn_portgroup *pg;
	struct nat64lsn_host *nh;
	struct nat64lsn_state *st;
	struct ip *ip;
	uint32_t addr;
	uint16_t state_flags, state_ts;
	uint16_t port, lport;
	uint8_t nat_proto;
	int ret;

	addr = f_id->dst_ip;
	port = f_id->dst_port;
	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
		NAT64STAT_INC(&cfg->stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* Check if protocol is supported and get its short id */
	nat_proto = nat64lsn_proto_map[f_id->proto];
	if (nat_proto == 0) {
		NAT64STAT_INC(&cfg->stats, noproto);
		return (cfg->nomatch_verdict);
	}

	/* We might need to handle icmp differently */
	if (nat_proto == NAT_PROTO_ICMP) {
		ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
		if (ret != 0) {
			if (ret == ENOMEM)
				NAT64STAT_INC(&cfg->stats, nomem);
			else
				NAT64STAT_INC(&cfg->stats, noproto);
			return (cfg->nomatch_verdict);
		}
		/* XXX: Check addr for validity */
		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
			NAT64STAT_INC(&cfg->stats, nomatch4);
			return (cfg->nomatch_verdict);
		}
	}

	/* Calc portgroup offset w.r.t protocol */
	pg = GET_PORTGROUP(cfg, addr, nat_proto, port);

	/* Check if this port is occupied by any portgroup */
	if (pg == NULL) {
		NAT64STAT_INC(&cfg->stats, nomatch4);
#if 0
		DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto,
		    port, _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
#endif
		return (cfg->nomatch_verdict);
	}

	/* TODO: Check flags to see if we need to do some static mapping */
	nh = pg->host;

	/* Prepare some fields we might need to update */
	SET_AGE(state_ts);
	ip = mtod(*pm, struct ip *);
	if (ip->ip_p == IPPROTO_TCP)
		state_flags = convert_tcp_flags(
		    L3HDR(ip, struct tcphdr *)->th_flags);
	else
		state_flags = 0;

	/* Lock host and get port mapping */
	NAT64_LOCK(nh);

	st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)];
	if (st->timestamp != state_ts)
		st->timestamp = state_ts;
	if ((st->flags & state_flags) != state_flags)
		st->flags |= state_flags;
	lport = htons(st->u.s.lport);

	NAT64_UNLOCK(nh);

	if (cfg->flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off);
	} else
		logdata = NULL;

	src6.s6_addr32[0] = cfg->prefix6.s6_addr32[0];
	src6.s6_addr32[1] = cfg->prefix6.s6_addr32[1];
	src6.s6_addr32[2] = cfg->prefix6.s6_addr32[2];
	src6.s6_addr32[3] = htonl(f_id->src_ip);

	ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,
	    &cfg->stats, logdata);

	if (ret == NAT64SKIP)
		return (IP_FW_PASS);
	if (ret == NAT64MFREE)
		m_freem(*pm);
	*pm = NULL;

	return (IP_FW_DENY);
}
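/*
 * Note that the state lookup above is positional: each portgroup
 * covers NAT64_CHUNK_SIZE consecutive alias ports, so the low bits of
 * the destination port select the state slot directly and no hash
 * walk is needed on the IPv4 side.
 */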
void
nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
    const char *px, int off)
{
	char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN];

	if ((nat64_debug & DP_STATE) == 0)
		return;
	inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s));
	inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a));
	inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d));

	DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> "
	    "%s:%d AGE %d", px, pg->idx, st, off,
	    s, st->u.s.lport, pg->nat_proto, a, pg->aport + off,
	    d, st->u.s.fport, GET_AGE(st->timestamp));
}

/*
 * Check if a particular TCP state is stale and should be deleted.
 * Return 1 if true, 0 otherwise.
 */
static int
nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,
    const struct nat64lsn_state *st, int age)
{
	int ttl;

	if (st->flags & NAT64_FLAG_FIN)
		ttl = cfg->st_close_ttl;
	else if (st->flags & NAT64_FLAG_ESTAB)
		ttl = cfg->st_estab_ttl;
	else if (st->flags & NAT64_FLAG_SYN)
		ttl = cfg->st_syn_ttl;
	else
		ttl = cfg->st_syn_ttl;

	if (age > ttl)
		return (1);
	return (0);
}

/*
 * Check if nat state @st is stale and should be deleted.
 * Return 1 if true, 0 otherwise.
 */
static NAT64NOINLINE int
nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
{
	int age, delete;

	age = GET_AGE(st->timestamp);
	delete = 0;

	/* Skip immutable records */
	if (st->flags & NAT64_FLAG_RDR)
		return (0);

	switch (pg->nat_proto) {
	case NAT_PROTO_TCP:
		delete = nat64lsn_periodic_check_tcp(cfg, st, age);
		break;
	case NAT_PROTO_UDP:
		if (age > cfg->st_udp_ttl)
			delete = 1;
		break;
	case NAT_PROTO_ICMP:
		if (age > cfg->st_icmp_ttl)
			delete = 1;
		break;
	}

	return (delete);
}


/*
 * The following structures and functions are used to perform an
 * SLIST_FOREACH_SAFE() analog for states identified by struct st_ptr.
 */

struct st_idx {
	struct nat64lsn_portgroup *pg;
	struct nat64lsn_state *st;
	struct st_ptr sidx_next;
};

static struct st_idx *
st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
    struct st_ptr *sidx, struct st_idx *si)
{
	struct nat64lsn_portgroup *pg;
	struct nat64lsn_state *st;

	if (sidx->idx == 0) {
		memset(si, 0, sizeof(*si));
		return (si);
	}

	pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx);
	st = &pg->states[sidx->off];

	si->pg = pg;
	si->st = st;
	si->sidx_next = st->next;

	return (si);
}

static struct st_idx *
st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
    struct st_idx *si)
{
	struct st_ptr sidx;
	struct nat64lsn_portgroup *pg;
	struct nat64lsn_state *st;

	sidx = si->sidx_next;
	if (sidx.idx == 0) {
		memset(si, 0, sizeof(*si));
		si->st = NULL;
		si->pg = NULL;
		return (si);
	}

	pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
	st = &pg->states[sidx.off];

	si->pg = pg;
	si->st = st;
	si->sidx_next = st->next;

	return (si);
}

static struct st_idx *
st_save_cond(struct st_idx *si_dst, struct st_idx *si)
{
	if (si->st != NULL)
		*si_dst = *si;

	return (si_dst);
}
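/*
 * Typical traversal, as used in nat64lsn_periodic_chkstates() below:
 *
 *	for (st_first(cfg, nh, &nh->phash[i], &si); si.st != NULL;
 *	    st_save_cond(&si_prev, &si), st_next(cfg, nh, &si))
 *		...
 *
 * st_save_cond() remembers the last entry that survived, so the
 * current one can be unlinked without losing the chain.
 */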
unsigned int
nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
{
	struct st_idx si, si_prev;
	int i;
	unsigned int delcount;

	delcount = 0;
	for (i = 0; i < nh->hsize; i++) {
		memset(&si_prev, 0, sizeof(si_prev));
		for (st_first(cfg, nh, &nh->phash[i], &si);
		    si.st != NULL;
		    st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
			if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)
				continue;
			nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",
			    si.st->cur.off);
			/* Unlink from hash */
			if (si_prev.st != NULL)
				si_prev.st->next = si.st->next;
			else
				nh->phash[i] = si.st->next;
			/* Delete state and free its data */
			PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
			memset(si.st, 0, sizeof(struct nat64lsn_state));
			si.st = NULL;
			delcount++;

			/* Update portgroup timestamp */
			SET_AGE(si.pg->timestamp);
		}
	}
	NAT64STAT_ADD(&cfg->stats, sdeleted, delcount);
	return (delcount);
}

/*
 * Checks if portgroup is not used and can be deleted.
 * Returns 1 if stale, 0 otherwise.
 */
static int
stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
{

	if (!PG_IS_EMPTY(pg))
		return (0);
	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);
	return (1);
}

/*
 * Checks if host record is not used and can be deleted.
 * Returns 1 if stale, 0 otherwise.
 */
static int
stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
{

	if (nh->pg_used != 0)
		return (0);
	if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay)
		return (0);
	return (1);
}

struct nat64lsn_periodic_data {
	struct nat64lsn_cfg *cfg;
	struct nat64lsn_job_head jhead;
	int jlen;
};

static NAT64NOINLINE int
nat64lsn_periodic_chkhost(struct nat64lsn_host *nh,
    struct nat64lsn_periodic_data *d)
{
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_portgroup *pg;
	struct nat64lsn_job_item *ji;
	uint64_t delmask[NAT64LSN_PGPTRNMASK];
	int delcount, i;

	delcount = 0;
	memset(delmask, 0, sizeof(delmask));

	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
	DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d",
	    stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
	if (!stale_nh(d->cfg, nh)) {
		/* Non-stale host. Inspect internals */
		NAT64_LOCK(nh);

		/* Stage 1: Check&expire states */
		if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
			SET_AGE(nh->timestamp);

		/* Stage 2: Check if we need to expire */
		for (i = 0; i < nh->pg_used; i++) {
			pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
			if (pg == NULL)
				continue;

			/* Check if we can delete portgroup */
			if (stale_pg(d->cfg, pg) == 0)
				continue;

			DPRINTF(DP_JQUEUE, "Check PG %d", i);
			delmask[i / 64] |= ((uint64_t)1 << (i % 64));
			delcount++;
		}

		NAT64_UNLOCK(nh);
		if (delcount == 0)
			return (0);
	}

	DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount);
	/* We have something to delete - add it to queue */
	ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);
	if (ji == NULL)
		return (0);

	ji->haddr = nh->addr;
	ji->delcount = delcount;
	memcpy(ji->delmask, delmask, sizeof(ji->delmask));

	TAILQ_INSERT_TAIL(&d->jhead, ji, next);
	d->jlen++;
	return (0);
}
/*
 * This procedure is used to perform various maintenance
 * on the dynamic hash list. Currently it is called every
 * PERIODIC_DELAY (4) seconds.
 */
static void
nat64lsn_periodic(void *data)
{
	struct ip_fw_chain *ch;
	IPFW_RLOCK_TRACKER;
	struct nat64lsn_cfg *cfg;
	struct nat64lsn_periodic_data d;
	struct nat64lsn_host *nh, *tmp;

	cfg = (struct nat64lsn_cfg *) data;
	ch = cfg->ch;
	CURVNET_SET(cfg->vp);

	memset(&d, 0, sizeof(d));
	d.cfg = cfg;
	TAILQ_INIT(&d.jhead);

	IPFW_RLOCK(ch);

	/* Stage 1: foreach host, check all its portgroups */
	I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);

	/* Enqueue everything we have requested */
	nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);

	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);

	IPFW_RUNLOCK(ch);

	CURVNET_RESTORE();
}

static NAT64NOINLINE void
reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

	if (ji->m == NULL)
		return;

	/* Request has failed or packet type is wrong */
	if (ji->f_id.addr_type != 6 || ji->done == 0) {
		m_freem(ji->m);
		ji->m = NULL;
		NAT64STAT_INC(&cfg->stats, dropped);
		DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d",
		    ji->jtype, ji->done);
		return;
	}

	/*
	 * XXX: Limit recursion level
	 */

	NAT64STAT_INC(&cfg->stats, jreinjected);
	DPRINTF(DP_JQUEUE, "Reinject mbuf");
	nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
}

static void
destroy_portgroup(struct nat64lsn_portgroup *pg)
{

	DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg);
	uma_zfree(nat64lsn_pg_zone, pg);
}

static NAT64NOINLINE int
alloc_portgroup(struct nat64lsn_job_item *ji)
{
	struct nat64lsn_portgroup *pg;

	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
	if (pg == NULL)
		return (1);

	if (ji->needs_idx != 0) {
		ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
		/* Failed alloc isn't always fatal, so don't check */
	}
	memset(&pg->freemask, 0xFF, sizeof(pg->freemask));
	pg->nat_proto = ji->nat_proto;
	ji->pg = pg;
	return (0);
}
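/*
 * Note that allocation and attachment are deliberately split: the
 * alloc_*() helpers above and below run without the chain lock and use
 * M_NOWAIT, while the attach_*() helpers publish the objects later
 * under the IPFW write lock in nat64lsn_do_request().
 */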
static void
destroy_host6(struct nat64lsn_host *nh)
{
	char a[INET6_ADDRSTRLEN];
	int i;

	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
	DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh,
	    nh->pg_used);
	NAT64_LOCK_DESTROY(nh);
	for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++)
		uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i));
	uma_zfree(nat64lsn_host_zone, nh);
}

static NAT64NOINLINE int
alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	struct nat64lsn_host *nh;
	char a[INET6_ADDRSTRLEN];

	nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
	if (nh == NULL)
		return (1);
	PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
	if (PORTGROUP_CHUNK(nh, 0) == NULL) {
		uma_zfree(nat64lsn_host_zone, nh);
		return (2);
	}
	if (alloc_portgroup(ji) != 0) {
		NAT64STAT_INC(&cfg->stats, jportfails);
		uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0));
		uma_zfree(nat64lsn_host_zone, nh);
		return (3);
	}

	NAT64_LOCK_INIT(nh);
	nh->addr = ji->haddr;
	nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */
	nh->pg_allocated = NAT64LSN_PGIDX_CHUNK;
	nh->pg_used = 0;
	ji->nh = nh;

	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
	DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh);
	return (0);
}

/*
 * Finds free @pg index inside @nh
 */
static NAT64NOINLINE int
find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx)
{
	int i;

	for (i = 0; i < nh->pg_allocated; i++) {
		if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) {
			*idx = i;
			return (0);
		}
	}
	return (1);
}

static NAT64NOINLINE int
attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_host *nh;

	I6HASH_FIND(cfg, nh, &ji->haddr);
	if (nh == NULL) {
		/* Add new host to list */
		nh = ji->nh;
		I6HASH_INSERT(cfg, nh);
		cfg->ihcount++;
		ji->nh = NULL;

		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
		DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh);
		/*
		 * Try to add portgroup.
		 * Note it will automatically set
		 * 'done' on ji if successful.
		 */
		if (attach_portgroup(cfg, ji) != 0) {
			DPRINTF(DP_DROPS, "%s %p failed to attach PG",
			    a, nh);
			NAT64STAT_INC(&cfg->stats, jportfails);
			return (1);
		}
		return (0);
	}

	/*
	 * nh isn't NULL. This probably means we had several simultaneous
	 * host requests. A previous request has already attached this
	 * host. Requeue the attached mbuf and mark the job as done, but
	 * leave the nh and pg pointers unchanged, so nat64lsn_do_request()
	 * will release all allocated resources.
	 */
	inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
	DPRINTF(DP_OBJ, "%s %p is already attached as %p",
	    a, ji->nh, nh);
	ji->done = 1;
	return (0);
}
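/*
 * Layout of cfg->pg[] as implied by the index arithmetic below: one
 * block of _ADDR_PG_COUNT slots per alias address, subdivided into
 * _ADDR_PG_PROTO_COUNT slots per protocol, i.e.
 *
 *	pg_idx = addr_off * _ADDR_PG_COUNT +
 *	    (nat_proto - 1) * _ADDR_PG_PROTO_COUNT + chunk
 *
 * where chunk selects a NAT64_CHUNK_SIZE-wide port range.
 */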
static NAT64NOINLINE int
find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
    int nat_proto, uint16_t *aport, int *ppg_idx)
{
	int j, pg_idx;

	pg_idx = addr_off * _ADDR_PG_COUNT +
	    (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;

	for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) {
		if (cfg->pg[pg_idx + j] != NULL)
			continue;

		*aport = j * NAT64_CHUNK_SIZE;
		*ppg_idx = pg_idx + j;
		return (1);
	}

	return (0);
}

/*
 * XXX: This function needs to be rewritten to
 * use a free bitmask for faster pg finding;
 * additionally, it should take into consideration
 * a) randomization and
 * b) previous addresses allocated to the given nat instance.
 */
static NAT64NOINLINE int
find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
    uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
{
	int i, nat_proto;

	/*
	 * XXX: Use bitmask index to be able to find/check if IP address
	 * has some spare pg's
	 */
	nat_proto = ji->nat_proto;

	/* First, try to use the same address */
	if (ji->aaddr != 0) {
		i = ntohl(ji->aaddr) - cfg->prefix4;
		if (find_pg_place_addr(cfg, i, nat_proto, aport,
		    ppg_idx) != 0) {
			/* Found! */
			*aaddr = htonl(cfg->prefix4 + i);
			return (0);
		}
	}

	/* Next, try to use a random address based on the flow hash */
	i = ji->fhash % (1 << (32 - cfg->plen4));
	if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) {
		/* Found! */
		*aaddr = htonl(cfg->prefix4 + i);
		return (0);
	}

	/* Last one: simply find ANY available */
	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
		if (find_pg_place_addr(cfg, i, nat_proto, aport,
		    ppg_idx) != 0) {
			/* Found! */
			*aaddr = htonl(cfg->prefix4 + i);
			return (0);
		}
	}

	return (1);
}
static NAT64NOINLINE int
attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_portgroup *pg;
	struct nat64lsn_host *nh;
	uint32_t aaddr;
	uint16_t aport;
	int nh_pg_idx, pg_idx;

	pg = ji->pg;

	/*
	 * Find source host and bind: we can't rely on
	 * pg->host
	 */
	I6HASH_FIND(cfg, nh, &ji->haddr);
	if (nh == NULL)
		return (1);

	/* Find spare port chunk */
	if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) {
		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
		DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a);
		return (2);
	}

	/* Expand PG indexes if needed */
	if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) {
		PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) =
		    ji->spare_idx;
		nh->pg_allocated += NAT64LSN_PGIDX_CHUNK;
		ji->spare_idx = NULL;
	}

	/* Find empty index to store PG in the @nh */
	if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) {
		inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
		DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s",
		    a);
		return (3);
	}

	cfg->pg[pg_idx] = pg;
	cfg->protochunks[pg->nat_proto]++;
	NAT64STAT_INC(&cfg->stats, spgcreated);

	pg->aaddr = aaddr;
	pg->aport = aport;
	pg->host = nh;
	pg->idx = pg_idx;
	SET_AGE(pg->timestamp);

	PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg;
	if (nh->pg_used == nh_pg_idx)
		nh->pg_used++;
	SET_AGE(nh->timestamp);

	ji->pg = NULL;
	ji->done = 1;

	return (0);
}

static NAT64NOINLINE void
consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	struct nat64lsn_host *nh, *nh_tmp;
	struct nat64lsn_portgroup *pg, *pg_list[256];
	int i, pg_lidx, idx;

	/* Find source host */
	I6HASH_FIND(cfg, nh, &ji->haddr);
	if (nh == NULL || nh->pg_used == 0)
		return;

	memset(pg_list, 0, sizeof(pg_list));
	pg_lidx = 0;

	NAT64_LOCK(nh);

	for (i = nh->pg_used - 1; i >= 0; i--) {
		if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0)
			continue;
		pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);

		/* Check that PG isn't busy */
		if (stale_pg(cfg, pg) == 0)
			continue;

		/* DO delete */
		pg_list[pg_lidx++] = pg;
		PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL;

		idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
		    pg->aport);
		KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
		cfg->pg[idx] = NULL;
		cfg->protochunks[pg->nat_proto]--;
		NAT64STAT_INC(&cfg->stats, spgdeleted);

		/* Decrease pg_used */
		while (nh->pg_used > 0 &&
		    PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL)
			nh->pg_used--;

		/* Check if on-stack buffer has ended */
		if (pg_lidx == nitems(pg_list))
			break;
	}

	NAT64_UNLOCK(nh);

	if (stale_nh(cfg, nh)) {
		I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
		KASSERT(nh != NULL, ("Unable to find address"));
		cfg->ihcount--;
		ji->nh = nh;
		I6HASH_FIND(cfg, nh, &ji->haddr);
		KASSERT(nh == NULL, ("Failed to delete address"));
	}

	/* TODO: Delay freeing portgroups */
	while (pg_lidx > 0) {
		pg_lidx--;
		/* spgdeleted was already bumped when the PG was unlinked */
		destroy_portgroup(pg_list[pg_lidx]);
	}
}
/*
 * Main request handler.
 * Responsible for handling jqueue, e.g.
 * creating new hosts, adding/deleting portgroups.
 */
static NAT64NOINLINE void
nat64lsn_do_request(void *data)
{
	IPFW_RLOCK_TRACKER;
	struct nat64lsn_job_head jhead;
	struct nat64lsn_job_item *ji;
	int jcount, nhsize;
	struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
	struct ip_fw_chain *ch;
	int delcount;

	CURVNET_SET(cfg->vp);

	TAILQ_INIT(&jhead);

	/* XXX: We're running unlocked here */

	ch = cfg->ch;
	delcount = 0;
	IPFW_RLOCK(ch);

	/* Grab queue */
	JQUEUE_LOCK();
	TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
	jcount = cfg->jlen;
	cfg->jlen = 0;
	JQUEUE_UNLOCK();

	/* check if we need to resize hash */
	nhsize = 0;
	if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
		nhsize = cfg->ihsize;
		for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
			;
	} else if (cfg->ihcount < cfg->ihsize * 4) {
		nhsize = cfg->ihsize;
		for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
			;
	}

	IPFW_RUNLOCK(ch);

	if (TAILQ_EMPTY(&jhead)) {
		CURVNET_RESTORE();
		return;
	}

	NAT64STAT_INC(&cfg->stats, jcalls);
	DPRINTF(DP_JQUEUE, "count=%d", jcount);

	/*
	 * TODO:
	 * What we should do here is to build a hash
	 * to ensure we don't have lots of duplicate requests.
	 * Skip this for now.
	 *
	 * TODO: Limit per-call number of items
	 */

	/* Pre-allocate everything for entire chain */
	TAILQ_FOREACH(ji, &jhead, next) {
		switch (ji->jtype) {
		case JTYPE_NEWHOST:
			if (alloc_host6(cfg, ji) != 0)
				NAT64STAT_INC(&cfg->stats, jhostfails);
			break;
		case JTYPE_NEWPORTGROUP:
			if (alloc_portgroup(ji) != 0)
				NAT64STAT_INC(&cfg->stats, jportfails);
			break;
		case JTYPE_DELPORTGROUP:
			delcount += ji->delcount;
			break;
		default:
			break;
		}
	}

	/*
	 * TODO: Alloc new hash
	 */
	nhsize = 0;
	if (nhsize > 0) {
		/* XXX: */
	}

	/* Apply all changes in batch */
	IPFW_UH_WLOCK(ch);
	IPFW_WLOCK(ch);

	TAILQ_FOREACH(ji, &jhead, next) {
		switch (ji->jtype) {
		case JTYPE_NEWHOST:
			if (ji->nh != NULL)
				attach_host6(cfg, ji);
			break;
		case JTYPE_NEWPORTGROUP:
			if (ji->pg != NULL &&
			    attach_portgroup(cfg, ji) != 0)
				NAT64STAT_INC(&cfg->stats, jportfails);
			break;
		case JTYPE_DELPORTGROUP:
			consider_del_portgroup(cfg, ji);
			break;
		}
	}

	if (nhsize > 0) {
		/* XXX: Move everything to new hash */
	}

	IPFW_WUNLOCK(ch);
	IPFW_UH_WUNLOCK(ch);

	/* Flush unused entries */
	while (!TAILQ_EMPTY(&jhead)) {
		ji = TAILQ_FIRST(&jhead);
		TAILQ_REMOVE(&jhead, ji, next);
		if (ji->nh != NULL)
			destroy_host6(ji->nh);
		if (ji->pg != NULL)
			destroy_portgroup(ji->pg);
		if (ji->m != NULL)
			reinject_mbuf(cfg, ji);
		if (ji->spare_idx != NULL)
			uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx);
		free(ji, M_IPFW);
	}
	CURVNET_RESTORE();
}
static NAT64NOINLINE struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
    int jtype)
{
	struct nat64lsn_job_item *ji;
	struct in6_addr haddr;
	uint8_t nat_proto;

	/*
	 * Do not try to lock possibly contested mutex if we're near the
	 * limit. Drop packet instead.
	 */
	if (cfg->jlen >= cfg->jmaxlen) {
		NAT64STAT_INC(&cfg->stats, jmaxlen);
		return (NULL);
	}

	memset(&haddr, 0, sizeof(haddr));
	nat_proto = 0;
	if (f_id != NULL) {
		haddr = f_id->src_ip6;
		nat_proto = nat64lsn_proto_map[f_id->proto];

		DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
		    nat_proto, f_id->proto);

		if (nat_proto == 0)
			return (NULL);
	}

	ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW,
	    M_NOWAIT | M_ZERO);

	if (ji == NULL) {
		NAT64STAT_INC(&cfg->stats, jnomem);
		return (NULL);
	}

	ji->jtype = jtype;

	if (f_id != NULL) {
		ji->f_id = *f_id;
		ji->haddr = haddr;
		ji->nat_proto = nat_proto;
	}

	return (ji);
}

static NAT64NOINLINE void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

	if (ji == NULL)
		return;

	JQUEUE_LOCK();
	TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
	cfg->jlen++;
	NAT64STAT_INC(&cfg->stats, jrequests);

	if (callout_pending(&cfg->jcallout) == 0)
		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
	JQUEUE_UNLOCK();
}

static NAT64NOINLINE void
nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_head *jhead, int jlen)
{

	if (TAILQ_EMPTY(jhead))
		return;

	/* Attach current queue to execution one */
	JQUEUE_LOCK();
	TAILQ_CONCAT(&cfg->jhead, jhead, next);
	cfg->jlen += jlen;
	NAT64STAT_ADD(&cfg->stats, jrequests, jlen);

	if (callout_pending(&cfg->jcallout) == 0)
		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
	JQUEUE_UNLOCK();
}

static unsigned int
flow6_hash(const struct ipfw_flow_id *f_id)
{
	unsigned char hbuf[36];

	memcpy(hbuf, &f_id->dst_ip6, 16);
	memcpy(&hbuf[16], &f_id->src_ip6, 16);
	memcpy(&hbuf[32], &f_id->dst_port, 2);
	memcpy(&hbuf[34], &f_id->src_port, 2);

	return (djb_hash(hbuf, sizeof(hbuf)));
}
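/*
 * ji->fhash (filled from flow6_hash() by the request helpers below) is
 * what find_portgroup_place() uses to pick a pseudo-random alias
 * address, so a given flow gravitates to a stable address within the
 * configured /plen4 range.
 */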
static NAT64NOINLINE int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **pm)
{
	struct nat64lsn_job_item *ji;
	struct mbuf *m;

	m = *pm;
	*pm = NULL;

	ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
	if (ji == NULL) {
		m_freem(m);
		NAT64STAT_INC(&cfg->stats, dropped);
		DPRINTF(DP_DROPS, "failed to create job");
	} else {
		ji->m = m;
		/* Provide pseudo-random value based on flow */
		ji->fhash = flow6_hash(f_id);
		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->stats, jhostsreq);
	}

	return (IP_FW_PASS);
}

static NAT64NOINLINE int
nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
    int needs_idx)
{
	struct nat64lsn_job_item *ji;
	struct mbuf *m;

	m = *pm;
	*pm = NULL;

	ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
	if (ji == NULL) {
		m_freem(m);
		NAT64STAT_INC(&cfg->stats, dropped);
		DPRINTF(DP_DROPS, "failed to create job");
	} else {
		ji->m = m;
		/* Provide pseudo-random value based on flow */
		ji->fhash = flow6_hash(f_id);
		ji->aaddr = aaddr;
		ji->needs_idx = needs_idx;
		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->stats, jportreq);
	}

	return (IP_FW_PASS);
}
static NAT64NOINLINE struct nat64lsn_state *
nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
    int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
{
	struct nat64lsn_portgroup *pg;
	struct nat64lsn_state *st;
	int i, hval, off;

	/* XXX: create additional bitmask for selecting proper portgroup */
	for (i = 0; i < nh->pg_used; i++) {
		pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
		if (pg == NULL)
			continue;
		if (*aaddr == 0)
			*aaddr = pg->aaddr;
		if (pg->nat_proto != nat_proto)
			continue;

		off = PG_GET_FREE_IDX(pg);
		if (off != 0) {
			/* We have found spare state. Use it */
			off--;
			PG_MARK_BUSY_IDX(pg, off);
			st = &pg->states[off];

			/*
			 * Fill in new info. Assume state was zeroed.
			 * Timestamp and flags will be filled by caller.
			 */
			st->u.s = kst->u.s;
			st->cur.idx = i + 1;
			st->cur.off = off;

			/* Insert into host hash table */
			hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
			st->next = nh->phash[hval];
			nh->phash[hval] = st->cur;

			nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);

			NAT64STAT_INC(&cfg->stats, screated);

			return (st);
		}
		/* Save last used alias address */
		*aaddr = pg->aaddr;
	}

	return (NULL);
}
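/*
 * States in the per-host hash are chained by {portgroup index, offset}
 * pairs (struct st_ptr) rather than by pointers: st->cur names the
 * state itself and st->next the following hash entry, which is why
 * lookups walk PORTGROUP_BYSIDX() instead of dereferencing links
 * directly.
 */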
static NAT64NOINLINE int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
    struct mbuf **pm)
{
	struct pfloghdr loghdr, *logdata;
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_host *nh;
	struct st_ptr sidx;
	struct nat64lsn_state *st, kst;
	struct nat64lsn_portgroup *pg;
	struct icmp6_hdr *icmp6;
	uint32_t aaddr;
	int action, hval, nat_proto, proto;
	uint16_t aport, state_ts, state_flags;

	/* Check if af/protocol is supported and get its short id */
	nat_proto = nat64lsn_proto_map[f_id->proto];
	if (nat_proto == 0) {
		/*
		 * Since we can be called from the jobs handler, we need
		 * to free the mbuf ourselves and not leave this task to
		 * ipfw_check_packet().
		 */
		NAT64STAT_INC(&cfg->stats, noproto);
		m_freem(*pm);
		*pm = NULL;
		return (IP_FW_DENY);
	}

	/* Try to find host first */
	I6HASH_FIND(cfg, nh, &f_id->src_ip6);

	if (nh == NULL)
		return (nat64lsn_request_host(cfg, f_id, pm));

	/* Fill-in on-stack state structure */
	kst.u.s.faddr = f_id->dst_ip6.s6_addr32[3];
	kst.u.s.fport = f_id->dst_port;
	kst.u.s.lport = f_id->src_port;

	/* Prepare some fields we might need to update */
	hval = 0;
	proto = nat64_getlasthdr(*pm, &hval);
	if (proto < 0) {
		NAT64STAT_INC(&cfg->stats, dropped);
		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contiguous");
		m_freem(*pm);
		*pm = NULL;
		return (IP_FW_DENY);
	}

	SET_AGE(state_ts);
	if (proto == IPPROTO_TCP)
		state_flags = convert_tcp_flags(
		    TCP(mtodo(*pm, hval))->th_flags);
	else
		state_flags = 0;
	if (proto == IPPROTO_ICMPV6) {
		/* Alter local port data */
		icmp6 = mtodo(*pm, hval);
		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
		    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
			kst.u.s.lport = ntohs(icmp6->icmp6_id);
	}

	hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
	pg = NULL;
	st = NULL;

	/* OK, let's find state in host hash */
	NAT64_LOCK(nh);
	sidx = nh->phash[hval];
	int k = 0;
	while (sidx.idx != 0) {
		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
		st = &pg->states[sidx.off];
		//DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off,
		//st->next.idx, st->next.off);
		if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
			break;
		if (k++ > 1000) {
			DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n",
			    sidx.idx, sidx.off, st->next.idx, st->next.off);
			inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
			DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d",
			    a, nh, curcpu);
			k = 0;
		}
		sidx = st->next;
	}

	if (sidx.idx == 0) {
		aaddr = 0;
		st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
		if (st == NULL) {
			/* No free states. Request more if we can */
			if (nh->pg_used >= cfg->max_chunks) {
				/* Limit reached */
				NAT64STAT_INC(&cfg->stats, dropped);
				inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
				DPRINTF(DP_DROPS, "PG limit reached "
				    "for host %s (used %u, allocated %u, "
				    "limit %u)", a,
				    nh->pg_used * NAT64_CHUNK_SIZE,
				    nh->pg_allocated * NAT64_CHUNK_SIZE,
				    cfg->max_chunks * NAT64_CHUNK_SIZE);
				m_freem(*pm);
				*pm = NULL;
				NAT64_UNLOCK(nh);
				return (IP_FW_DENY);
			}
			if ((nh->pg_allocated <=
			    nh->pg_used + NAT64LSN_REMAININGPG) &&
			    nh->pg_allocated < cfg->max_chunks)
				action = 1;	/* Request new indexes */
			else
				action = 0;
			NAT64_UNLOCK(nh);
			//DPRINTF("No state, unlock for %p", nh);
			return (nat64lsn_request_portgroup(cfg, f_id,
			    pm, aaddr, action));
		}

		/* We've got a new state */
		sidx = st->cur;
		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
	}

	/* Okay, state found */

	/* Update necessary fields */
	if (st->timestamp != state_ts)
		st->timestamp = state_ts;
	if ((st->flags & state_flags) != state_flags)
		st->flags |= state_flags;

	/* Copy needed state data */
	aaddr = pg->aaddr;
	aport = htons(pg->aport + sidx.off);

	NAT64_UNLOCK(nh);

	if (cfg->flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
	} else
		logdata = NULL;

	action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->stats, logdata);
	if (action == NAT64SKIP)
		return (IP_FW_PASS);
	if (action == NAT64MFREE)
		m_freem(*pm);
	*pm = NULL;	/* mark mbuf as consumed */
	return (IP_FW_DENY);
}
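/*
 * Note the mbuf ownership convention used above: whenever a translator
 * consumes the packet (frees it itself or lets nat64_do_handle_*()
 * dispose of it), it clears *pm so the caller never touches the mbuf
 * again.
 */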
/*
 * Main dataplane entry point.
 */
int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
    ipfw_insn *cmd, int *done)
{
	ipfw_insn *icmd;
	struct nat64lsn_cfg *cfg;
	int ret;

	IPFW_RLOCK_ASSERT(ch);

	*done = 1; /* terminate the search */
	icmd = cmd + 1;
	if (cmd->opcode != O_EXTERNAL_ACTION ||
	    cmd->arg1 != V_nat64lsn_eid ||
	    icmd->opcode != O_EXTERNAL_INSTANCE ||
	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
		return (0);

	switch (args->f_id.addr_type) {
	case 4:
		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
		break;
	case 6:
		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
		break;
	default:
		return (0);
	}
	return (ret);
}

static int
nat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
{
	struct nat64lsn_host *nh;

	nh = (struct nat64lsn_host *)mem;
	memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr));
	memset(nh->phash, 0, sizeof(nh->phash));
	return (0);
}

static int
nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
{

	memset(mem, 0, size);
	return (0);
}

void
nat64lsn_init_internal(void)
{

	memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
	/* Set up supported protocol map */
	nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
	nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
	nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
	nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
	/* Fill in reverse proto map */
	memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
	nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
	nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
	nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;

	JQUEUE_LOCK_INIT();
	nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
	    sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
	    sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone",
	    sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK,
	    nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

void
nat64lsn_uninit_internal(void)
{

	JQUEUE_LOCK_DESTROY();
	uma_zdestroy(nat64lsn_host_zone);
	uma_zdestroy(nat64lsn_pg_zone);
	uma_zdestroy(nat64lsn_pgidx_zone);
}

void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{

	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
	    nat64lsn_periodic, cfg);
}

struct nat64lsn_cfg *
nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)
{
	struct nat64lsn_cfg *cfg;

	cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO);
	TAILQ_INIT(&cfg->jhead);
	cfg->vp = curvnet;
	cfg->ch = ch;
	COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK);

	cfg->ihsize = NAT64LSN_HSIZE;
	cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,
	    M_WAITOK | M_ZERO);

	cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,
	    M_WAITOK | M_ZERO);

	callout_init(&cfg->periodic, CALLOUT_MPSAFE);
	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);

	return (cfg);
}

/*
 * Destroy all hosts callback.
 * Called on module unload when all activity has already finished, so
 * it can work without any locks.
 */
static NAT64NOINLINE int
nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_portgroup *pg;
	int i;

	for (i = nh->pg_used; i > 0; i--) {
		pg = PORTGROUP_BYSIDX(cfg, nh, i);
		if (pg == NULL)
			continue;
		cfg->pg[pg->idx] = NULL;
		destroy_portgroup(pg);
		nh->pg_used--;
	}
	destroy_host6(nh);
	cfg->ihcount--;
	return (0);
}

void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *nh, *tmp;

	JQUEUE_LOCK();
	callout_drain(&cfg->jcallout);
	JQUEUE_UNLOCK();

	callout_drain(&cfg->periodic);
	I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg);
	DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount);

	COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
	free(cfg->ih, M_IPFW);
	free(cfg->pg, M_IPFW);
	free(cfg, M_IPFW);
}