1 /*- 2 * Copyright (c) 2015-2016 Yandex LLC 3 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> 4 * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/counter.h> 35 #include <sys/errno.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/module.h> 41 #include <sys/rmlock.h> 42 #include <sys/rwlock.h> 43 #include <sys/socket.h> 44 #include <sys/queue.h> 45 #include <sys/syslog.h> 46 #include <sys/sysctl.h> 47 48 #include <net/if.h> 49 #include <net/if_var.h> 50 #include <net/if_pflog.h> 51 #include <net/pfil.h> 52 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_fw.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/ip_icmp.h> 60 #include <netinet/tcp.h> 61 #include <netinet/udp.h> 62 #include <netinet6/in6_var.h> 63 #include <netinet6/ip6_var.h> 64 #include <netinet6/ip_fw_nat64.h> 65 66 #include <netpfil/ipfw/ip_fw_private.h> 67 #include <netpfil/pf/pf.h> 68 69 #include "nat64lsn.h" 70 71 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN"); 72 73 static void nat64lsn_periodic(void *data); 74 #define PERIODIC_DELAY 4 75 static uint8_t nat64lsn_proto_map[256]; 76 uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; 77 78 #define NAT64_FLAG_FIN 0x01 /* FIN was seen */ 79 #define NAT64_FLAG_SYN 0x02 /* First syn in->out */ 80 #define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */ 81 #define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN) 82 83 #define NAT64_FLAG_RDR 0x80 /* Port redirect */ 84 #define NAT64_LOOKUP(chain, cmd) \ 85 (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1) 86 /* 87 * Delayed job queue, used to create new hosts 88 * and new portgroups 89 */ 90 enum nat64lsn_jtype { 91 JTYPE_NEWHOST = 1, 92 JTYPE_NEWPORTGROUP, 93 JTYPE_DELPORTGROUP, 94 }; 95 96 struct nat64lsn_job_item { 97 TAILQ_ENTRY(nat64lsn_job_item) next; 98 enum nat64lsn_jtype jtype; 99 struct nat64lsn_host *nh; 100 struct nat64lsn_portgroup *pg; 101 void *spare_idx; 102 struct in6_addr haddr; 103 uint8_t nat_proto; 104 uint8_t done; 105 int needs_idx; 106 int delcount; 107 unsigned int fhash; /* Flow hash */ 108 uint32_t aaddr; /* Last used address (net) */ 109 struct mbuf *m; 110 struct ipfw_flow_id f_id; 111 uint64_t delmask[NAT64LSN_PGPTRNMASK]; 112 }; 113 114 static struct mtx jmtx; 115 #define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF) 116 #define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx) 117 #define JQUEUE_LOCK() mtx_lock(&jmtx) 118 #define JQUEUE_UNLOCK() mtx_unlock(&jmtx) 119 120 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, 121 struct nat64lsn_job_item *ji); 122 static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, 123 struct nat64lsn_job_head *jhead, int jlen); 124 125 static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg, 126 const struct ipfw_flow_id *f_id, int jtype); 127 static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg, 128 const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr, 129 int needs_idx); 130 static int nat64lsn_request_host(struct nat64lsn_cfg *cfg, 131 const struct ipfw_flow_id *f_id, struct mbuf **pm); 132 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg, 133 const struct ipfw_flow_id *f_id, struct mbuf **pm); 134 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg, 135 struct ipfw_flow_id *f_id, struct mbuf **pm); 136 137 static int alloc_portgroup(struct nat64lsn_job_item *ji); 138 static void destroy_portgroup(struct nat64lsn_portgroup *pg); 139 static void destroy_host6(struct nat64lsn_host *nh); 140 static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); 141 142 static int attach_portgroup(struct nat64lsn_cfg *cfg, 143 struct nat64lsn_job_item *ji); 144 static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); 145 146 147 /* XXX tmp */ 148 static uma_zone_t nat64lsn_host_zone; 149 static uma_zone_t nat64lsn_pg_zone; 150 static uma_zone_t nat64lsn_pgidx_zone; 151 152 static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, 153 struct nat64lsn_host *nh); 154 155 #define I6_hash(x) (djb_hash((const unsigned char *)(x), 16)) 156 #define I6_first(_ph, h) (_ph)[h] 157 #define I6_next(x) (x)->next 158 #define I6_val(x) (&(x)->addr) 159 #define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b) 160 #define I6_lock(a, b) 161 #define I6_unlock(a, b) 162 163 #define I6HASH_FIND(_cfg, _res, _a) \ 164 CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a) 165 #define I6HASH_INSERT(_cfg, _i) \ 166 CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i) 167 #define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \ 168 CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a) 169 170 #define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \ 171 CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg) 172 173 #define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8) 174 175 static unsigned 176 djb_hash(const unsigned char *h, const int len) 177 { 178 unsigned int result = 0; 179 int i; 180 181 for (i = 0; i < len; i++) 182 result = 33 * result ^ h[i]; 183 184 return (result); 185 } 186 187 /* 188 static size_t 189 bitmask_size(size_t num, int *level) 190 { 191 size_t x; 192 int c; 193 194 for (c = 0, x = num; num > 1; num /= 64, c++) 195 ; 196 197 return (x); 198 } 199 200 static void 201 bitmask_prepare(uint64_t *pmask, size_t bufsize, int level) 202 { 203 size_t x, z; 204 205 memset(pmask, 0xFF, bufsize); 206 for (x = 0, z = 1; level > 1; x += z, z *= 64, level--) 207 ; 208 pmask[x] ~= 0x01; 209 } 210 */ 211 212 static void 213 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family, 214 uint32_t n, uint32_t sn) 215 { 216 217 memset(plog, 0, sizeof(*plog)); 218 plog->length = PFLOG_REAL_HDRLEN; 219 plog->af = family; 220 plog->action = PF_NAT; 221 plog->dir = PF_IN; 222 plog->rulenr = htonl(n); 223 plog->subrulenr = htonl(sn); 224 plog->ruleset[0] = '\0'; 225 strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname)); 226 ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m); 227 } 228 /* 229 * Inspects icmp packets to see if the message contains different 230 * packet header so we need to alter @addr and @port. 231 */ 232 static int 233 inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr, 234 uint16_t *port) 235 { 236 struct ip *ip; 237 struct tcphdr *tcp; 238 struct udphdr *udp; 239 struct icmphdr *icmp; 240 int off; 241 uint8_t proto; 242 243 ip = mtod(*m, struct ip *); /* Outer IP header */ 244 off = (ip->ip_hl << 2) + ICMP_MINLEN; 245 if ((*m)->m_len < off) 246 *m = m_pullup(*m, off); 247 if (*m == NULL) 248 return (ENOMEM); 249 250 ip = mtod(*m, struct ip *); /* Outer IP header */ 251 icmp = L3HDR(ip, struct icmphdr *); 252 switch (icmp->icmp_type) { 253 case ICMP_ECHO: 254 case ICMP_ECHOREPLY: 255 /* Use icmp ID as distinguisher */ 256 *port = ntohs(*((uint16_t *)(icmp + 1))); 257 return (0); 258 case ICMP_UNREACH: 259 case ICMP_TIMXCEED: 260 break; 261 default: 262 return (EOPNOTSUPP); 263 } 264 /* 265 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits 266 * of ULP header. 267 */ 268 if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN) 269 return (EINVAL); 270 if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN) 271 *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN); 272 if (*m == NULL) 273 return (ENOMEM); 274 ip = mtodo(*m, off); /* Inner IP header */ 275 proto = ip->ip_p; 276 off += ip->ip_hl << 2; /* Skip inner IP header */ 277 *addr = ntohl(ip->ip_src.s_addr); 278 if ((*m)->m_len < off + ICMP_MINLEN) 279 *m = m_pullup(*m, off + ICMP_MINLEN); 280 if (*m == NULL) 281 return (ENOMEM); 282 switch (proto) { 283 case IPPROTO_TCP: 284 tcp = mtodo(*m, off); 285 *nat_proto = NAT_PROTO_TCP; 286 *port = ntohs(tcp->th_sport); 287 return (0); 288 case IPPROTO_UDP: 289 udp = mtodo(*m, off); 290 *nat_proto = NAT_PROTO_UDP; 291 *port = ntohs(udp->uh_sport); 292 return (0); 293 case IPPROTO_ICMP: 294 /* 295 * We will translate only ICMP errors for our ICMP 296 * echo requests. 297 */ 298 icmp = mtodo(*m, off); 299 if (icmp->icmp_type != ICMP_ECHO) 300 return (EOPNOTSUPP); 301 *port = ntohs(*((uint16_t *)(icmp + 1))); 302 return (0); 303 }; 304 return (EOPNOTSUPP); 305 } 306 307 static inline uint8_t 308 convert_tcp_flags(uint8_t flags) 309 { 310 uint8_t result; 311 312 result = flags & (TH_FIN|TH_SYN); 313 result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */ 314 result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */ 315 316 return (result); 317 } 318 319 static NAT64NOINLINE int 320 nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, 321 struct mbuf **pm) 322 { 323 struct pfloghdr loghdr, *logdata; 324 struct in6_addr src6; 325 struct nat64lsn_portgroup *pg; 326 struct nat64lsn_host *nh; 327 struct nat64lsn_state *st; 328 struct ip *ip; 329 uint32_t addr; 330 uint16_t state_flags, state_ts; 331 uint16_t port, lport; 332 uint8_t nat_proto; 333 int ret; 334 335 addr = f_id->dst_ip; 336 port = f_id->dst_port; 337 if (addr < cfg->prefix4 || addr > cfg->pmask4) { 338 NAT64STAT_INC(&cfg->base.stats, nomatch4); 339 return (cfg->nomatch_verdict); 340 } 341 342 /* Check if protocol is supported and get its short id */ 343 nat_proto = nat64lsn_proto_map[f_id->proto]; 344 if (nat_proto == 0) { 345 NAT64STAT_INC(&cfg->base.stats, noproto); 346 return (cfg->nomatch_verdict); 347 } 348 349 /* We might need to handle icmp differently */ 350 if (nat_proto == NAT_PROTO_ICMP) { 351 ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port); 352 if (ret != 0) { 353 if (ret == ENOMEM) { 354 NAT64STAT_INC(&cfg->base.stats, nomem); 355 return (IP_FW_DENY); 356 } 357 NAT64STAT_INC(&cfg->base.stats, noproto); 358 return (cfg->nomatch_verdict); 359 } 360 /* XXX: Check addr for validity */ 361 if (addr < cfg->prefix4 || addr > cfg->pmask4) { 362 NAT64STAT_INC(&cfg->base.stats, nomatch4); 363 return (cfg->nomatch_verdict); 364 } 365 } 366 367 /* Calc portgroup offset w.r.t protocol */ 368 pg = GET_PORTGROUP(cfg, addr, nat_proto, port); 369 370 /* Check if this port is occupied by any portgroup */ 371 if (pg == NULL) { 372 NAT64STAT_INC(&cfg->base.stats, nomatch4); 373 #if 0 374 DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port, 375 _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port)); 376 #endif 377 return (cfg->nomatch_verdict); 378 } 379 380 /* TODO: Check flags to see if we need to do some static mapping */ 381 nh = pg->host; 382 383 /* Prepare some fields we might need to update */ 384 SET_AGE(state_ts); 385 ip = mtod(*pm, struct ip *); 386 if (ip->ip_p == IPPROTO_TCP) 387 state_flags = convert_tcp_flags( 388 L3HDR(ip, struct tcphdr *)->th_flags); 389 else 390 state_flags = 0; 391 392 /* Lock host and get port mapping */ 393 NAT64_LOCK(nh); 394 395 st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)]; 396 if (st->timestamp != state_ts) 397 st->timestamp = state_ts; 398 if ((st->flags & state_flags) != state_flags) 399 st->flags |= state_flags; 400 lport = htons(st->u.s.lport); 401 402 NAT64_UNLOCK(nh); 403 404 if (cfg->base.flags & NAT64_LOG) { 405 logdata = &loghdr; 406 nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off); 407 } else 408 logdata = NULL; 409 410 nat64_embed_ip4(&cfg->base, htonl(f_id->src_ip), &src6); 411 ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport, 412 &cfg->base, logdata); 413 414 if (ret == NAT64SKIP) 415 return (cfg->nomatch_verdict); 416 if (ret == NAT64MFREE) 417 m_freem(*pm); 418 *pm = NULL; 419 420 return (IP_FW_DENY); 421 } 422 423 void 424 nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, 425 const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, 426 const char *px, int off) 427 { 428 char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN]; 429 430 if ((V_nat64_debug & DP_STATE) == 0) 431 return; 432 inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s)); 433 inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a)); 434 inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d)); 435 436 DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> " 437 "%s:%d AGE %d", px, pg->idx, st, off, 438 s, st->u.s.lport, pg->nat_proto, a, pg->aport + off, 439 d, st->u.s.fport, GET_AGE(st->timestamp)); 440 } 441 442 /* 443 * Check if particular TCP state is stale and should be deleted. 444 * Return 1 if true, 0 otherwise. 445 */ 446 static int 447 nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg, 448 const struct nat64lsn_state *st, int age) 449 { 450 int ttl; 451 452 if (st->flags & NAT64_FLAG_FIN) 453 ttl = cfg->st_close_ttl; 454 else if (st->flags & NAT64_FLAG_ESTAB) 455 ttl = cfg->st_estab_ttl; 456 else if (st->flags & NAT64_FLAG_SYN) 457 ttl = cfg->st_syn_ttl; 458 else 459 ttl = cfg->st_syn_ttl; 460 461 if (age > ttl) 462 return (1); 463 return (0); 464 } 465 466 /* 467 * Check if nat state @st is stale and should be deleted. 468 * Return 1 if true, 0 otherwise. 469 */ 470 static NAT64NOINLINE int 471 nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg, 472 const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st) 473 { 474 int age, delete; 475 476 age = GET_AGE(st->timestamp); 477 delete = 0; 478 479 /* Skip immutable records */ 480 if (st->flags & NAT64_FLAG_RDR) 481 return (0); 482 483 switch (pg->nat_proto) { 484 case NAT_PROTO_TCP: 485 delete = nat64lsn_periodic_check_tcp(cfg, st, age); 486 break; 487 case NAT_PROTO_UDP: 488 if (age > cfg->st_udp_ttl) 489 delete = 1; 490 break; 491 case NAT_PROTO_ICMP: 492 if (age > cfg->st_icmp_ttl) 493 delete = 1; 494 break; 495 } 496 497 return (delete); 498 } 499 500 501 /* 502 * The following structures and functions 503 * are used to perform SLIST_FOREACH_SAFE() 504 * analog for states identified by struct st_ptr. 505 */ 506 507 struct st_idx { 508 struct nat64lsn_portgroup *pg; 509 struct nat64lsn_state *st; 510 struct st_ptr sidx_next; 511 }; 512 513 static struct st_idx * 514 st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, 515 struct st_ptr *sidx, struct st_idx *si) 516 { 517 struct nat64lsn_portgroup *pg; 518 struct nat64lsn_state *st; 519 520 if (sidx->idx == 0) { 521 memset(si, 0, sizeof(*si)); 522 return (si); 523 } 524 525 pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx); 526 st = &pg->states[sidx->off]; 527 528 si->pg = pg; 529 si->st = st; 530 si->sidx_next = st->next; 531 532 return (si); 533 } 534 535 static struct st_idx * 536 st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, 537 struct st_idx *si) 538 { 539 struct st_ptr sidx; 540 struct nat64lsn_portgroup *pg; 541 struct nat64lsn_state *st; 542 543 sidx = si->sidx_next; 544 if (sidx.idx == 0) { 545 memset(si, 0, sizeof(*si)); 546 si->st = NULL; 547 si->pg = NULL; 548 return (si); 549 } 550 551 pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); 552 st = &pg->states[sidx.off]; 553 554 si->pg = pg; 555 si->st = st; 556 si->sidx_next = st->next; 557 558 return (si); 559 } 560 561 static struct st_idx * 562 st_save_cond(struct st_idx *si_dst, struct st_idx *si) 563 { 564 if (si->st != NULL) 565 *si_dst = *si; 566 567 return (si_dst); 568 } 569 570 unsigned int 571 nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh) 572 { 573 struct st_idx si, si_prev; 574 int i; 575 unsigned int delcount; 576 577 delcount = 0; 578 for (i = 0; i < nh->hsize; i++) { 579 memset(&si_prev, 0, sizeof(si_prev)); 580 for (st_first(cfg, nh, &nh->phash[i], &si); 581 si.st != NULL; 582 st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) { 583 if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0) 584 continue; 585 nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE", 586 si.st->cur.off); 587 /* Unlink from hash */ 588 if (si_prev.st != NULL) 589 si_prev.st->next = si.st->next; 590 else 591 nh->phash[i] = si.st->next; 592 /* Delete state and free its data */ 593 PG_MARK_FREE_IDX(si.pg, si.st->cur.off); 594 memset(si.st, 0, sizeof(struct nat64lsn_state)); 595 si.st = NULL; 596 delcount++; 597 598 /* Update portgroup timestamp */ 599 SET_AGE(si.pg->timestamp); 600 } 601 } 602 NAT64STAT_ADD(&cfg->base.stats, sdeleted, delcount); 603 return (delcount); 604 } 605 606 /* 607 * Checks if portgroup is not used and can be deleted, 608 * Returns 1 if stale, 0 otherwise 609 */ 610 static int 611 stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg) 612 { 613 614 if (!PG_IS_EMPTY(pg)) 615 return (0); 616 if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay) 617 return (0); 618 return (1); 619 } 620 621 /* 622 * Checks if host record is not used and can be deleted, 623 * Returns 1 if stale, 0 otherwise 624 */ 625 static int 626 stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh) 627 { 628 629 if (nh->pg_used != 0) 630 return (0); 631 if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay) 632 return (0); 633 return (1); 634 } 635 636 struct nat64lsn_periodic_data { 637 struct nat64lsn_cfg *cfg; 638 struct nat64lsn_job_head jhead; 639 int jlen; 640 }; 641 642 static NAT64NOINLINE int 643 nat64lsn_periodic_chkhost(struct nat64lsn_host *nh, 644 struct nat64lsn_periodic_data *d) 645 { 646 struct nat64lsn_portgroup *pg; 647 struct nat64lsn_job_item *ji; 648 uint64_t delmask[NAT64LSN_PGPTRNMASK]; 649 int delcount, i; 650 651 delcount = 0; 652 memset(delmask, 0, sizeof(delmask)); 653 654 if (V_nat64_debug & DP_JQUEUE) { 655 char a[INET6_ADDRSTRLEN]; 656 657 inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); 658 DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d", 659 stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu); 660 } 661 if (!stale_nh(d->cfg, nh)) { 662 /* Non-stale host. Inspect internals */ 663 NAT64_LOCK(nh); 664 665 /* Stage 1: Check&expire states */ 666 if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0) 667 SET_AGE(nh->timestamp); 668 669 /* Stage 2: Check if we need to expire */ 670 for (i = 0; i < nh->pg_used; i++) { 671 pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1); 672 if (pg == NULL) 673 continue; 674 675 /* Check if we can delete portgroup */ 676 if (stale_pg(d->cfg, pg) == 0) 677 continue; 678 679 DPRINTF(DP_JQUEUE, "Check PG %d", i); 680 delmask[i / 64] |= ((uint64_t)1 << (i % 64)); 681 delcount++; 682 } 683 684 NAT64_UNLOCK(nh); 685 if (delcount == 0) 686 return (0); 687 } 688 689 DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount); 690 /* We have something to delete - add it to queue */ 691 ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP); 692 if (ji == NULL) 693 return (0); 694 695 ji->haddr = nh->addr; 696 ji->delcount = delcount; 697 memcpy(ji->delmask, delmask, sizeof(ji->delmask)); 698 699 TAILQ_INSERT_TAIL(&d->jhead, ji, next); 700 d->jlen++; 701 return (0); 702 } 703 704 /* 705 * This procedure is used to perform various maintance 706 * on dynamic hash list. Currently it is called every second. 707 */ 708 static void 709 nat64lsn_periodic(void *data) 710 { 711 struct ip_fw_chain *ch; 712 IPFW_RLOCK_TRACKER; 713 struct nat64lsn_cfg *cfg; 714 struct nat64lsn_periodic_data d; 715 struct nat64lsn_host *nh, *tmp; 716 717 cfg = (struct nat64lsn_cfg *) data; 718 ch = cfg->ch; 719 CURVNET_SET(cfg->vp); 720 721 memset(&d, 0, sizeof(d)); 722 d.cfg = cfg; 723 TAILQ_INIT(&d.jhead); 724 725 IPFW_RLOCK(ch); 726 727 /* Stage 1: foreach host, check all its portgroups */ 728 I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d); 729 730 /* Enqueue everything we have requested */ 731 nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen); 732 733 callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY); 734 735 IPFW_RUNLOCK(ch); 736 737 CURVNET_RESTORE(); 738 } 739 740 static NAT64NOINLINE void 741 reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) 742 { 743 744 if (ji->m == NULL) 745 return; 746 747 /* Request has failed or packet type is wrong */ 748 if (ji->f_id.addr_type != 6 || ji->done == 0) { 749 m_freem(ji->m); 750 ji->m = NULL; 751 NAT64STAT_INC(&cfg->base.stats, dropped); 752 DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d", 753 ji->jtype, ji->done); 754 return; 755 } 756 757 /* 758 * XXX: Limit recursion level 759 */ 760 761 NAT64STAT_INC(&cfg->base.stats, jreinjected); 762 DPRINTF(DP_JQUEUE, "Reinject mbuf"); 763 nat64lsn_translate6(cfg, &ji->f_id, &ji->m); 764 } 765 766 static void 767 destroy_portgroup(struct nat64lsn_portgroup *pg) 768 { 769 770 DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg); 771 uma_zfree(nat64lsn_pg_zone, pg); 772 } 773 774 static NAT64NOINLINE int 775 alloc_portgroup(struct nat64lsn_job_item *ji) 776 { 777 struct nat64lsn_portgroup *pg; 778 779 pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT); 780 if (pg == NULL) 781 return (1); 782 783 if (ji->needs_idx != 0) { 784 ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); 785 /* Failed alloc isn't always fatal, so don't check */ 786 } 787 memset(&pg->freemask, 0xFF, sizeof(pg->freemask)); 788 pg->nat_proto = ji->nat_proto; 789 ji->pg = pg; 790 return (0); 791 792 } 793 794 static void 795 destroy_host6(struct nat64lsn_host *nh) 796 { 797 char a[INET6_ADDRSTRLEN]; 798 int i; 799 800 inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); 801 DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh, 802 nh->pg_used); 803 NAT64_LOCK_DESTROY(nh); 804 for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++) 805 uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i)); 806 uma_zfree(nat64lsn_host_zone, nh); 807 } 808 809 static NAT64NOINLINE int 810 alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) 811 { 812 struct nat64lsn_host *nh; 813 char a[INET6_ADDRSTRLEN]; 814 815 nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT); 816 if (nh == NULL) 817 return (1); 818 PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); 819 if (PORTGROUP_CHUNK(nh, 0) == NULL) { 820 uma_zfree(nat64lsn_host_zone, nh); 821 return (2); 822 } 823 if (alloc_portgroup(ji) != 0) { 824 NAT64STAT_INC(&cfg->base.stats, jportfails); 825 uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0)); 826 uma_zfree(nat64lsn_host_zone, nh); 827 return (3); 828 } 829 830 NAT64_LOCK_INIT(nh); 831 nh->addr = ji->haddr; 832 nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */ 833 nh->pg_allocated = NAT64LSN_PGIDX_CHUNK; 834 nh->pg_used = 0; 835 ji->nh = nh; 836 837 inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); 838 DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh); 839 return (0); 840 } 841 842 /* 843 * Finds free @pg index inside @nh 844 */ 845 static NAT64NOINLINE int 846 find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx) 847 { 848 int i; 849 850 for (i = 0; i < nh->pg_allocated; i++) { 851 if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) { 852 *idx = i; 853 return (0); 854 } 855 } 856 return (1); 857 } 858 859 static NAT64NOINLINE int 860 attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) 861 { 862 char a[INET6_ADDRSTRLEN]; 863 struct nat64lsn_host *nh; 864 865 I6HASH_FIND(cfg, nh, &ji->haddr); 866 if (nh == NULL) { 867 /* Add new host to list */ 868 nh = ji->nh; 869 I6HASH_INSERT(cfg, nh); 870 cfg->ihcount++; 871 ji->nh = NULL; 872 873 inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); 874 DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh); 875 /* 876 * Try to add portgroup. 877 * Note it will automatically set 878 * 'done' on ji if successful. 879 */ 880 if (attach_portgroup(cfg, ji) != 0) { 881 DPRINTF(DP_DROPS, "%s %p failed to attach PG", 882 a, nh); 883 NAT64STAT_INC(&cfg->base.stats, jportfails); 884 return (1); 885 } 886 return (0); 887 } 888 889 /* 890 * nh isn't NULL. This probably means we had several simultaneous 891 * host requests. The previous one request has already attached 892 * this host. Requeue attached mbuf and mark job as done, but 893 * leave nh and pg pointers not changed, so nat64lsn_do_request() 894 * will release all allocated resources. 895 */ 896 inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); 897 DPRINTF(DP_OBJ, "%s %p is already attached as %p", 898 a, ji->nh, nh); 899 ji->done = 1; 900 return (0); 901 } 902 903 static NAT64NOINLINE int 904 find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off, 905 int nat_proto, uint16_t *aport, int *ppg_idx) 906 { 907 int j, pg_idx; 908 909 pg_idx = addr_off * _ADDR_PG_COUNT + 910 (nat_proto - 1) * _ADDR_PG_PROTO_COUNT; 911 912 for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) { 913 if (cfg->pg[pg_idx + j] != NULL) 914 continue; 915 916 *aport = j * NAT64_CHUNK_SIZE; 917 *ppg_idx = pg_idx + j; 918 return (1); 919 } 920 921 return (0); 922 } 923 924 /* 925 * XXX: This function needs to be rewritten to 926 * use free bitmask for faster pg finding, 927 * additionally, it should take into consideration 928 * a) randomization and 929 * b) previous addresses allocated to given nat instance 930 * 931 */ 932 static NAT64NOINLINE int 933 find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji, 934 uint32_t *aaddr, uint16_t *aport, int *ppg_idx) 935 { 936 int i, nat_proto; 937 938 /* 939 * XXX: Use bitmask index to be able to find/check if IP address 940 * has some spare pg's 941 */ 942 nat_proto = ji->nat_proto; 943 944 /* First, try to use same address */ 945 if (ji->aaddr != 0) { 946 i = ntohl(ji->aaddr) - cfg->prefix4; 947 if (find_pg_place_addr(cfg, i, nat_proto, aport, 948 ppg_idx) != 0){ 949 /* Found! */ 950 *aaddr = htonl(cfg->prefix4 + i); 951 return (0); 952 } 953 } 954 955 /* Next, try to use random address based on flow hash */ 956 i = ji->fhash % (1 << (32 - cfg->plen4)); 957 if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) { 958 /* Found! */ 959 *aaddr = htonl(cfg->prefix4 + i); 960 return (0); 961 } 962 963 964 /* Last one: simply find ANY available */ 965 for (i = 0; i < (1 << (32 - cfg->plen4)); i++) { 966 if (find_pg_place_addr(cfg, i, nat_proto, aport, 967 ppg_idx) != 0){ 968 /* Found! */ 969 *aaddr = htonl(cfg->prefix4 + i); 970 return (0); 971 } 972 } 973 974 return (1); 975 } 976 977 static NAT64NOINLINE int 978 attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) 979 { 980 char a[INET6_ADDRSTRLEN]; 981 struct nat64lsn_portgroup *pg; 982 struct nat64lsn_host *nh; 983 uint32_t aaddr; 984 uint16_t aport; 985 int nh_pg_idx, pg_idx; 986 987 pg = ji->pg; 988 989 /* 990 * Find source host and bind: we can't rely on 991 * pg->host 992 */ 993 I6HASH_FIND(cfg, nh, &ji->haddr); 994 if (nh == NULL) 995 return (1); 996 997 /* Find spare port chunk */ 998 if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) { 999 inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); 1000 DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a); 1001 return (2); 1002 } 1003 1004 /* Expand PG indexes if needed */ 1005 if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) { 1006 PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) = 1007 ji->spare_idx; 1008 nh->pg_allocated += NAT64LSN_PGIDX_CHUNK; 1009 ji->spare_idx = NULL; 1010 } 1011 1012 /* Find empty index to store PG in the @nh */ 1013 if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) { 1014 inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); 1015 DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s", 1016 a); 1017 return (3); 1018 } 1019 1020 cfg->pg[pg_idx] = pg; 1021 cfg->protochunks[pg->nat_proto]++; 1022 NAT64STAT_INC(&cfg->base.stats, spgcreated); 1023 1024 pg->aaddr = aaddr; 1025 pg->aport = aport; 1026 pg->host = nh; 1027 pg->idx = pg_idx; 1028 SET_AGE(pg->timestamp); 1029 1030 PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg; 1031 if (nh->pg_used == nh_pg_idx) 1032 nh->pg_used++; 1033 SET_AGE(nh->timestamp); 1034 1035 ji->pg = NULL; 1036 ji->done = 1; 1037 1038 return (0); 1039 } 1040 1041 static NAT64NOINLINE void 1042 consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) 1043 { 1044 struct nat64lsn_host *nh, *nh_tmp; 1045 struct nat64lsn_portgroup *pg, *pg_list[256]; 1046 int i, pg_lidx, idx; 1047 1048 /* Find source host */ 1049 I6HASH_FIND(cfg, nh, &ji->haddr); 1050 if (nh == NULL || nh->pg_used == 0) 1051 return; 1052 1053 memset(pg_list, 0, sizeof(pg_list)); 1054 pg_lidx = 0; 1055 1056 NAT64_LOCK(nh); 1057 1058 for (i = nh->pg_used - 1; i >= 0; i--) { 1059 if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0) 1060 continue; 1061 pg = PORTGROUP_BYSIDX(cfg, nh, i + 1); 1062 1063 /* Check that PG isn't busy. */ 1064 if (stale_pg(cfg, pg) == 0) 1065 continue; 1066 1067 /* DO delete */ 1068 pg_list[pg_lidx++] = pg; 1069 PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL; 1070 1071 idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto, 1072 pg->aport); 1073 KASSERT(cfg->pg[idx] == pg, ("Non matched pg")); 1074 cfg->pg[idx] = NULL; 1075 cfg->protochunks[pg->nat_proto]--; 1076 NAT64STAT_INC(&cfg->base.stats, spgdeleted); 1077 1078 /* Decrease pg_used */ 1079 while (nh->pg_used > 0 && 1080 PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL) 1081 nh->pg_used--; 1082 1083 /* Check if on-stack buffer has ended */ 1084 if (pg_lidx == nitems(pg_list)) 1085 break; 1086 } 1087 1088 NAT64_UNLOCK(nh); 1089 1090 if (stale_nh(cfg, nh)) { 1091 I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr); 1092 KASSERT(nh != NULL, ("Unable to find address")); 1093 cfg->ihcount--; 1094 ji->nh = nh; 1095 I6HASH_FIND(cfg, nh, &ji->haddr); 1096 KASSERT(nh == NULL, ("Failed to delete address")); 1097 } 1098 1099 /* TODO: Delay freeing portgroups */ 1100 while (pg_lidx > 0) { 1101 pg_lidx--; 1102 NAT64STAT_INC(&cfg->base.stats, spgdeleted); 1103 destroy_portgroup(pg_list[pg_lidx]); 1104 } 1105 } 1106 1107 /* 1108 * Main request handler. 1109 * Responsible for handling jqueue, e.g. 1110 * creating new hosts, addind/deleting portgroups. 1111 */ 1112 static NAT64NOINLINE void 1113 nat64lsn_do_request(void *data) 1114 { 1115 IPFW_RLOCK_TRACKER; 1116 struct nat64lsn_job_head jhead; 1117 struct nat64lsn_job_item *ji; 1118 int jcount, nhsize; 1119 struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data; 1120 struct ip_fw_chain *ch; 1121 int delcount; 1122 1123 CURVNET_SET(cfg->vp); 1124 1125 TAILQ_INIT(&jhead); 1126 1127 /* XXX: We're running unlocked here */ 1128 1129 ch = cfg->ch; 1130 delcount = 0; 1131 IPFW_RLOCK(ch); 1132 1133 /* Grab queue */ 1134 JQUEUE_LOCK(); 1135 TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next); 1136 jcount = cfg->jlen; 1137 cfg->jlen = 0; 1138 JQUEUE_UNLOCK(); 1139 1140 /* check if we need to resize hash */ 1141 nhsize = 0; 1142 if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) { 1143 nhsize = cfg->ihsize; 1144 for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2) 1145 ; 1146 } else if (cfg->ihcount < cfg->ihsize * 4) { 1147 nhsize = cfg->ihsize; 1148 for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2) 1149 ; 1150 } 1151 1152 IPFW_RUNLOCK(ch); 1153 1154 if (TAILQ_EMPTY(&jhead)) { 1155 CURVNET_RESTORE(); 1156 return; 1157 } 1158 1159 NAT64STAT_INC(&cfg->base.stats, jcalls); 1160 DPRINTF(DP_JQUEUE, "count=%d", jcount); 1161 1162 /* 1163 * TODO: 1164 * What we should do here is to build a hash 1165 * to ensure we don't have lots of duplicate requests. 1166 * Skip this for now. 1167 * 1168 * TODO: Limit per-call number of items 1169 */ 1170 1171 /* Pre-allocate everything for entire chain */ 1172 TAILQ_FOREACH(ji, &jhead, next) { 1173 switch (ji->jtype) { 1174 case JTYPE_NEWHOST: 1175 if (alloc_host6(cfg, ji) != 0) 1176 NAT64STAT_INC(&cfg->base.stats, 1177 jhostfails); 1178 break; 1179 case JTYPE_NEWPORTGROUP: 1180 if (alloc_portgroup(ji) != 0) 1181 NAT64STAT_INC(&cfg->base.stats, 1182 jportfails); 1183 break; 1184 case JTYPE_DELPORTGROUP: 1185 delcount += ji->delcount; 1186 break; 1187 default: 1188 break; 1189 } 1190 } 1191 1192 /* 1193 * TODO: Alloc hew hash 1194 */ 1195 nhsize = 0; 1196 if (nhsize > 0) { 1197 /* XXX: */ 1198 } 1199 1200 /* Apply all changes in batch */ 1201 IPFW_UH_WLOCK(ch); 1202 IPFW_WLOCK(ch); 1203 1204 TAILQ_FOREACH(ji, &jhead, next) { 1205 switch (ji->jtype) { 1206 case JTYPE_NEWHOST: 1207 if (ji->nh != NULL) 1208 attach_host6(cfg, ji); 1209 break; 1210 case JTYPE_NEWPORTGROUP: 1211 if (ji->pg != NULL && 1212 attach_portgroup(cfg, ji) != 0) 1213 NAT64STAT_INC(&cfg->base.stats, 1214 jportfails); 1215 break; 1216 case JTYPE_DELPORTGROUP: 1217 consider_del_portgroup(cfg, ji); 1218 break; 1219 } 1220 } 1221 1222 if (nhsize > 0) { 1223 /* XXX: Move everything to new hash */ 1224 } 1225 1226 IPFW_WUNLOCK(ch); 1227 IPFW_UH_WUNLOCK(ch); 1228 1229 /* Flush unused entries */ 1230 while (!TAILQ_EMPTY(&jhead)) { 1231 ji = TAILQ_FIRST(&jhead); 1232 TAILQ_REMOVE(&jhead, ji, next); 1233 if (ji->nh != NULL) 1234 destroy_host6(ji->nh); 1235 if (ji->pg != NULL) 1236 destroy_portgroup(ji->pg); 1237 if (ji->m != NULL) 1238 reinject_mbuf(cfg, ji); 1239 if (ji->spare_idx != NULL) 1240 uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx); 1241 free(ji, M_IPFW); 1242 } 1243 CURVNET_RESTORE(); 1244 } 1245 1246 static NAT64NOINLINE struct nat64lsn_job_item * 1247 nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, 1248 int jtype) 1249 { 1250 struct nat64lsn_job_item *ji; 1251 struct in6_addr haddr; 1252 uint8_t nat_proto; 1253 1254 /* 1255 * Do not try to lock possibly contested mutex if we're near the limit. 1256 * Drop packet instead. 1257 */ 1258 if (cfg->jlen >= cfg->jmaxlen) { 1259 NAT64STAT_INC(&cfg->base.stats, jmaxlen); 1260 return (NULL); 1261 } 1262 1263 memset(&haddr, 0, sizeof(haddr)); 1264 nat_proto = 0; 1265 if (f_id != NULL) { 1266 haddr = f_id->src_ip6; 1267 nat_proto = nat64lsn_proto_map[f_id->proto]; 1268 1269 DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d", 1270 nat_proto, f_id->proto); 1271 1272 if (nat_proto == 0) 1273 return (NULL); 1274 } 1275 1276 ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW, 1277 M_NOWAIT | M_ZERO); 1278 1279 if (ji == NULL) { 1280 NAT64STAT_INC(&cfg->base.stats, jnomem); 1281 return (NULL); 1282 } 1283 1284 ji->jtype = jtype; 1285 1286 if (f_id != NULL) { 1287 ji->f_id = *f_id; 1288 ji->haddr = haddr; 1289 ji->nat_proto = nat_proto; 1290 } 1291 1292 return (ji); 1293 } 1294 1295 static NAT64NOINLINE void 1296 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) 1297 { 1298 1299 if (ji == NULL) 1300 return; 1301 1302 JQUEUE_LOCK(); 1303 TAILQ_INSERT_TAIL(&cfg->jhead, ji, next); 1304 cfg->jlen++; 1305 NAT64STAT_INC(&cfg->base.stats, jrequests); 1306 1307 if (callout_pending(&cfg->jcallout) == 0) 1308 callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); 1309 JQUEUE_UNLOCK(); 1310 } 1311 1312 static NAT64NOINLINE void 1313 nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, 1314 struct nat64lsn_job_head *jhead, int jlen) 1315 { 1316 1317 if (TAILQ_EMPTY(jhead)) 1318 return; 1319 1320 /* Attach current queue to execution one */ 1321 JQUEUE_LOCK(); 1322 TAILQ_CONCAT(&cfg->jhead, jhead, next); 1323 cfg->jlen += jlen; 1324 NAT64STAT_ADD(&cfg->base.stats, jrequests, jlen); 1325 1326 if (callout_pending(&cfg->jcallout) == 0) 1327 callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); 1328 JQUEUE_UNLOCK(); 1329 } 1330 1331 static unsigned int 1332 flow6_hash(const struct ipfw_flow_id *f_id) 1333 { 1334 unsigned char hbuf[36]; 1335 1336 memcpy(hbuf, &f_id->dst_ip6, 16); 1337 memcpy(&hbuf[16], &f_id->src_ip6, 16); 1338 memcpy(&hbuf[32], &f_id->dst_port, 2); 1339 memcpy(&hbuf[32], &f_id->src_port, 2); 1340 1341 return (djb_hash(hbuf, sizeof(hbuf))); 1342 } 1343 1344 static NAT64NOINLINE int 1345 nat64lsn_request_host(struct nat64lsn_cfg *cfg, 1346 const struct ipfw_flow_id *f_id, struct mbuf **pm) 1347 { 1348 struct nat64lsn_job_item *ji; 1349 struct mbuf *m; 1350 1351 m = *pm; 1352 *pm = NULL; 1353 1354 ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST); 1355 if (ji == NULL) { 1356 m_freem(m); 1357 NAT64STAT_INC(&cfg->base.stats, dropped); 1358 DPRINTF(DP_DROPS, "failed to create job"); 1359 } else { 1360 ji->m = m; 1361 /* Provide pseudo-random value based on flow */ 1362 ji->fhash = flow6_hash(f_id); 1363 nat64lsn_enqueue_job(cfg, ji); 1364 NAT64STAT_INC(&cfg->base.stats, jhostsreq); 1365 } 1366 1367 return (IP_FW_DENY); 1368 } 1369 1370 static NAT64NOINLINE int 1371 nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg, 1372 const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr, 1373 int needs_idx) 1374 { 1375 struct nat64lsn_job_item *ji; 1376 struct mbuf *m; 1377 1378 m = *pm; 1379 *pm = NULL; 1380 1381 ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP); 1382 if (ji == NULL) { 1383 m_freem(m); 1384 NAT64STAT_INC(&cfg->base.stats, dropped); 1385 DPRINTF(DP_DROPS, "failed to create job"); 1386 } else { 1387 ji->m = m; 1388 /* Provide pseudo-random value based on flow */ 1389 ji->fhash = flow6_hash(f_id); 1390 ji->aaddr = aaddr; 1391 ji->needs_idx = needs_idx; 1392 nat64lsn_enqueue_job(cfg, ji); 1393 NAT64STAT_INC(&cfg->base.stats, jportreq); 1394 } 1395 1396 return (IP_FW_DENY); 1397 } 1398 1399 static NAT64NOINLINE struct nat64lsn_state * 1400 nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, 1401 int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr) 1402 { 1403 struct nat64lsn_portgroup *pg; 1404 struct nat64lsn_state *st; 1405 int i, hval, off; 1406 1407 /* XXX: create additional bitmask for selecting proper portgroup */ 1408 for (i = 0; i < nh->pg_used; i++) { 1409 pg = PORTGROUP_BYSIDX(cfg, nh, i + 1); 1410 if (pg == NULL) 1411 continue; 1412 if (*aaddr == 0) 1413 *aaddr = pg->aaddr; 1414 if (pg->nat_proto != nat_proto) 1415 continue; 1416 1417 off = PG_GET_FREE_IDX(pg); 1418 if (off != 0) { 1419 /* We have found spare state. Use it */ 1420 off--; 1421 PG_MARK_BUSY_IDX(pg, off); 1422 st = &pg->states[off]; 1423 1424 /* 1425 * Fill in new info. Assume state was zeroed. 1426 * Timestamp and flags will be filled by caller. 1427 */ 1428 st->u.s = kst->u.s; 1429 st->cur.idx = i + 1; 1430 st->cur.off = off; 1431 1432 /* Insert into host hash table */ 1433 hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1); 1434 st->next = nh->phash[hval]; 1435 nh->phash[hval] = st->cur; 1436 1437 nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off); 1438 1439 NAT64STAT_INC(&cfg->base.stats, screated); 1440 1441 return (st); 1442 } 1443 /* Saev last used alias affress */ 1444 *aaddr = pg->aaddr; 1445 } 1446 1447 return (NULL); 1448 } 1449 1450 static NAT64NOINLINE int 1451 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id, 1452 struct mbuf **pm) 1453 { 1454 struct pfloghdr loghdr, *logdata; 1455 char a[INET6_ADDRSTRLEN]; 1456 struct nat64lsn_host *nh; 1457 struct st_ptr sidx; 1458 struct nat64lsn_state *st, kst; 1459 struct nat64lsn_portgroup *pg; 1460 struct icmp6_hdr *icmp6; 1461 uint32_t aaddr; 1462 int action, hval, nat_proto, proto; 1463 uint16_t aport, state_ts, state_flags; 1464 1465 /* Check if af/protocol is supported and get it short id */ 1466 nat_proto = nat64lsn_proto_map[f_id->proto]; 1467 if (nat_proto == 0) { 1468 /* 1469 * Since we can be called from jobs handler, we need 1470 * to free mbuf by self, do not leave this task to 1471 * ipfw_check_packet(). 1472 */ 1473 NAT64STAT_INC(&cfg->base.stats, noproto); 1474 goto drop; 1475 } 1476 1477 /* Try to find host first */ 1478 I6HASH_FIND(cfg, nh, &f_id->src_ip6); 1479 1480 if (nh == NULL) 1481 return (nat64lsn_request_host(cfg, f_id, pm)); 1482 1483 /* Fill-in on-stack state structure */ 1484 kst.u.s.faddr = nat64_extract_ip4(&cfg->base, &f_id->dst_ip6); 1485 if (kst.u.s.faddr == 0) { 1486 NAT64STAT_INC(&cfg->base.stats, dropped); 1487 goto drop; 1488 } 1489 kst.u.s.fport = f_id->dst_port; 1490 kst.u.s.lport = f_id->src_port; 1491 1492 /* Prepare some fields we might need to update */ 1493 hval = 0; 1494 proto = nat64_getlasthdr(*pm, &hval); 1495 if (proto < 0) { 1496 NAT64STAT_INC(&cfg->base.stats, dropped); 1497 DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); 1498 goto drop; 1499 } 1500 1501 SET_AGE(state_ts); 1502 if (proto == IPPROTO_TCP) 1503 state_flags = convert_tcp_flags( 1504 TCP(mtodo(*pm, hval))->th_flags); 1505 else 1506 state_flags = 0; 1507 if (proto == IPPROTO_ICMPV6) { 1508 /* Alter local port data */ 1509 icmp6 = mtodo(*pm, hval); 1510 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST || 1511 icmp6->icmp6_type == ICMP6_ECHO_REPLY) 1512 kst.u.s.lport = ntohs(icmp6->icmp6_id); 1513 } 1514 1515 hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1); 1516 pg = NULL; 1517 st = NULL; 1518 1519 /* OK, let's find state in host hash */ 1520 NAT64_LOCK(nh); 1521 sidx = nh->phash[hval]; 1522 int k = 0; 1523 while (sidx.idx != 0) { 1524 pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); 1525 st = &pg->states[sidx.off]; 1526 //DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off, 1527 //st->next.idx, st->next.off); 1528 if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto) 1529 break; 1530 if (k++ > 1000) { 1531 DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n", 1532 sidx.idx, sidx.off, st->next.idx, st->next.off); 1533 DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d", 1534 inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)), 1535 nh, curcpu); 1536 k = 0; 1537 } 1538 sidx = st->next; 1539 } 1540 1541 if (sidx.idx == 0) { 1542 aaddr = 0; 1543 st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr); 1544 if (st == NULL) { 1545 /* No free states. Request more if we can */ 1546 if (nh->pg_used >= cfg->max_chunks) { 1547 /* Limit reached */ 1548 DPRINTF(DP_DROPS, "PG limit reached " 1549 " for host %s (used %u, allocated %u, " 1550 "limit %u)", inet_ntop(AF_INET6, 1551 &nh->addr, a, sizeof(a)), 1552 nh->pg_used * NAT64_CHUNK_SIZE, 1553 nh->pg_allocated * NAT64_CHUNK_SIZE, 1554 cfg->max_chunks * NAT64_CHUNK_SIZE); 1555 NAT64_UNLOCK(nh); 1556 NAT64STAT_INC(&cfg->base.stats, dropped); 1557 goto drop; 1558 } 1559 if ((nh->pg_allocated <= 1560 nh->pg_used + NAT64LSN_REMAININGPG) && 1561 nh->pg_allocated < cfg->max_chunks) 1562 action = 1; /* Request new indexes */ 1563 else 1564 action = 0; 1565 NAT64_UNLOCK(nh); 1566 //DPRINTF("No state, unlock for %p", nh); 1567 return (nat64lsn_request_portgroup(cfg, f_id, 1568 pm, aaddr, action)); 1569 } 1570 1571 /* We've got new state. */ 1572 sidx = st->cur; 1573 pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); 1574 } 1575 1576 /* Okay, state found */ 1577 1578 /* Update necessary fileds */ 1579 if (st->timestamp != state_ts) 1580 st->timestamp = state_ts; 1581 if ((st->flags & state_flags) != 0) 1582 st->flags |= state_flags; 1583 1584 /* Copy needed state data */ 1585 aaddr = pg->aaddr; 1586 aport = htons(pg->aport + sidx.off); 1587 1588 NAT64_UNLOCK(nh); 1589 1590 if (cfg->base.flags & NAT64_LOG) { 1591 logdata = &loghdr; 1592 nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off); 1593 } else 1594 logdata = NULL; 1595 1596 action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->base, logdata); 1597 if (action == NAT64SKIP) 1598 return (cfg->nomatch_verdict); 1599 if (action == NAT64MFREE) { 1600 drop: 1601 m_freem(*pm); 1602 } 1603 *pm = NULL; /* mark mbuf as consumed */ 1604 return (IP_FW_DENY); 1605 } 1606 1607 /* 1608 * Main dataplane entry point. 1609 */ 1610 int 1611 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, 1612 ipfw_insn *cmd, int *done) 1613 { 1614 ipfw_insn *icmd; 1615 struct nat64lsn_cfg *cfg; 1616 int ret; 1617 1618 IPFW_RLOCK_ASSERT(ch); 1619 1620 *done = 1; /* terminate the search */ 1621 icmd = cmd + 1; 1622 if (cmd->opcode != O_EXTERNAL_ACTION || 1623 cmd->arg1 != V_nat64lsn_eid || 1624 icmd->opcode != O_EXTERNAL_INSTANCE || 1625 (cfg = NAT64_LOOKUP(ch, icmd)) == NULL) 1626 return (0); 1627 1628 switch (args->f_id.addr_type) { 1629 case 4: 1630 ret = nat64lsn_translate4(cfg, &args->f_id, &args->m); 1631 break; 1632 case 6: 1633 ret = nat64lsn_translate6(cfg, &args->f_id, &args->m); 1634 break; 1635 default: 1636 return (cfg->nomatch_verdict); 1637 } 1638 return (ret); 1639 } 1640 1641 static int 1642 nat64lsn_ctor_host(void *mem, int size, void *arg, int flags) 1643 { 1644 struct nat64lsn_host *nh; 1645 1646 nh = (struct nat64lsn_host *)mem; 1647 memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr)); 1648 memset(nh->phash, 0, sizeof(nh->phash)); 1649 return (0); 1650 } 1651 1652 static int 1653 nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags) 1654 { 1655 1656 memset(mem, 0, size); 1657 return (0); 1658 } 1659 1660 void 1661 nat64lsn_init_internal(void) 1662 { 1663 1664 memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map)); 1665 /* Set up supported protocol map */ 1666 nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP; 1667 nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP; 1668 nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP; 1669 nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP; 1670 /* Fill in reverse proto map */ 1671 memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map)); 1672 nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP; 1673 nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP; 1674 nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6; 1675 1676 JQUEUE_LOCK_INIT(); 1677 nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone", 1678 sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL, 1679 NULL, NULL, UMA_ALIGN_PTR, 0); 1680 nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone", 1681 sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL, 1682 UMA_ALIGN_PTR, 0); 1683 nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone", 1684 sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK, 1685 nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 1686 } 1687 1688 void 1689 nat64lsn_uninit_internal(void) 1690 { 1691 1692 JQUEUE_LOCK_DESTROY(); 1693 uma_zdestroy(nat64lsn_host_zone); 1694 uma_zdestroy(nat64lsn_pg_zone); 1695 uma_zdestroy(nat64lsn_pgidx_zone); 1696 } 1697 1698 void 1699 nat64lsn_start_instance(struct nat64lsn_cfg *cfg) 1700 { 1701 1702 callout_reset(&cfg->periodic, hz * PERIODIC_DELAY, 1703 nat64lsn_periodic, cfg); 1704 } 1705 1706 struct nat64lsn_cfg * 1707 nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr) 1708 { 1709 struct nat64lsn_cfg *cfg; 1710 1711 cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO); 1712 TAILQ_INIT(&cfg->jhead); 1713 cfg->vp = curvnet; 1714 cfg->ch = ch; 1715 COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK); 1716 1717 cfg->ihsize = NAT64LSN_HSIZE; 1718 cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW, 1719 M_WAITOK | M_ZERO); 1720 1721 cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW, 1722 M_WAITOK | M_ZERO); 1723 1724 callout_init(&cfg->periodic, CALLOUT_MPSAFE); 1725 callout_init(&cfg->jcallout, CALLOUT_MPSAFE); 1726 1727 return (cfg); 1728 } 1729 1730 /* 1731 * Destroy all hosts callback. 1732 * Called on module unload when all activity already finished, so 1733 * can work without any locks. 1734 */ 1735 static NAT64NOINLINE int 1736 nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg) 1737 { 1738 struct nat64lsn_portgroup *pg; 1739 int i; 1740 1741 for (i = nh->pg_used; i > 0; i--) { 1742 pg = PORTGROUP_BYSIDX(cfg, nh, i); 1743 if (pg == NULL) 1744 continue; 1745 cfg->pg[pg->idx] = NULL; 1746 destroy_portgroup(pg); 1747 nh->pg_used--; 1748 } 1749 destroy_host6(nh); 1750 cfg->ihcount--; 1751 return (0); 1752 } 1753 1754 void 1755 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg) 1756 { 1757 struct nat64lsn_host *nh, *tmp; 1758 1759 callout_drain(&cfg->jcallout); 1760 callout_drain(&cfg->periodic); 1761 I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg); 1762 DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount); 1763 1764 COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS); 1765 free(cfg->ih, M_IPFW); 1766 free(cfg->pg, M_IPFW); 1767 free(cfg, M_IPFW); 1768 } 1769 1770