/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015-2020 Yandex LLC
 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
 * Copyright (c) 2016-2020 Andrey V. Elsukov <ae@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_pflog.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip_fw_nat64.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/pf/pf.h>

#include "nat64lsn.h"

MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

#define	NAT64LSN_EPOCH_ENTER(et)	NET_EPOCH_ENTER(et)
#define	NAT64LSN_EPOCH_EXIT(et)		NET_EPOCH_EXIT(et)
#define	NAT64LSN_EPOCH_ASSERT()		NET_EPOCH_ASSERT()
#define	NAT64LSN_EPOCH_CALL(c, f)	NET_EPOCH_CALL((f), (c))

static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;

static void nat64lsn_periodic(void *data);
#define	PERIODIC_DELAY		4
#define	NAT64_LOOKUP(chain, cmd)	\
	(struct nat64lsn_instance *)SRV_OBJECT((chain), insntod(cmd, kidx)->kidx)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups.
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,
	JTYPE_NEWPORTGROUP,
	JTYPE_DESTROY,
};

struct nat64lsn_job_item {
	STAILQ_ENTRY(nat64lsn_job_item)	entries;
	enum nat64lsn_jtype	jtype;

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			struct mbuf		*m;
			struct nat64lsn_host	*host;
			struct nat64lsn_state	*state;
			uint32_t		src6_hval;
			uint32_t		state_hval;
			struct ipfw_flow_id	f_id;
			in_addr_t		faddr;
			uint16_t		port;
			uint8_t			proto;
			uint8_t			done;
		};
		struct { /* used by JTYPE_DESTROY */
			struct nat64lsn_hosts_slist	hosts;
			struct nat64lsn_pg_slist	portgroups;
			struct nat64lsn_pgchunk		*pgchunk;
			struct epoch_context		epoch_ctx;
		};
	};
};

static struct mtx jmtx;
#define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define	JQUEUE_LOCK()		mtx_lock(&jmtx)
#define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)

static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
    struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);

static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
    struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);

#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)

static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	uint8_t result;

	result = flags & (TH_FIN|TH_SYN);
	result |= (flags & TH_RST) >> 2;	/* Treat RST as FIN */
	result |= (flags & TH_ACK) >> 2;	/* Treat ACK as estab */

	return (result);
}

static void
nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
    struct nat64lsn_state *state)
{

	memset(plog, 0, sizeof(*plog));
	plog->length = PFLOG_REAL_HDRLEN;
	plog->af = family;
	plog->action = PF_NAT;
	plog->dir = PF_IN;
	plog->rulenr = htonl(state->ip_src);
	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
	    (state->proto << 8) | (state->ip_dst & 0xff));
	plog->ruleset[0] = '\0';
	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
}

#define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
#define	HOST_HVAL(c, a)	HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
#define	HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

#define	ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
#define	ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
    struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
{

	/*
	 * We could implement different algorithms for selecting
	 * an alias address.
	 * XXX: for now we use the first available.
	 */
	return (CK_SLIST_FIRST(&host->aliases));
}

static struct nat64lsn_alias*
nat64lsn_get_alias(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id __unused)
{
	static uint32_t idx = 0;

	/*
	 * We could choose an alias by the number of allocated PGs,
	 * by whether it is not yet used by other hosts, or by a static
	 * mapping configured by the user.
	 * XXX: for now we choose it using round robin.
	 */
	return (&ALIAS_BYHASH(cfg, idx++));
}

#define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
#define	STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
#define	STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))

#ifdef __LP64__
#define	FREEMASK_FFSLL(pg, faddr)	\
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int i;

	if ((i = ffsl(freemask[0])) != 0)
		return (i);
	if ((i = ffsl(freemask[1])) != 0)
		return (i + 32);
	return (0);
}
#define	FREEMASK_FFSLL(pg, faddr)	\
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */


#define	NAT64LSN_TRY_PGCNT	36
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, in_addr_t faddr)
{
	struct nat64lsn_pg *pg;
	uint32_t idx, oldidx;
	int cnt;

	/* First try last used PG. */
	idx = oldidx = ck_pr_load_32(pgidx);
	MPASS(idx < 1024);
	cnt = 0;
	do {
		ck_pr_fence_load();
		if (idx > 1023 || !ISSET32(*chunkmask, idx / 32)) {
			/* If it is the first try, reset idx to the first PG */
			idx = 0;
			/* Stop if idx is out of range */
			if (cnt > 0)
				break;
		}
		if (ISSET32(pgmask[idx / 32], idx % 32)) {
			pg = ck_pr_load_ptr(
			    &chunks[idx / 32]->pgptr[idx % 32]);
			ck_pr_fence_load();
			/*
			 * Make sure that pg did not become DEAD.
			 */
			if ((pg->flags & NAT64LSN_DEADPG) == 0 &&
			    FREEMASK_BITCOUNT(pg, faddr) > 0) {
				if (cnt > 0)
					ck_pr_cas_32(pgidx, oldidx, idx);
				return (pg);
			}
		}
		idx++;
	} while (++cnt < NAT64LSN_TRY_PGCNT);
	if (oldidx != idx)
		ck_pr_cas_32(pgidx, oldidx, idx);
	return (NULL);
}

static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int i, offset;

	NAT64LSN_EPOCH_ASSERT();

	/* Check whether we already have a state for the given arguments */
	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
		if (state->proto == proto && state->ip_dst == faddr &&
		    state->sport == port && state->dport == f_id->dst_port)
			return (state);
	}

	link = nat64lsn_get_aliaslink(cfg, host, f_id);
	if (link == NULL)
		return (NULL);

	switch (proto) {
	case IPPROTO_TCP:
		pg = nat64lsn_get_pg(&link->alias->tcp_chunkmask,
		    link->alias->tcp_pgmask, link->alias->tcp,
		    &link->alias->tcp_pgidx, faddr);
		break;
	case IPPROTO_UDP:
		pg = nat64lsn_get_pg(&link->alias->udp_chunkmask,
		    link->alias->udp_pgmask, link->alias->udp,
		    &link->alias->udp_pgidx, faddr);
		break;
	case IPPROTO_ICMP:
		pg = nat64lsn_get_pg(&link->alias->icmp_chunkmask,
		    link->alias->icmp_pgmask, link->alias->icmp,
		    &link->alias->icmp_pgidx, faddr);
		break;
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL || (pg->flags & NAT64LSN_DEADPG) != 0)
		return (NULL);

	/* Check that the PG has some free states */
	state = NULL;
	i = FREEMASK_BITCOUNT(pg, faddr);
	while (i-- > 0) {
		offset = FREEMASK_FFSLL(pg, faddr);
		if (offset == 0) {
			/*
			 * We lost the race.
			 * No more free states in this PG.
			 */
			break;
		}

		/* Let's try to atomically grab the state */
		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
			/* Initialize */
			state->flags = proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(f_id->_flags);
			state->proto = proto;
			state->aport = pg->base_port + offset - 1;
			state->dport = f_id->dst_port;
			state->sport = port;
			state->ip6_dst = f_id->dst_ip6;
			state->ip_dst = faddr;
			state->ip_src = link->alias->addr;
			state->hval = hval;
			state->host = host;
			SET_AGE(state->timestamp);

			/* Insert new state into host's hash table */
			HOST_LOCK(host);
			SET_AGE(host->timestamp);
			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
			    state, entries);
			host->states_count++;
			HOST_UNLOCK(host);
			NAT64STAT_INC(&cfg->base.stats, screated);
			/* Mark the state as ready for translate4 */
			ck_pr_fence_store();
			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
			break;
		}
	}
	return (state);
}

/*
 * Inspect ICMP packets to see if the message contains a different
 * packet header, in which case we need to alter @addr and @port.
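 *
 * For ICMP_UNREACH and ICMP_TIMXCEED the payload carries the embedded
 * original IPv4 header followed by the first 64 bits of the original
 * ULP header; the inner source address and the inner source port (or
 * ICMP id) are taken from there.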
 */
static int
inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
    uint16_t *port)
{
	struct icmp *icmp;
	struct ip *ip;
	int off;
	uint8_t inner_proto;

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	off = (ip->ip_hl << 2) + ICMP_MINLEN;
	if ((*mp)->m_len < off)
		*mp = m_pullup(*mp, off);
	if (*mp == NULL)
		return (ENOMEM);

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	icmp = L3HDR(ip, struct icmp *);
	switch (icmp->icmp_type) {
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
		/* Use icmp ID as distinguisher */
		*port = ntohs(icmp->icmp_id);
		return (0);
	case ICMP_UNREACH:
	case ICMP_TIMXCEED:
		break;
	default:
		return (EOPNOTSUPP);
	}
	/*
	 * ICMP_UNREACH and ICMP_TIMXCEED contain the IP header + 64 bits
	 * of ULP header.
	 */
	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
		return (EINVAL);
	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	ip = mtodo(*mp, off); /* Inner IP header */
	inner_proto = ip->ip_p;
	off += ip->ip_hl << 2; /* Skip inner IP header */
	*addr = ntohl(ip->ip_src.s_addr);
	if ((*mp)->m_len < off + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	switch (inner_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* Copy source port from the header */
		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
		*proto = inner_proto;
		return (0);
	case IPPROTO_ICMP:
		/*
		 * We will translate only ICMP errors for our ICMP
		 * echo requests.
		 */
		icmp = mtodo(*mp, off);
		if (icmp->icmp_type != ICMP_ECHO)
			return (EOPNOTSUPP);
		*port = ntohs(icmp->icmp_id);
		return (0);
	};
	return (EOPNOTSUPP);
}

static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int chunk_idx, pg_idx, state_idx;

	NAT64LSN_EPOCH_ASSERT();

	if (port < NAT64_MIN_PORT)
		return (NULL);
	/*
	 * Alias keeps 32 pgchunks for each protocol.
	 * Each pgchunk has 32 pointers to portgroup.
	 * Each portgroup has 64 states for ports.
	 */
	port -= NAT64_MIN_PORT;
	chunk_idx = port / 2048;

	port -= chunk_idx * 2048;
	pg_idx = port / 64;
	state_idx = port % 64;

	/*
	 * First check in proto_chunkmask that we have an allocated PG chunk.
	 * Then check in proto_pgmask that we have a valid PG pointer.
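	 *
	 * Worked example of the arithmetic above (assuming NAT64_MIN_PORT
	 * is 1024): external port 12345 gives 12345 - 1024 = 11321, so
	 * chunk_idx = 5, pg_idx = 16 and state_idx = 57.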
	 */
	pg = NULL;
	switch (proto) {
	case IPPROTO_TCP:
		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_UDP:
		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_ICMP:
		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	if (FREEMASK_ISSET(pg, faddr, state_idx))
		return (NULL);

	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
	ck_pr_fence_load();
	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
		return (state);
	return (NULL);
}

/*
 * Reassemble IPv4 fragments, do a PULLUP if needed, and get some ULP
 * fields that might be unknown until reassembly is completed.
 */
static struct mbuf*
nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
    uint16_t *port)
{
	struct ip *ip;
	int len;

	m = ip_reass(m);
	if (m == NULL)
		return (NULL);
	/* IP header must be contiguous after ip_reass() */
	ip = mtod(m, struct ip *);
	len = ip->ip_hl << 2;
	switch (ip->ip_p) {
	case IPPROTO_ICMP:
		len += ICMP_MINLEN;
		break;
	case IPPROTO_TCP:
		len += sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		len += sizeof(struct udphdr);
		break;
	default:
		m_freem(m);
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (NULL);
	}
	if (m->m_len < len) {
		m = m_pullup(m, len);
		if (m == NULL) {
			NAT64STAT_INC(&cfg->base.stats, nomem);
			return (NULL);
		}
		ip = mtod(m, struct ip *);
	}
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		*port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
		break;
	case IPPROTO_UDP:
		*port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
		break;
	}
	return (m);
}

static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
	struct pfloghdr loghdr, *logdata;
	struct in6_addr src6;
	struct nat64lsn_state *state;
	struct nat64lsn_alias *alias;
	uint32_t addr, flags;
	uint16_t port, ts;
	int ret;
	uint8_t proto;

	addr = f_id->dst_ip;
	port = f_id->dst_port;
	proto = f_id->proto;
	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* Reassemble fragments if needed */
	ret = ntohs(mtod(*mp, struct ip *)->ip_off);
	if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
		*mp = nat64lsn_reassemble4(cfg, *mp, &port);
		if (*mp == NULL)
			return (IP_FW_DENY);
	}

	/* Check if protocol is supported */
	switch (proto) {
	case IPPROTO_ICMP:
		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
		if (ret != 0) {
			if (ret == ENOMEM) {
				NAT64STAT_INC(&cfg->base.stats, nomem);
				return (IP_FW_DENY);
			}
			NAT64STAT_INC(&cfg->base.stats, noproto);
			return (cfg->nomatch_verdict);
		}
		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
			NAT64STAT_INC(&cfg->base.stats, nomatch4);
			return (cfg->nomatch_verdict);
		}
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	alias = &ALIAS_BYHASH(cfg, addr);
	MPASS(addr == alias->addr);

	/* Check that we have a state for this port */
	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
	    port, proto);
	if (state == NULL) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* TODO: Check flags to see if we need to do some static mapping */

	/* Update some state fields if needed */
	SET_AGE(ts);
	if (f_id->proto == IPPROTO_TCP)
		flags = convert_tcp_flags(f_id->_flags);
	else
		flags = 0;
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	port = htons(state->sport);
	src6 = state->ip6_dst;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET, state);
	} else
		logdata = NULL;

	/*
	 * We already have src6 with an embedded address, but it is
	 * possible that src_ip differs from state->ip_dst; this is why
	 * we do the embedding again.
	 */
	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
	    &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}

/*
 * Check if a particular state is stale and should be deleted.
 * Return 1 if true, 0 otherwise.
 */
static int
nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
{
	int age, ttl;

	/* State was marked as stale in previous pass. */
	if (ISSET32(state->flags, NAT64_BIT_STALE))
		return (1);

	/* State is not yet initialized, it is going to be READY */
	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
		return (0);

	age = GET_AGE(state->timestamp);
	switch (state->proto) {
	case IPPROTO_TCP:
		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
			ttl = cfg->st_close_ttl;
		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
			ttl = cfg->st_estab_ttl;
		else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
			ttl = cfg->st_syn_ttl;
		else
			ttl = cfg->st_syn_ttl;
		if (age > ttl)
			return (1);
		break;
	case IPPROTO_UDP:
		if (age > cfg->st_udp_ttl)
			return (1);
		break;
	case IPPROTO_ICMP:
		if (age > cfg->st_icmp_ttl)
			return (1);
		break;
	}
	return (0);
}

#define	PGCOUNT_ADD(alias, proto, value)				\
    switch (proto) {							\
    case IPPROTO_TCP: (alias)->tcp_pgcount += (value); break;		\
    case IPPROTO_UDP: (alias)->udp_pgcount += (value); break;		\
    case IPPROTO_ICMP: (alias)->icmp_pgcount += (value); break;		\
    }
#define	PGCOUNT_INC(alias, proto)	PGCOUNT_ADD(alias, proto, 1)
#define	PGCOUNT_DEC(alias, proto)	PGCOUNT_ADD(alias, proto, -1)

static inline void
nat64lsn_state_cleanup(struct nat64lsn_state *state)
{

	/*
	 * Reset the READY flag and wait until it becomes
	 * safe for translate4.
	 */
	ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
	/*
	 * And set the STALE flag for deferred deletion in the
	 * next pass of nat64lsn_maintain_pg().
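	 *
	 * This two-pass scheme gives concurrent translate4() users a full
	 * periodic interval to stop referencing the state: once READY is
	 * cleared no new references are taken, and the freemask bit is
	 * only returned to the pool on the following pass.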
	 */
	ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
	ck_pr_fence_store();
}

static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	for (c = 0; c < pg->chunks_count; c++) {
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			/*
			 * Now translate6 will not use this state.
			 */
			host->states_count--;
			HOST_UNLOCK(host);
			nat64lsn_state_cleanup(state);
		}
	}

	/*
	 * We have some alive states, update timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}

static void
nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
    struct nat64lsn_pg_slist *portgroups)
{
	struct nat64lsn_alias *alias;
	struct nat64lsn_pg *pg, *tpg;
	uint32_t *pgmask, *pgidx;
	int i, idx;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
			if (nat64lsn_maintain_pg(cfg, pg) == 0)
				continue;
			/* Always keep first PG */
			if (pg->base_port == NAT64_MIN_PORT)
				continue;
			/*
			 * PG expires in two passes:
			 * 1. Reset bit in pgmask, mark it as DEAD.
			 * 2. Unlink it and schedule for deferred destroying.
			 */
			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
			switch (pg->proto) {
			case IPPROTO_TCP:
				pgmask = alias->tcp_pgmask;
				pgidx = &alias->tcp_pgidx;
				break;
			case IPPROTO_UDP:
				pgmask = alias->udp_pgmask;
				pgidx = &alias->udp_pgidx;
				break;
			case IPPROTO_ICMP:
				pgmask = alias->icmp_pgmask;
				pgidx = &alias->icmp_pgidx;
				break;
			}
			if (pg->flags & NAT64LSN_DEADPG) {
				/* Unlink PG from alias's chain */
				ALIAS_LOCK(alias);
				CK_SLIST_REMOVE(&alias->portgroups, pg,
				    nat64lsn_pg, entries);
				PGCOUNT_DEC(alias, pg->proto);
				ALIAS_UNLOCK(alias);
				/*
				 * Link it to job's chain for deferred
				 * destroying.
				 */
				NAT64STAT_INC(&cfg->base.stats, spgdeleted);
				CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
				continue;
			}

			/* Reset the corresponding bit in pgmask array. */
			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
			pg->flags |= NAT64LSN_DEADPG;
			ck_pr_fence_store();
			/* If last used PG points to this PG, reset it. */
			ck_pr_cas_32(pgidx, idx, 0);
		}
	}
}

static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
	struct nat64lsn_host *host, *tmp;
	int i;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
		    entries, tmp) {
			/* Was the host marked in the previous call? */
			if (host->flags & NAT64LSN_DEADHOST) {
				if (host->states_count > 0 ||
				    GET_AGE(host->timestamp) <
				    cfg->host_delete_delay) {
					host->flags &= ~NAT64LSN_DEADHOST;
					continue;
				}
				/*
				 * Unlink host from hash table and schedule
				 * it for deferred destroying.
				 */
				CFG_LOCK(cfg);
				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
				    nat64lsn_host, entries);
				cfg->hosts_count--;
				CFG_UNLOCK(cfg);
				CK_SLIST_INSERT_HEAD(hosts, host, entries);
				continue;
			}
			if (host->states_count > 0 ||
			    GET_AGE(host->timestamp) < cfg->host_delete_delay)
				continue;
			/* Mark host as going to be expired in next pass */
			host->flags |= NAT64LSN_DEADHOST;
			ck_pr_fence_store();
		}
	}
}

static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}

#if 0
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			if (h->states_count / 2 < h->states_hashsize ||
			    h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				continue;
			hsize = h->states_hashsize * 2;
			hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
			if (hash == NULL)
				continue;
			for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[i]);

			ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif

/*
 * This procedure is used to perform various maintenance
 * on the dynamic hash lists. Currently it is called every 4 seconds.
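 * Each pass expires stale states, portgroups and hosts, and hands the
 * collected objects to nat64lsn_job_destroy() via an epoch callback, so
 * they are freed only after concurrent readers are done with them.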
 */
static void
nat64lsn_periodic(void *data)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_cfg *cfg;

	cfg = (struct nat64lsn_cfg *) data;
	CURVNET_SET(cfg->vp);
	if (cfg->hosts_count > 0) {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji != NULL) {
			ji->jtype = JTYPE_DESTROY;
			CK_SLIST_INIT(&ji->hosts);
			CK_SLIST_INIT(&ji->portgroups);
			nat64lsn_expire_hosts(cfg, &ji->hosts);
			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
			    nat64lsn_job_destroy);
		} else
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
	CURVNET_RESTORE();
}

#define	ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage): 0)
#define	HOST_ERROR(stage)	ALLOC_ERROR(stage, 1)
#define	PG_ERROR(stage)		ALLOC_ERROR(stage, 2)
static int
nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_host *host;
	struct nat64lsn_state *state;
	uint32_t hval, data[2];
	int i;

	/* Check that host was not yet added. */
	NAT64LSN_EPOCH_ASSERT();
	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
			/* The host was allocated in a previous call. */
			ji->host = host;
			goto get_state;
		}
	}

	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
	if (ji->host == NULL)
		return (HOST_ERROR(1));

	host->states_hashsize = NAT64LSN_HSIZE;
	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
	if (host->states_hash == NULL) {
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(2));
	}

	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
	if (link == NULL) {
		free(host->states_hash, M_NAT64LSN);
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(3));
	}

	/* Initialize */
	HOST_LOCK_INIT(host);
	SET_AGE(host->timestamp);
	host->addr = ji->f_id.src_ip6;
	host->hval = ji->src6_hval;
	host->flags = 0;
	host->states_count = 0;
	CK_SLIST_INIT(&host->aliases);
	for (i = 0; i < host->states_hashsize; i++)
		CK_SLIST_INIT(&host->states_hash[i]);

	link->alias = nat64lsn_get_alias(cfg, &ji->f_id);
	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);

	ALIAS_LOCK(link->alias);
	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
	link->alias->hosts_count++;
	ALIAS_UNLOCK(link->alias);

	CFG_LOCK(cfg);
	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
	cfg->hosts_count++;
	CFG_UNLOCK(cfg);

get_state:
	data[0] = ji->faddr;
	data[1] = (ji->f_id.dst_port << 16) | ji->port;
	ji->state_hval = hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
	    ji->faddr, ji->port, ji->proto);
	/*
	 * We failed to obtain a new state; the alias used needs a new PG.
	 * XXX: or another alias should be used.
	 */
	if (state == NULL) {
		/* Try to allocate new PG */
		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
			return (HOST_ERROR(4));
		/* We assume that nat64lsn_alloc_pg() got state */
	} else
		ji->state = state;

	ji->done = 1;
	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
	return (HOST_ERROR(0));
}

static int
nat64lsn_find_pg_place(uint32_t *data)
{
	int i;

	for (i = 0; i < 32; i++) {
		if (~data[i] == 0)
			continue;
		return (i * 32 + ffs(~data[i]) - 1);
	}
	return (-1);
}

static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias, uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, uint8_t proto)
{
	struct nat64lsn_pg *pg;
	int i, pg_idx, chunk_idx;

	/* Find place in pgchunk where PG can be added */
	pg_idx = nat64lsn_find_pg_place(pgmask);
	if (pg_idx < 0)	/* no more PGs */
		return (PG_ERROR(1));
	/* Check that we have allocated pgchunk for given PG index */
	chunk_idx = pg_idx / 32;
	if (!ISSET32(*chunkmask, chunk_idx)) {
		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
		    M_NOWAIT);
		if (chunks[chunk_idx] == NULL)
			return (PG_ERROR(2));
		ck_pr_bts_32(chunkmask, chunk_idx);
		ck_pr_fence_store();
	}
	/* Allocate PG and states chunks */
	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
	if (pg == NULL)
		return (PG_ERROR(3));
	pg->chunks_count = cfg->states_chunks;
	if (pg->chunks_count > 1) {
		pg->freemask_chunk = malloc(pg->chunks_count *
		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
		if (pg->freemask_chunk == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(4));
		}
		pg->states_chunk = malloc(pg->chunks_count *
		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
		    M_NOWAIT | M_ZERO);
		if (pg->states_chunk == NULL) {
			free(pg->freemask_chunk, M_NAT64LSN);
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(5));
		}
		for (i = 0; i < pg->chunks_count; i++) {
			pg->states_chunk[i] = uma_zalloc(
			    nat64lsn_state_zone, M_NOWAIT);
			if (pg->states_chunk[i] == NULL)
				goto states_failed;
		}
		memset(pg->freemask_chunk, 0xff,
		    sizeof(uint64_t) * pg->chunks_count);
	} else {
		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
		if (pg->states == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(6));
		}
		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
	}

	/* Initialize PG and hook it to pgchunk */
	SET_AGE(pg->timestamp);
	pg->flags = 0;
	pg->proto = proto;
	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
	ck_pr_fence_store();

	/* Set bit in pgmask and set index of last used PG */
	ck_pr_bts_32(&pgmask[chunk_idx], pg_idx % 32);
	ck_pr_store_32(pgidx, pg_idx);

	ALIAS_LOCK(alias);
	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
	SET_AGE(alias->timestamp);
	PGCOUNT_INC(alias, proto);
	ALIAS_UNLOCK(alias);
	NAT64STAT_INC(&cfg->base.stats, spgcreated);
	return (PG_ERROR(0));

states_failed:
	for (i = 0; i < pg->chunks_count; i++)
		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
	free(pg->freemask_chunk, M_NAT64LSN);
	free(pg->states_chunk, M_NAT64LSN);
	uma_zfree(nat64lsn_pg_zone, pg);
	return (PG_ERROR(7));
}

static int
nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_alias *alias;
	int ret;

	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
	if (link == NULL)
		return (PG_ERROR(1));

	/*
	 * TODO: check that we did not already allocate a PG in a
	 * previous call.
	 */

	ret = 0;
	alias = link->alias;
	/* Find place in pgchunk where PG can be added */
	switch (ji->proto) {
	case IPPROTO_TCP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->tcp_chunkmask, alias->tcp_pgmask,
		    alias->tcp, &alias->tcp_pgidx, ji->proto);
		break;
	case IPPROTO_UDP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->udp_chunkmask, alias->udp_pgmask,
		    alias->udp, &alias->udp_pgidx, ji->proto);
		break;
	case IPPROTO_ICMP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->icmp_chunkmask, alias->icmp_pgmask,
		    alias->icmp, &alias->icmp_pgidx, ji->proto);
		break;
	default:
		panic("%s: wrong proto %d", __func__, ji->proto);
	}
	if (ret == PG_ERROR(1)) {
		/*
		 * PG_ERROR(1) means that the alias lacks free PGs.
		 * XXX: try next alias.
		 */
		printf("NAT64LSN: %s: failed to obtain PG\n",
		    __func__);
		return (ret);
	}
	if (ret == PG_ERROR(0)) {
		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
		    ji->state_hval, ji->faddr, ji->port, ji->proto);
		if (ji->state == NULL)
			ret = PG_ERROR(8);
		else
			ji->done = 1;
	}
	return (ret);
}

static void
nat64lsn_do_request(void *data)
{
	struct epoch_tracker et;
	struct nat64lsn_job_head jhead;
	struct nat64lsn_job_item *ji, *ji2;
	struct nat64lsn_cfg *cfg;
	int jcount;
	uint8_t flags;

	cfg = (struct nat64lsn_cfg *)data;
	if (cfg->jlen == 0)
		return;

	CURVNET_SET(cfg->vp);
	STAILQ_INIT(&jhead);

	/* Grab queue */
	JQUEUE_LOCK();
	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
	jcount = cfg->jlen;
	cfg->jlen = 0;
	JQUEUE_UNLOCK();

	/* TODO: check if we need to resize hash */

	NAT64STAT_INC(&cfg->base.stats, jcalls);
	DPRINTF(DP_JQUEUE, "count=%d", jcount);

	/*
	 * TODO:
	 * What we should do here is to build a hash
	 * to ensure we don't have lots of duplicate requests.
	 * Skip this for now.
	 *
	 * TODO: Limit per-call number of items
	 */

	NAT64LSN_EPOCH_ENTER(et);
	STAILQ_FOREACH(ji, &jhead, entries) {
		switch (ji->jtype) {
		case JTYPE_NEWHOST:
			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jhostfails);
			break;
		case JTYPE_NEWPORTGROUP:
			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jportfails);
			break;
		default:
			continue;
		}
		if (ji->done != 0) {
			flags = ji->proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(ji->f_id._flags);
			nat64lsn_translate6_internal(cfg, &ji->m,
			    ji->state, flags);
			NAT64STAT_INC(&cfg->base.stats, jreinjected);
		}
	}
	NAT64LSN_EPOCH_EXIT(et);

	ji = STAILQ_FIRST(&jhead);
	while (ji != NULL) {
		ji2 = STAILQ_NEXT(ji, entries);
		/*
		 * In any case we must free the mbuf if the
		 * translator did not consume it.
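		 * nat64lsn_translate6_internal() sets ji->m to NULL when it
		 * consumes the packet, and m_freem(NULL) is a no-op, so the
		 * unconditional call below is safe.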
		 */
		m_freem(ji->m);
		uma_zfree(nat64lsn_job_zone, ji);
		ji = ji2;
	}
	CURVNET_RESTORE();
}

static struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
{
	struct nat64lsn_job_item *ji;

	/*
	 * Do not try to lock possibly contested mutex if we're near the
	 * limit. Drop packet instead.
	 */
	ji = NULL;
	if (cfg->jlen >= cfg->jmaxlen)
		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
	else {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji == NULL)
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	if (ji == NULL) {
		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "failed to create job");
	} else {
		ji->jtype = jtype;
		ji->done = 0;
	}
	return (ji);
}

static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

	JQUEUE_LOCK();
	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
	NAT64STAT_INC(&cfg->base.stats, jrequests);
	cfg->jlen++;

	if (callout_pending(&cfg->jcallout) == 0)
		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
	JQUEUE_UNLOCK();
}

/*
 * This function is used to clean up after an unlikely race condition,
 * where the host object was deleted while some translation state was
 * still being created.
 *
 * Since state expiration removes states from the host's hash table,
 * we need to be sure that no states remain linked with this host entry.
 */
static void
nat64lsn_host_cleanup(struct nat64lsn_host *host)
{
	struct nat64lsn_state *state, *ts;
	int i;

	printf("NAT64LSN: %s: race condition has been detected for host %p\n",
	    __func__, host);
	for (i = 0; i < host->states_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(state, &host->states_hash[i],
		    entries, ts) {
			/*
			 * We can remove the state without a lock,
			 * because this host entry is unlinked and will
			 * be destroyed.
			 */
			CK_SLIST_REMOVE(&host->states_hash[i], state,
			    nat64lsn_state, entries);
			host->states_count--;
			nat64lsn_state_cleanup(state);
		}
	}
	MPASS(host->states_count == 0);
}

/*
 * This function is used to clean up after an unlikely race condition,
 * where the portgroup was deleted while some translation state was
 * still being created.
 *
 * Since state entries are accessible via the host's hash table, we need
 * to be sure that no states from this PG remain linked with any host
 * entries.
 */
static void
nat64lsn_pg_cleanup(struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	uint64_t usedmask;
	int c, i;

	printf("NAT64LSN: %s: race condition has been detected for pg %p\n",
	    __func__, pg);
	for (c = 0; c < pg->chunks_count; c++) {
		/*
		 * Use inverted freemask to find what state was created.
		 */
		usedmask = ~(*FREEMASK_CHUNK(pg, c));
		if (usedmask == 0)
			continue;
		for (i = 0; i < 64; i++) {
			if (!ISSET64(usedmask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			/*
			 * If we have the STALE bit, this means that the
			 * state is already unlinked from the host's hash
			 * table. Thus we can just reset the bit in the mask
			 * and schedule destroying in the next epoch call.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				FREEMASK_BTS(pg, c, i);
				continue;
			}
			/*
			 * There is a small window when we have the bit
			 * grabbed from the freemask, but the state is not
			 * yet linked into the host's hash table.
			 * Check for the READY flag, it is set just after
			 * linking. If it is not set, defer cleanup
			 * for the next call.
			 */
			if (ISSET32(state->flags, NAT64_BIT_READY_IPV4)) {
				struct nat64lsn_host *host;

				host = state->host;
				HOST_LOCK(host);
				CK_SLIST_REMOVE(&STATE_HASH(host,
				    state->hval), state, nat64lsn_state,
				    entries);
				host->states_count--;
				HOST_UNLOCK(host);
				nat64lsn_state_cleanup(state);
			}
		}
	}
}

static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
	struct nat64lsn_hosts_slist hosts;
	struct nat64lsn_pg_slist portgroups;
	struct nat64lsn_job_item *ji;
	struct nat64lsn_host *host;
	struct nat64lsn_pg *pg;
	int i;

	CK_SLIST_INIT(&hosts);
	CK_SLIST_INIT(&portgroups);
	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
	MPASS(ji->jtype == JTYPE_DESTROY);
	while (!CK_SLIST_EMPTY(&ji->hosts)) {
		host = CK_SLIST_FIRST(&ji->hosts);
		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
		if (host->states_count > 0) {
			/*
			 * A state has been created during host deletion.
			 */
			printf("NAT64LSN: %s: destroying host with %d "
			    "states\n", __func__, host->states_count);
			/*
			 * We need to clean up these states to avoid
			 * possible access to an already deleted host in
			 * the state expiration code.
			 */
			nat64lsn_host_cleanup(host);
			CK_SLIST_INSERT_HEAD(&hosts, host, entries);
			/*
			 * Keep the host entry for the next deferred
			 * destroying. In the next epoch its states will
			 * not be accessible.
			 */
			continue;
		}
		nat64lsn_destroy_host(host);
	}
	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
		pg = CK_SLIST_FIRST(&ji->portgroups);
		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
		for (i = 0; i < pg->chunks_count; i++) {
			if (FREEMASK_BITCOUNT(pg, i) != 64) {
				/*
				 * A state has been created during
				 * PG deletion.
				 */
				printf("NAT64LSN: %s: destroying PG %p "
				    "with non-empty chunk %d\n", __func__,
				    pg, i);
				nat64lsn_pg_cleanup(pg);
				CK_SLIST_INSERT_HEAD(&portgroups,
				    pg, entries);
				i = -1;
				break;
			}
		}
		if (i != -1)
			nat64lsn_destroy_pg(pg);
	}
	if (CK_SLIST_EMPTY(&hosts) &&
	    CK_SLIST_EMPTY(&portgroups)) {
		uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
		uma_zfree(nat64lsn_job_zone, ji);
		return;
	}

	/* Schedule job item again */
	CK_SLIST_MOVE(&ji->hosts, &hosts, entries);
	CK_SLIST_MOVE(&ji->portgroups, &portgroups, entries);
	NAT64LSN_EPOCH_CALL(&ji->epoch_ctx, nat64lsn_job_destroy);
}

static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_job_item *ji;

	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
	if (ji != NULL) {
		ji->m = *mp;
		ji->f_id = *f_id;
		ji->faddr = faddr;
		ji->port = port;
		ji->proto = proto;
		ji->src6_hval = hval;

		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
		*mp = NULL;
	}
	return (IP_FW_DENY);
}

static int
nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_job_item *ji;

	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
	if (ji != NULL) {
		ji->m = *mp;
		ji->f_id = *f_id;
		ji->faddr = faddr;
		ji->port = port;
		ji->proto = proto;
		ji->state_hval = hval;
		ji->host = host;

		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->base.stats, jportreq);
		*mp = NULL;
	}
	return (IP_FW_DENY);
}

static int
nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
    struct nat64lsn_state *state, uint8_t flags)
{
	struct pfloghdr loghdr, *logdata;
	int ret;
	uint16_t ts;

	/* Update timestamp and flags if needed */
	SET_AGE(ts);
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != 0)
		state->flags |= flags;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET6, state);
	} else
		logdata = NULL;

	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
	    htons(state->aport), &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}

static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
    struct mbuf **mp)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	struct icmp6_hdr *icmp6;
	uint32_t addr, hval, data[2];
	int offset, proto;
	uint16_t port;
	uint8_t flags;

	/* Check if protocol is supported */
	port = f_id->src_port;
	proto = f_id->proto;
	switch (f_id->proto) {
	case IPPROTO_ICMPV6:
		/*
		 * For ICMPv6 echo reply/request we use icmp6_id as
		 * local port.
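		 * The protocol is then remapped to IPPROTO_ICMP below, so
		 * ICMPv6 flows share the state namespace with the ICMP
		 * states used on the IPv4 side of the translation.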
		 */
		offset = 0;
		proto = nat64_getlasthdr(*mp, &offset);
		if (proto < 0) {
			NAT64STAT_INC(&cfg->base.stats, dropped);
			DPRINTF(DP_DROPS, "mbuf isn't contiguous");
			return (IP_FW_DENY);
		}
		if (proto == IPPROTO_ICMPV6) {
			icmp6 = mtodo(*mp, offset);
			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
				port = ntohs(icmp6->icmp6_id);
		}
		proto = IPPROTO_ICMP;
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	/* Extract IPv4 from destination IPv6 address */
	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
		char a[INET_ADDRSTRLEN];

		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
		return (IP_FW_DENY);	/* XXX: add extra stats? */
	}

	/* Try to find host */
	hval = HOST_HVAL(cfg, &f_id->src_ip6);
	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
			break;
	}
	/* We use IPv4 address in host byte order */
	addr = ntohl(addr);
	if (host == NULL)
		return (nat64lsn_request_host(cfg, f_id, mp,
		    hval, addr, port, proto));

	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);

	data[0] = addr;
	data[1] = (f_id->dst_port << 16) | port;
	hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
	    port, proto);
	if (state == NULL)
		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
		    port, proto));
	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}

/*
 * Main dataplane entry point.
 */
int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
    ipfw_insn *cmd, int *done)
{
	struct nat64lsn_instance *i;
	ipfw_insn *icmd;
	int ret;

	IPFW_RLOCK_ASSERT(ch);

	*done = 0;	/* continue the search in case of failure */
	icmd = cmd + F_LEN(cmd);
	if (cmd->opcode != O_EXTERNAL_ACTION ||
	    insntod(cmd, kidx)->kidx != V_nat64lsn_eid ||
	    icmd->opcode != O_EXTERNAL_INSTANCE ||
	    (i = NAT64_LOOKUP(ch, icmd)) == NULL)
		return (IP_FW_DENY);

	*done = 1;	/* terminate the search */

	switch (args->f_id.addr_type) {
	case 4:
		ret = nat64lsn_translate4(i->cfg, &args->f_id, &args->m);
		break;
	case 6:
		/*
		 * Check that destination IPv6 address matches our prefix6.
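		 * The comparison covers plat_plen / 8 bytes, e.g. 12 bytes
		 * for the well-known 64:ff9b::/96 prefix; NAT64LSN_ANYPREFIX
		 * configurations skip the check entirely.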
		 */
		if ((i->cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
		    memcmp(&args->f_id.dst_ip6, &i->cfg->base.plat_prefix,
		    i->cfg->base.plat_plen / 8) != 0) {
			ret = i->cfg->nomatch_verdict;
			break;
		}
		ret = nat64lsn_translate6(i->cfg, &args->f_id, &args->m);
		break;
	default:
		ret = i->cfg->nomatch_verdict;
	}

	if (ret != IP_FW_PASS && args->m != NULL) {
		m_freem(args->m);
		args->m = NULL;
	}
	return (ret);
}

static int
nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
{
	struct nat64lsn_states_chunk *chunk;
	int i;

	chunk = (struct nat64lsn_states_chunk *)mem;
	for (i = 0; i < 64; i++)
		chunk->state[i].flags = 0;
	return (0);
}

void
nat64lsn_init_internal(void)
{

	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	JQUEUE_LOCK_INIT();
}

void
nat64lsn_uninit_internal(void)
{

	/* XXX: epoch_task drain */
	JQUEUE_LOCK_DESTROY();
	uma_zdestroy(nat64lsn_host_zone);
	uma_zdestroy(nat64lsn_pgchunk_zone);
	uma_zdestroy(nat64lsn_pg_zone);
	uma_zdestroy(nat64lsn_aliaslink_zone);
	uma_zdestroy(nat64lsn_state_zone);
	uma_zdestroy(nat64lsn_job_zone);
}

void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{

	CALLOUT_LOCK(cfg);
	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
	    nat64lsn_periodic, cfg);
	CALLOUT_UNLOCK(cfg);
}

struct nat64lsn_cfg *
nat64lsn_init_config(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
	struct nat64lsn_cfg *cfg;
	struct nat64lsn_alias *alias;
	int i, naddr;

	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
	    M_WAITOK | M_ZERO);

	CFG_LOCK_INIT(cfg);
	CALLOUT_LOCK_INIT(cfg);
	STAILQ_INIT(&cfg->jhead);
	cfg->vp = curvnet;
	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);

	cfg->hash_seed = arc4random();
	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < cfg->hosts_hashsize; i++)
		CK_SLIST_INIT(&cfg->hosts_hash[i]);

	naddr = 1 << (32 - plen);
	cfg->prefix4 = prefix;
	cfg->pmask4 = prefix | (naddr - 1);
	cfg->plen4 = plen;
	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
	    M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < naddr; i++) {
		alias = &cfg->aliases[i];
		alias->addr = prefix + i; /* host byte order */
		CK_SLIST_INIT(&alias->hosts);
		ALIAS_LOCK_INIT(alias);
	}

	callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);

	return (cfg);
}

static void
nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
{
	int i;

	if (pg->chunks_count == 1) {
		uma_zfree(nat64lsn_state_zone, pg->states);
	} else {
		for (i = 0; i < pg->chunks_count; i++)
			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
		free(pg->states_chunk, M_NAT64LSN);
		free(pg->freemask_chunk, M_NAT64LSN);
	}
	uma_zfree(nat64lsn_pg_zone, pg);
}

static void
nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias)
{
	struct nat64lsn_pg *pg;
	int i;

	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
		pg = CK_SLIST_FIRST(&alias->portgroups);
		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
		nat64lsn_destroy_pg(pg);
	}
	for (i = 0; i < 32; i++) {
		if (ISSET32(alias->tcp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
		if (ISSET32(alias->udp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
		if (ISSET32(alias->icmp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
	}
	ALIAS_LOCK_DESTROY(alias);
}

static void
nat64lsn_destroy_host(struct nat64lsn_host *host)
{
	struct nat64lsn_aliaslink *link;

	while (!CK_SLIST_EMPTY(&host->aliases)) {
		link = CK_SLIST_FIRST(&host->aliases);
		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);

		ALIAS_LOCK(link->alias);
		CK_SLIST_REMOVE(&link->alias->hosts, link,
		    nat64lsn_aliaslink, alias_entries);
		link->alias->hosts_count--;
		ALIAS_UNLOCK(link->alias);

		uma_zfree(nat64lsn_aliaslink_zone, link);
	}
	HOST_LOCK_DESTROY(host);
	free(host->states_hash, M_NAT64LSN);
	uma_zfree(nat64lsn_host_zone, host);
}

void
nat64lsn_destroy_config(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *host;
	int i;

	CALLOUT_LOCK(cfg);
	callout_drain(&cfg->periodic);
	CALLOUT_UNLOCK(cfg);
	callout_drain(&cfg->jcallout);

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
			nat64lsn_destroy_host(host);
		}
	}

	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);

	CALLOUT_LOCK_DESTROY(cfg);
	CFG_LOCK_DESTROY(cfg);
	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
	free(cfg->hosts_hash, M_NAT64LSN);
	free(cfg->aliases, M_NAT64LSN);
	free(cfg, M_NAT64LSN);
}