/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015-2019 Yandex LLC
 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
 * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/ck.h>
#include <sys/epoch.h>
#include <sys/errno.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_pflog.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip_fw_nat64.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/pf/pf.h>

#include "nat64lsn.h"

MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

#define	NAT64LSN_EPOCH_ENTER(et)	NET_EPOCH_ENTER(et)
#define	NAT64LSN_EPOCH_EXIT(et)		NET_EPOCH_EXIT(et)
#define	NAT64LSN_EPOCH_ASSERT()		NET_EPOCH_ASSERT()
#define	NAT64LSN_EPOCH_CALL(c, f)	NET_EPOCH_CALL((f), (c))

static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;

static void nat64lsn_periodic(void *data);
#define	PERIODIC_DELAY		4
#define	NAT64_LOOKUP(chain, cmd)	\
	(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups.
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,
	JTYPE_NEWPORTGROUP,
	JTYPE_DESTROY,
};

struct nat64lsn_job_item {
	STAILQ_ENTRY(nat64lsn_job_item)	entries;
	enum nat64lsn_jtype	jtype;

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			struct mbuf		*m;
			struct nat64lsn_host	*host;
			struct nat64lsn_state	*state;
			uint32_t		src6_hval;
			uint32_t		state_hval;
			struct ipfw_flow_id	f_id;
			in_addr_t		faddr;
			uint16_t		port;
			uint8_t			proto;
			uint8_t			done;
		};
		struct { /* used by JTYPE_DESTROY */
			struct nat64lsn_hosts_slist	hosts;
			struct nat64lsn_pg_slist	portgroups;
			struct nat64lsn_pgchunk		*pgchunk;
			struct epoch_context		epoch_ctx;
		};
	};
};

static struct mtx jmtx;
#define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define	JQUEUE_LOCK()		mtx_lock(&jmtx)
#define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)

static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
    struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);

static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
    struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);

#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)
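/*
 * Example: a state tracking an established TCP connection that is
 * ready for the IPv4->IPv6 path typically carries
 * NAT64_FLAG_SYN | NAT64_FLAG_ESTAB | NAT64_FLAG_READY, i.e.
 * (1 << 1) | (1 << 2) | (1 << 6) == 0x46.
 */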
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	uint8_t result;

	result = flags & (TH_FIN|TH_SYN);
	result |= (flags & TH_RST) >> 2;	/* Treat RST as FIN */
	result |= (flags & TH_ACK) >> 2;	/* Treat ACK as estab */

	return (result);
}
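/*
 * Example: a TCP segment carrying TH_SYN|TH_ACK (0x12) converts to
 * NAT64_FLAG_SYN | NAT64_FLAG_ESTAB (0x06): TH_SYN maps directly,
 * while TH_ACK (0x10) is shifted right by two into the ESTAB bit.
 *
 * nat64lsn_log() below packs the state identity into otherwise unused
 * pflog header fields: rulenr carries the alias IPv4 address, while
 * subrulenr carries (aport << 16) | (proto << 8) | low byte of the
 * remote IPv4 address.  E.g. aport 1500, TCP (6) and a peer x.x.x.9
 * yield subrulenr 0x05DC0609.
 */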
static void
nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
    struct nat64lsn_state *state)
{

	memset(plog, 0, sizeof(*plog));
	plog->length = PFLOG_REAL_HDRLEN;
	plog->af = family;
	plog->action = PF_NAT;
	plog->dir = PF_IN;
	plog->rulenr = htonl(state->ip_src);
	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
	    (state->proto << 8) | (state->ip_dst & 0xff));
	plog->ruleset[0] = '\0';
	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
}

#define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
#define	HOST_HVAL(c, a)	HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
#define	HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

#define	ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
#define	ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
    struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
{

	/*
	 * Different algorithms can be implemented here to select
	 * an alias address.
	 * XXX: for now we use the first available.
	 */
	return (CK_SLIST_FIRST(&host->aliases));
}

#define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
#define	STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
#define	STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))

#ifdef __LP64__
#define	FREEMASK_FFSLL(pg, faddr)	\
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int i;

	if ((i = ffsl(freemask[0])) != 0)
		return (i);
	if ((i = ffsl(freemask[1])) != 0)
		return (i + 32);
	return (0);
}
#define	FREEMASK_FFSLL(pg, faddr)	\
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */
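/*
 * Each states chunk has a matching 64-bit freemask where a set bit
 * means the corresponding state slot is free.  E.g. if FREEMASK_FFSLL()
 * returns 5, slot 4 is the first free state and maps to external port
 * pg->base_port + 4.  On 32-bit platforms the mask is kept as two
 * uint32_t words and accessed with 32-bit atomics.
 */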
#define	NAT64LSN_TRY_PGCNT	32
static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
    struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
    uint32_t *pgidx, in_addr_t faddr)
{
	struct nat64lsn_pg *pg, *oldpg;
	uint32_t idx, oldidx;
	int cnt;

	cnt = 0;
	/* First try last used PG */
	oldpg = pg = ck_pr_load_ptr(pgptr);
	idx = oldidx = ck_pr_load_32(pgidx);
	/* If pgidx is out of range, reset it to the first pgchunk */
	if (!ISSET32(*chunkmask, idx / 32))
		idx = 0;
	do {
		ck_pr_fence_load();
		if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
			/*
			 * This PG has free states.  If the last used PG
			 * had none and we moved past it, try to update
			 * the cached pointer.
			 * NOTE: it can already be updated by the jobs
			 * handler, thus we use a CAS operation.
			 */
			if (cnt > 0)
				ck_pr_cas_ptr(pgptr, oldpg, pg);
			return (pg);
		}
		/* Stop if idx is out of range */
		if (!ISSET32(*chunkmask, idx / 32))
			break;

		if (ISSET32(pgmask[idx / 32], idx % 32))
			pg = ck_pr_load_ptr(
			    &chunks[idx / 32]->pgptr[idx % 32]);
		else
			pg = NULL;

		idx++;
	} while (++cnt < NAT64LSN_TRY_PGCNT);

	/* If pgidx is out of range, reset it to the first pgchunk */
	if (!ISSET32(*chunkmask, idx / 32))
		idx = 0;
	ck_pr_cas_32(pgidx, oldidx, idx);
	return (NULL);
}
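/*
 * The PG pointer and index cached by nat64lsn_get_pg() above are only
 * hints: losing the ck_pr_cas_ptr()/ck_pr_cas_32() race simply means
 * another thread has already refreshed them, so a CAS failure is never
 * retried.
 */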
static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int i, offset;

	NAT64LSN_EPOCH_ASSERT();

	/* Check that we already have state for given arguments */
	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
		if (state->proto == proto && state->ip_dst == faddr &&
		    state->sport == port && state->dport == f_id->dst_port)
			return (state);
	}

	link = nat64lsn_get_aliaslink(cfg, host, f_id);
	if (link == NULL)
		return (NULL);

	switch (proto) {
	case IPPROTO_TCP:
		pg = nat64lsn_get_pg(
		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
		    link->alias->tcp, &link->alias->tcp_pg,
		    &link->alias->tcp_pgidx, faddr);
		break;
	case IPPROTO_UDP:
		pg = nat64lsn_get_pg(
		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
		    link->alias->udp, &link->alias->udp_pg,
		    &link->alias->udp_pgidx, faddr);
		break;
	case IPPROTO_ICMP:
		pg = nat64lsn_get_pg(
		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
		    link->alias->icmp, &link->alias->icmp_pg,
		    &link->alias->icmp_pgidx, faddr);
		break;
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	/* Check that PG has some free states */
	state = NULL;
	i = FREEMASK_BITCOUNT(pg, faddr);
	while (i-- > 0) {
		offset = FREEMASK_FFSLL(pg, faddr);
		if (offset == 0) {
			/*
			 * We lost the race.
			 * No more free states in this PG.
			 */
			break;
		}

		/* Let's try to atomically grab the state */
		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
			/* Initialize */
			state->flags = proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(f_id->_flags);
			state->proto = proto;
			state->aport = pg->base_port + offset - 1;
			state->dport = f_id->dst_port;
			state->sport = port;
			state->ip6_dst = f_id->dst_ip6;
			state->ip_dst = faddr;
			state->ip_src = link->alias->addr;
			state->hval = hval;
			state->host = host;
			SET_AGE(state->timestamp);

			/* Insert new state into host's hash table */
			HOST_LOCK(host);
			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
			    state, entries);
			host->states_count++;
			/*
			 * XXX: In case the host is about to be expired,
			 * reset the NAT64LSN_DEADHOST flag.
			 */
			host->flags &= ~NAT64LSN_DEADHOST;
			HOST_UNLOCK(host);
			NAT64STAT_INC(&cfg->base.stats, screated);
			/* Mark the state as ready for translate4 */
			ck_pr_fence_store();
			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
			break;
		}
	}
	return (state);
}
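/*
 * Note the ordering above: all fields of a freshly grabbed state are
 * written before the store fence, and NAT64_BIT_READY_IPV4 is set last.
 * A reader in nat64lsn_get_state4to6() that observes the READY bit
 * therefore sees a fully initialized state.
 */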
/*
 * Inspects ICMP packets to see if the message contains an embedded
 * packet header, in which case @addr and @port must be taken from
 * that inner header.
 */
static int
inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
    uint16_t *port)
{
	struct icmp *icmp;
	struct ip *ip;
	int off;
	uint8_t inner_proto;

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	off = (ip->ip_hl << 2) + ICMP_MINLEN;
	if ((*mp)->m_len < off)
		*mp = m_pullup(*mp, off);
	if (*mp == NULL)
		return (ENOMEM);

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	icmp = L3HDR(ip, struct icmp *);
	switch (icmp->icmp_type) {
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
		/* Use ICMP ID as distinguisher */
		*port = ntohs(icmp->icmp_id);
		return (0);
	case ICMP_UNREACH:
	case ICMP_TIMXCEED:
		break;
	default:
		return (EOPNOTSUPP);
	}
	/*
	 * ICMP_UNREACH and ICMP_TIMXCEED contain the IP header plus
	 * 64 bits of the ULP header.
	 */
	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
		return (EINVAL);
	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	ip = mtodo(*mp, off); /* Inner IP header */
	inner_proto = ip->ip_p;
	off += ip->ip_hl << 2; /* Skip inner IP header */
	*addr = ntohl(ip->ip_src.s_addr);
	if ((*mp)->m_len < off + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	switch (inner_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* Copy source port from the header */
		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
		*proto = inner_proto;
		return (0);
	case IPPROTO_ICMP:
		/*
		 * We will translate only ICMP errors for our ICMP
		 * echo requests.
		 */
		icmp = mtodo(*mp, off);
		if (icmp->icmp_type != ICMP_ECHO)
			return (EOPNOTSUPP);
		*port = ntohs(icmp->icmp_id);
		return (0);
	}
	return (EOPNOTSUPP);
}
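/*
 * Worked example for the index arithmetic below, assuming
 * NAT64_MIN_PORT is 1024: external port 5555 gives 5555 - 1024 = 4531,
 * hence chunk_idx = 4531 / 2048 = 2, then 4531 - 2 * 2048 = 435, so
 * pg_idx = 435 / 64 = 6 and state_idx = 435 % 64 = 51.
 */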
static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int chunk_idx, pg_idx, state_idx;

	NAT64LSN_EPOCH_ASSERT();

	if (port < NAT64_MIN_PORT)
		return (NULL);
	/*
	 * Alias keeps 32 pgchunks for each protocol.
	 * Each pgchunk has 32 pointers to portgroup.
	 * Each portgroup has 64 states for ports.
	 */
	port -= NAT64_MIN_PORT;
	chunk_idx = port / 2048;

	port -= chunk_idx * 2048;
	pg_idx = port / 64;
	state_idx = port % 64;

	/*
	 * First check in proto_chunkmask that we have an allocated
	 * PG chunk.  Then check in proto_pgmask that we have a valid
	 * PG pointer.
	 */
	pg = NULL;
	switch (proto) {
	case IPPROTO_TCP:
		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_UDP:
		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	case IPPROTO_ICMP:
		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
			break;
		}
		return (NULL);
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	if (FREEMASK_ISSET(pg, faddr, state_idx))
		return (NULL);

	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
	ck_pr_fence_load();
	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
		return (state);
	return (NULL);
}
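/*
 * nat64lsn_translate4() below works with addresses in host byte order:
 * cfg->prefix4 and cfg->pmask4 are the first and the last address of
 * the configured alias pool, so e.g. a /30 pool starting at 203.0.113.0
 * accepts 203.0.113.0 through 203.0.113.3 as translation destinations.
 */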
static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
	struct pfloghdr loghdr, *logdata;
	struct in6_addr src6;
	struct nat64lsn_state *state;
	struct nat64lsn_alias *alias;
	uint32_t addr, flags;
	uint16_t port, ts;
	int ret;
	uint8_t proto;

	addr = f_id->dst_ip;
	port = f_id->dst_port;
	proto = f_id->proto;
	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* Check if protocol is supported */
	switch (proto) {
	case IPPROTO_ICMP:
		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
		if (ret != 0) {
			if (ret == ENOMEM) {
				NAT64STAT_INC(&cfg->base.stats, nomem);
				return (IP_FW_DENY);
			}
			NAT64STAT_INC(&cfg->base.stats, noproto);
			return (cfg->nomatch_verdict);
		}
		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
			NAT64STAT_INC(&cfg->base.stats, nomatch4);
			return (cfg->nomatch_verdict);
		}
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	alias = &ALIAS_BYHASH(cfg, addr);
	MPASS(addr == alias->addr);

	/* Check that we have state for this port */
	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
	    port, proto);
	if (state == NULL) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* TODO: Check flags to see if we need to do some static mapping */

	/* Update some state fields if needed */
	SET_AGE(ts);
	if (f_id->proto == IPPROTO_TCP)
		flags = convert_tcp_flags(f_id->_flags);
	else
		flags = 0;
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	port = htons(state->sport);
	src6 = state->ip6_dst;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET, state);
	} else
		logdata = NULL;

	/*
	 * We already have src6 with the embedded address, but it is
	 * possible that src_ip differs from state->ip_dst; this is why
	 * we do the embedding again.
	 */
	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
	    &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}
/*
 * Check if particular state is stale and should be deleted.
 * Return 1 if true, 0 otherwise.
 */
static int
nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
{
	int age, ttl;

	/* State was marked as stale in previous pass. */
	if (ISSET32(state->flags, NAT64_BIT_STALE))
		return (1);

	/* State is not yet initialized, it is going to be READY */
	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
		return (0);

	age = GET_AGE(state->timestamp);
	switch (state->proto) {
	case IPPROTO_TCP:
		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
			ttl = cfg->st_close_ttl;
		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
			ttl = cfg->st_estab_ttl;
		else	/* SYN seen, or nothing yet */
			ttl = cfg->st_syn_ttl;
		if (age > ttl)
			return (1);
		break;
	case IPPROTO_UDP:
		if (age > cfg->st_udp_ttl)
			return (1);
		break;
	case IPPROTO_ICMP:
		if (age > cfg->st_icmp_ttl)
			return (1);
		break;
	}
	return (0);
}

static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	for (c = 0; c < pg->chunks_count; c++) {
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			host->states_count--;
			HOST_UNLOCK(host);

			/* Reset READY flag */
			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
			/* And set STALE flag */
			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
			ck_pr_fence_store();
			/*
			 * Now translate6 will not use this state; wait
			 * until it becomes safe for translate4, then mark
			 * the state as free.
			 */
		}
	}

	/*
	 * If we still have some alive states, update the timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}
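/*
 * Expiration is deliberately a two-pass process: nat64lsn_maintain_pg()
 * first marks a state STALE and unlinks it from the host hash, and only
 * on the next periodic pass (PERIODIC_DELAY seconds later) clears the
 * flags and sets the freemask bit, giving concurrent lookups that may
 * still hold the state time to finish.
 */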
static void
nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
    struct nat64lsn_pg_slist *portgroups)
{
	struct nat64lsn_alias *alias;
	struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
	uint32_t *pgmask, *pgidx;
	int i, idx;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
			if (nat64lsn_maintain_pg(cfg, pg) == 0)
				continue;
			/* Always keep first PG */
			if (pg->base_port == NAT64_MIN_PORT)
				continue;
			/*
			 * PG is expired, unlink it and schedule it for
			 * deferred destroying.
			 */
			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
			switch (pg->proto) {
			case IPPROTO_TCP:
				pgmask = alias->tcp_pgmask;
				pgptr = &alias->tcp_pg;
				pgidx = &alias->tcp_pgidx;
				firstpg = alias->tcp[0]->pgptr[0];
				break;
			case IPPROTO_UDP:
				pgmask = alias->udp_pgmask;
				pgptr = &alias->udp_pg;
				pgidx = &alias->udp_pgidx;
				firstpg = alias->udp[0]->pgptr[0];
				break;
			case IPPROTO_ICMP:
				pgmask = alias->icmp_pgmask;
				pgptr = &alias->icmp_pg;
				pgidx = &alias->icmp_pgidx;
				firstpg = alias->icmp[0]->pgptr[0];
				break;
			}
			/* Reset the corresponding bit in pgmask array. */
			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
			ck_pr_fence_store();
			/* If last used PG points to this PG, reset it. */
			ck_pr_cas_ptr(pgptr, pg, firstpg);
			ck_pr_cas_32(pgidx, idx, 0);
			/* Unlink PG from alias's chain */
			ALIAS_LOCK(alias);
			CK_SLIST_REMOVE(&alias->portgroups, pg,
			    nat64lsn_pg, entries);
			alias->portgroups_count--;
			ALIAS_UNLOCK(alias);
			/* And link to job's chain for deferred destroying */
			NAT64STAT_INC(&cfg->base.stats, spgdeleted);
			CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
		}
	}
}

static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
	struct nat64lsn_host *host, *tmp;
	int i;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
		    entries, tmp) {
			/* Was the host marked dead in a previous pass? */
			if (host->flags & NAT64LSN_DEADHOST) {
				if (host->states_count > 0) {
					host->flags &= ~NAT64LSN_DEADHOST;
					continue;
				}
				/*
				 * Unlink host from hash table and schedule
				 * it for deferred destroying.
				 */
				CFG_LOCK(cfg);
				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
				    nat64lsn_host, entries);
				cfg->hosts_count--;
				CFG_UNLOCK(cfg);
				CK_SLIST_INSERT_HEAD(hosts, host, entries);
				continue;
			}
			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
				continue;
			if (host->states_count > 0)
				continue;
			/* Mark host as going to be expired in next pass */
			host->flags |= NAT64LSN_DEADHOST;
			ck_pr_fence_store();
		}
	}
}
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}

#if 0
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			if (h->states_count / 2 < h->states_hashsize ||
			    h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				continue;
			hsize = h->states_hashsize * 2;
			hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN,
			    M_NOWAIT);
			if (hash == NULL)
				continue;
			for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[j]);

			ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif

/*
 * This procedure is used to perform various maintenance
 * on a dynamic hash list. Currently it is called every 4 seconds.
 */
static void
nat64lsn_periodic(void *data)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_cfg *cfg;

	cfg = (struct nat64lsn_cfg *)data;
	CURVNET_SET(cfg->vp);
	if (cfg->hosts_count > 0) {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji != NULL) {
			ji->jtype = JTYPE_DESTROY;
			CK_SLIST_INIT(&ji->hosts);
			CK_SLIST_INIT(&ji->portgroups);
			nat64lsn_expire_hosts(cfg, &ji->hosts);
			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
			    nat64lsn_job_destroy);
		} else
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
	CURVNET_RESTORE();
}
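/*
 * ALLOC_ERROR() encodes the allocator type (1 for host, 2 for PG) and
 * the stage at which it failed: e.g. HOST_ERROR(2) == 12 means the host
 * allocator failed at stage 2 (the states hash), while HOST_ERROR(0)
 * and PG_ERROR(0) are both 0, i.e. success.
 */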
#define	ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage) : 0)
#define	HOST_ERROR(stage)	ALLOC_ERROR(stage, 1)
#define	PG_ERROR(stage)		ALLOC_ERROR(stage, 2)
static int
nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	char a[INET6_ADDRSTRLEN];
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_host *host;
	struct nat64lsn_state *state;
	uint32_t hval, data[2];
	int i;

	/* Check that host was not yet added. */
	NAT64LSN_EPOCH_ASSERT();
	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
			/* The host was allocated in a previous call. */
			ji->host = host;
			goto get_state;
		}
	}

	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
	if (ji->host == NULL)
		return (HOST_ERROR(1));

	host->states_hashsize = NAT64LSN_HSIZE;
	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
	if (host->states_hash == NULL) {
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(2));
	}

	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
	if (link == NULL) {
		free(host->states_hash, M_NAT64LSN);
		uma_zfree(nat64lsn_host_zone, host);
		return (HOST_ERROR(3));
	}

	/* Initialize */
	HOST_LOCK_INIT(host);
	SET_AGE(host->timestamp);
	host->addr = ji->f_id.src_ip6;
	host->hval = ji->src6_hval;
	host->flags = 0;
	host->states_count = 0;
	host->states_hashsize = NAT64LSN_HSIZE;
	CK_SLIST_INIT(&host->aliases);
	for (i = 0; i < host->states_hashsize; i++)
		CK_SLIST_INIT(&host->states_hash[i]);

	/* Determine alias from flow hash. */
	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
	link->alias = &ALIAS_BYHASH(cfg, hval);
	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);

	ALIAS_LOCK(link->alias);
	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
	link->alias->hosts_count++;
	ALIAS_UNLOCK(link->alias);

	CFG_LOCK(cfg);
	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
	cfg->hosts_count++;
	CFG_UNLOCK(cfg);

get_state:
	data[0] = ji->faddr;
	data[1] = (ji->f_id.dst_port << 16) | ji->port;
	ji->state_hval = hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
	    ji->faddr, ji->port, ji->proto);
	/*
	 * We failed to obtain a new state; the alias in use needs
	 * a new PG.
	 * XXX: or another alias should be used.
	 */
	if (state == NULL) {
		/* Try to allocate new PG */
		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
			return (HOST_ERROR(4));
		/* We assume that nat64lsn_alloc_pg() got a state. */
	} else
		ji->state = state;

	ji->done = 1;
	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
	return (HOST_ERROR(0));
}
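/*
 * nat64lsn_find_pg_place() scans the 32-word pgmask for the first
 * clear bit: e.g. with data[0] == 0xffffffff and data[1] == 0x7 it
 * returns 32 + 3 = 35, the first free PG slot in the second word.
 */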
static int
nat64lsn_find_pg_place(uint32_t *data)
{
	int i;

	for (i = 0; i < 32; i++) {
		if (~data[i] == 0)
			continue;
		return (i * 32 + ffs(~data[i]) - 1);
	}
	return (-1);
}

static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias, uint32_t *chunkmask,
    uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
    struct nat64lsn_pg **pgptr, uint8_t proto)
{
	struct nat64lsn_pg *pg;
	int i, pg_idx, chunk_idx;

	/* Find place in pgchunk where PG can be added */
	pg_idx = nat64lsn_find_pg_place(pgmask);
	if (pg_idx < 0)	/* no more PGs */
		return (PG_ERROR(1));
	/* Check that we have allocated pgchunk for given PG index */
	chunk_idx = pg_idx / 32;
	if (!ISSET32(*chunkmask, chunk_idx)) {
		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
		    M_NOWAIT);
		if (chunks[chunk_idx] == NULL)
			return (PG_ERROR(2));
		ck_pr_bts_32(chunkmask, chunk_idx);
		ck_pr_fence_store();
	}
	/* Allocate PG and states chunks */
	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
	if (pg == NULL)
		return (PG_ERROR(3));
	pg->chunks_count = cfg->states_chunks;
	if (pg->chunks_count > 1) {
		pg->freemask_chunk = malloc(pg->chunks_count *
		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
		if (pg->freemask_chunk == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(4));
		}
		pg->states_chunk = malloc(pg->chunks_count *
		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
		    M_NOWAIT | M_ZERO);
		if (pg->states_chunk == NULL) {
			free(pg->freemask_chunk, M_NAT64LSN);
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(5));
		}
		for (i = 0; i < pg->chunks_count; i++) {
			pg->states_chunk[i] = uma_zalloc(
			    nat64lsn_state_zone, M_NOWAIT);
			if (pg->states_chunk[i] == NULL)
				goto states_failed;
		}
		memset(pg->freemask_chunk, 0xff,
		    sizeof(uint64_t) * pg->chunks_count);
	} else {
		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
		if (pg->states == NULL) {
			uma_zfree(nat64lsn_pg_zone, pg);
			return (PG_ERROR(6));
		}
		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
	}

	/* Initialize PG and hook it to pgchunk */
	SET_AGE(pg->timestamp);
	pg->proto = proto;
	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
	ck_pr_fence_store();
	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
	ck_pr_store_ptr(pgptr, pg);

	ALIAS_LOCK(alias);
	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
	SET_AGE(alias->timestamp);
	alias->portgroups_count++;
	ALIAS_UNLOCK(alias);
	NAT64STAT_INC(&cfg->base.stats, spgcreated);
	return (PG_ERROR(0));

states_failed:
	for (i = 0; i < pg->chunks_count; i++)
		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
	free(pg->freemask_chunk, M_NAT64LSN);
	free(pg->states_chunk, M_NAT64LSN);
	uma_zfree(nat64lsn_pg_zone, pg);
	return (PG_ERROR(7));
}
static int
nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_alias *alias;
	int ret;

	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
	if (link == NULL)
		return (PG_ERROR(1));

	/*
	 * TODO: check that we did not already allocate a PG in
	 * a previous call.
	 */

	ret = 0;
	alias = link->alias;
	/* Find place in pgchunk where PG can be added */
	switch (ji->proto) {
	case IPPROTO_TCP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->tcp_chunkmask, alias->tcp_pgmask,
		    alias->tcp, &alias->tcp_pg, ji->proto);
		break;
	case IPPROTO_UDP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->udp_chunkmask, alias->udp_pgmask,
		    alias->udp, &alias->udp_pg, ji->proto);
		break;
	case IPPROTO_ICMP:
		ret = nat64lsn_alloc_proto_pg(cfg, alias,
		    &alias->icmp_chunkmask, alias->icmp_pgmask,
		    alias->icmp, &alias->icmp_pg, ji->proto);
		break;
	default:
		panic("%s: wrong proto %d", __func__, ji->proto);
	}
	if (ret == PG_ERROR(1)) {
		/*
		 * PG_ERROR(1) means that the alias lacks free PGs.
		 * XXX: try next alias.
		 */
		printf("NAT64LSN: %s: failed to obtain PG\n",
		    __func__);
		return (ret);
	}
	if (ret == PG_ERROR(0)) {
		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
		    ji->state_hval, ji->faddr, ji->port, ji->proto);
		if (ji->state == NULL)
			ret = PG_ERROR(8);
		else
			ji->done = 1;
	}
	return (ret);
}

static void
nat64lsn_do_request(void *data)
{
	struct epoch_tracker et;
	struct nat64lsn_job_head jhead;
	struct nat64lsn_job_item *ji, *ji2;
	struct nat64lsn_cfg *cfg;
	int jcount;
	uint8_t flags;

	cfg = (struct nat64lsn_cfg *)data;
	if (cfg->jlen == 0)
		return;

	CURVNET_SET(cfg->vp);
	STAILQ_INIT(&jhead);

	/* Grab queue */
	JQUEUE_LOCK();
	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
	jcount = cfg->jlen;
	cfg->jlen = 0;
	JQUEUE_UNLOCK();

	/* TODO: check if we need to resize hash */

	NAT64STAT_INC(&cfg->base.stats, jcalls);
	DPRINTF(DP_JQUEUE, "count=%d", jcount);

	/*
	 * TODO:
	 * What we should do here is to build a hash
	 * to ensure we don't have lots of duplicate requests.
	 * Skip this for now.
	 *
	 * TODO: Limit per-call number of items
	 */

	NAT64LSN_EPOCH_ENTER(et);
	STAILQ_FOREACH(ji, &jhead, entries) {
		switch (ji->jtype) {
		case JTYPE_NEWHOST:
			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jhostfails);
			break;
		case JTYPE_NEWPORTGROUP:
			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
				NAT64STAT_INC(&cfg->base.stats, jportfails);
			break;
		default:
			continue;
		}
		if (ji->done != 0) {
			flags = ji->proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(ji->f_id._flags);
			nat64lsn_translate6_internal(cfg, &ji->m,
			    ji->state, flags);
			NAT64STAT_INC(&cfg->base.stats, jreinjected);
		}
	}
	NAT64LSN_EPOCH_EXIT(et);

	ji = STAILQ_FIRST(&jhead);
	while (ji != NULL) {
		ji2 = STAILQ_NEXT(ji, entries);
		/*
		 * In any case we must free the mbuf if the
		 * translator did not consume it.
		 */
		m_freem(ji->m);
		uma_zfree(nat64lsn_job_zone, ji);
		ji = ji2;
	}
	CURVNET_RESTORE();
}
static struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
{
	struct nat64lsn_job_item *ji;

	/*
	 * Do not try to lock a possibly contested mutex if we're near
	 * the limit. Drop the packet instead.
	 */
	ji = NULL;
	if (cfg->jlen >= cfg->jmaxlen)
		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
	else {
		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
		if (ji == NULL)
			NAT64STAT_INC(&cfg->base.stats, jnomem);
	}
	if (ji == NULL) {
		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "failed to create job");
	} else {
		ji->jtype = jtype;
		ji->done = 0;
	}
	return (ji);
}

static void
nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
{

	JQUEUE_LOCK();
	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
	NAT64STAT_INC(&cfg->base.stats, jrequests);
	cfg->jlen++;

	if (callout_pending(&cfg->jcallout) == 0)
		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
	JQUEUE_UNLOCK();
}

static void
nat64lsn_job_destroy(epoch_context_t ctx)
{
	struct nat64lsn_job_item *ji;
	struct nat64lsn_host *host;
	struct nat64lsn_pg *pg;
	int i;

	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
	MPASS(ji->jtype == JTYPE_DESTROY);
	while (!CK_SLIST_EMPTY(&ji->hosts)) {
		host = CK_SLIST_FIRST(&ji->hosts);
		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
		if (host->states_count > 0) {
			/*
			 * XXX: The state has been created
			 * during host deletion.
			 */
			printf("NAT64LSN: %s: destroying host with %d "
			    "states\n", __func__, host->states_count);
		}
		nat64lsn_destroy_host(host);
	}
	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
		pg = CK_SLIST_FIRST(&ji->portgroups);
		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
		for (i = 0; i < pg->chunks_count; i++) {
			if (FREEMASK_BITCOUNT(pg, i) != 64) {
				/*
				 * XXX: The state has been created during
				 * PG deletion.
				 */
				printf("NAT64LSN: %s: destroying PG %p "
				    "with non-empty chunk %d\n", __func__,
				    pg, i);
			}
		}
		nat64lsn_destroy_pg(pg);
	}
	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
	uma_zfree(nat64lsn_job_zone, ji);
}

static int
nat64lsn_request_host(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_job_item *ji;

	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
	if (ji != NULL) {
		ji->m = *mp;
		ji->f_id = *f_id;
		ji->faddr = faddr;
		ji->port = port;
		ji->proto = proto;
		ji->src6_hval = hval;

		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
		*mp = NULL;
	}
	return (IP_FW_DENY);
}

static int
nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
    in_addr_t faddr, uint16_t port, uint8_t proto)
{
	struct nat64lsn_job_item *ji;

	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
	if (ji != NULL) {
		ji->m = *mp;
		ji->f_id = *f_id;
		ji->faddr = faddr;
		ji->port = port;
		ji->proto = proto;
		ji->state_hval = hval;
		ji->host = host;

		nat64lsn_enqueue_job(cfg, ji);
		NAT64STAT_INC(&cfg->base.stats, jportreq);
		*mp = NULL;
	}
	return (IP_FW_DENY);
}
static int
nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
    struct nat64lsn_state *state, uint8_t flags)
{
	struct pfloghdr loghdr, *logdata;
	int ret;
	uint16_t ts;

	/* Update timestamp and flags if needed */
	SET_AGE(ts);
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET6, state);
	} else
		logdata = NULL;

	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
	    htons(state->aport), &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}

static int
nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
    struct mbuf **mp)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	struct icmp6_hdr *icmp6;
	uint32_t addr, hval, data[2];
	int offset, proto;
	uint16_t port;
	uint8_t flags;

	/* Check if protocol is supported */
	port = f_id->src_port;
	proto = f_id->proto;
	switch (f_id->proto) {
	case IPPROTO_ICMPV6:
		/*
		 * For ICMPv6 echo reply/request we use icmp6_id as
		 * local port.
		 */
		offset = 0;
		proto = nat64_getlasthdr(*mp, &offset);
		if (proto < 0) {
			NAT64STAT_INC(&cfg->base.stats, dropped);
			DPRINTF(DP_DROPS, "mbuf isn't contiguous");
			return (IP_FW_DENY);
		}
		if (proto == IPPROTO_ICMPV6) {
			icmp6 = mtodo(*mp, offset);
			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
				port = ntohs(icmp6->icmp6_id);
		}
		proto = IPPROTO_ICMP;
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	/* Extract IPv4 from destination IPv6 address */
	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
		char a[INET_ADDRSTRLEN];

		NAT64STAT_INC(&cfg->base.stats, dropped);
		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
		return (IP_FW_DENY);	/* XXX: add extra stats? */
	}

	/* Try to find host */
	hval = HOST_HVAL(cfg, &f_id->src_ip6);
	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
			break;
	}
	/* We use IPv4 address in host byte order */
	addr = ntohl(addr);
	if (host == NULL)
		return (nat64lsn_request_host(cfg, f_id, mp,
		    hval, addr, port, proto));

	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);

	data[0] = addr;
	data[1] = (f_id->dst_port << 16) | port;
	hval = STATE_HVAL(cfg, data);
	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
	    port, proto);
	if (state == NULL)
		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
		    port, proto));
	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
}

/*
 * Main dataplane entry point.
 */
int
ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
    ipfw_insn *cmd, int *done)
{
	struct nat64lsn_cfg *cfg;
	ipfw_insn *icmd;
	int ret;

	IPFW_RLOCK_ASSERT(ch);

	*done = 0;	/* continue the search in case of failure */
	icmd = cmd + 1;
	if (cmd->opcode != O_EXTERNAL_ACTION ||
	    cmd->arg1 != V_nat64lsn_eid ||
	    icmd->opcode != O_EXTERNAL_INSTANCE ||
	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
		return (IP_FW_DENY);

	*done = 1;	/* terminate the search */

	switch (args->f_id.addr_type) {
	case 4:
		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
		break;
	case 6:
		/*
		 * Check that the destination IPv6 address matches
		 * our prefix6.
		 */
		if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
		    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
		    cfg->base.plat_plen / 8) != 0) {
			ret = cfg->nomatch_verdict;
			break;
		}
		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
		break;
	default:
		ret = cfg->nomatch_verdict;
	}

	if (ret != IP_FW_PASS && args->m != NULL) {
		m_freem(args->m);
		args->m = NULL;
	}
	return (ret);
}
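/*
 * The zone constructor below only clears the per-state flags on each
 * allocation: the remaining fields of a states chunk are initialized
 * when a state is actually grabbed in nat64lsn_get_state6to4(), so
 * recycled chunks can be reused as-is.
 */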
static int
nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
{
	struct nat64lsn_states_chunk *chunk;
	int i;

	chunk = (struct nat64lsn_states_chunk *)mem;
	for (i = 0; i < 64; i++)
		chunk->state[i].flags = 0;
	return (0);
}

void
nat64lsn_init_internal(void)
{

	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	JQUEUE_LOCK_INIT();
}

void
nat64lsn_uninit_internal(void)
{

	/* XXX: epoch_task drain */
	JQUEUE_LOCK_DESTROY();
	uma_zdestroy(nat64lsn_host_zone);
	uma_zdestroy(nat64lsn_pgchunk_zone);
	uma_zdestroy(nat64lsn_pg_zone);
	uma_zdestroy(nat64lsn_aliaslink_zone);
	uma_zdestroy(nat64lsn_state_zone);
	uma_zdestroy(nat64lsn_job_zone);
}

void
nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
{

	CALLOUT_LOCK(cfg);
	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
	    nat64lsn_periodic, cfg);
	CALLOUT_UNLOCK(cfg);
}

struct nat64lsn_cfg *
nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
{
	struct nat64lsn_cfg *cfg;
	struct nat64lsn_alias *alias;
	int i, naddr;

	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
	    M_WAITOK | M_ZERO);

	CFG_LOCK_INIT(cfg);
	CALLOUT_LOCK_INIT(cfg);
	STAILQ_INIT(&cfg->jhead);
	cfg->vp = curvnet;
	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);

	cfg->hash_seed = arc4random();
	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < cfg->hosts_hashsize; i++)
		CK_SLIST_INIT(&cfg->hosts_hash[i]);

	naddr = 1 << (32 - plen);
	cfg->prefix4 = prefix;
	cfg->pmask4 = prefix | (naddr - 1);
	cfg->plen4 = plen;
	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
	    M_NAT64LSN, M_WAITOK | M_ZERO);
	for (i = 0; i < naddr; i++) {
		alias = &cfg->aliases[i];
		alias->addr = prefix + i; /* host byte order */
		CK_SLIST_INIT(&alias->hosts);
		ALIAS_LOCK_INIT(alias);
	}

	callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);

	return (cfg);
}

static void
nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
{
	int i;

	if (pg->chunks_count == 1) {
		uma_zfree(nat64lsn_state_zone, pg->states);
	} else {
		for (i = 0; i < pg->chunks_count; i++)
			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
		free(pg->states_chunk, M_NAT64LSN);
		free(pg->freemask_chunk, M_NAT64LSN);
	}
	uma_zfree(nat64lsn_pg_zone, pg);
}

static void
nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
    struct nat64lsn_alias *alias)
{
	struct nat64lsn_pg *pg;
	int i;

	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
		pg = CK_SLIST_FIRST(&alias->portgroups);
		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
		nat64lsn_destroy_pg(pg);
	}
	for (i = 0; i < 32; i++) {
		if (ISSET32(alias->tcp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
		if (ISSET32(alias->udp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
		if (ISSET32(alias->icmp_chunkmask, i))
			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
	}
	ALIAS_LOCK_DESTROY(alias);
}

static void
nat64lsn_destroy_host(struct nat64lsn_host *host)
{
	struct nat64lsn_aliaslink *link;

	while (!CK_SLIST_EMPTY(&host->aliases)) {
		link = CK_SLIST_FIRST(&host->aliases);
		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);

		ALIAS_LOCK(link->alias);
		CK_SLIST_REMOVE(&link->alias->hosts, link,
		    nat64lsn_aliaslink, alias_entries);
		link->alias->hosts_count--;
		ALIAS_UNLOCK(link->alias);

		uma_zfree(nat64lsn_aliaslink_zone, link);
	}
	HOST_LOCK_DESTROY(host);
	free(host->states_hash, M_NAT64LSN);
	uma_zfree(nat64lsn_host_zone, host);
}

void
nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *host;
	int i;

	CALLOUT_LOCK(cfg);
	callout_drain(&cfg->periodic);
	CALLOUT_UNLOCK(cfg);
	callout_drain(&cfg->jcallout);

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
			nat64lsn_destroy_host(host);
		}
	}

	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);

	CALLOUT_LOCK_DESTROY(cfg);
	CFG_LOCK_DESTROY(cfg);
	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
	free(cfg->hosts_hash, M_NAT64LSN);
	free(cfg->aliases, M_NAT64LSN);
	free(cfg, M_NAT64LSN);
}