1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2020 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2016-2020 Andrey V. Elsukov <ae@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/counter.h>
33 #include <sys/ck.h>
34 #include <sys/epoch.h>
35 #include <sys/errno.h>
36 #include <sys/hash.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/rmlock.h>
43 #include <sys/socket.h>
44 #include <sys/syslog.h>
45 #include <sys/sysctl.h>
46
47 #include <net/if.h>
48 #include <net/if_var.h>
49 #include <net/if_pflog.h>
50 #include <net/pfil.h>
51
52 #include <netinet/in.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/ip_fw.h>
56 #include <netinet/ip6.h>
57 #include <netinet/icmp6.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 #include <netinet6/in6_var.h>
62 #include <netinet6/ip6_var.h>
63 #include <netinet6/ip_fw_nat64.h>
64
65 #include <netpfil/ipfw/ip_fw_private.h>
66 #include <netpfil/pf/pf.h>
67
68 #include "nat64lsn.h"
69
70 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
71
72 #define NAT64LSN_EPOCH_ENTER(et) NET_EPOCH_ENTER(et)
73 #define NAT64LSN_EPOCH_EXIT(et) NET_EPOCH_EXIT(et)
74 #define NAT64LSN_EPOCH_ASSERT() NET_EPOCH_ASSERT()
75 #define NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))
76
77 static uma_zone_t nat64lsn_host_zone;
78 static uma_zone_t nat64lsn_pgchunk_zone;
79 static uma_zone_t nat64lsn_pg_zone;
80 static uma_zone_t nat64lsn_aliaslink_zone;
81 static uma_zone_t nat64lsn_state_zone;
82 static uma_zone_t nat64lsn_job_zone;
83
84 static void nat64lsn_periodic(void *data);
85 #define PERIODIC_DELAY 4
86 #define NAT64_LOOKUP(chain, cmd) \
87 (struct nat64lsn_instance *)SRV_OBJECT((chain), insntod(cmd, kidx)->kidx)
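/*
 * NAT64_LOOKUP() resolves the nat64lsn instance bound to a rule: the
 * O_EXTERNAL_INSTANCE opcode carries a kernel object index (kidx), and the
 * macro fetches the object stored for that index in the chain's SRV_OBJECT()
 * storage.  The caller is expected to check the result for NULL.
 */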
88 /*
89 * Delayed job queue, used to create new hosts
90 * and new portgroups
91 */
92 enum nat64lsn_jtype {
93 JTYPE_NEWHOST = 1,
94 JTYPE_NEWPORTGROUP,
95 JTYPE_DESTROY,
96 };
97
98 struct nat64lsn_job_item {
99 STAILQ_ENTRY(nat64lsn_job_item) entries;
100 enum nat64lsn_jtype jtype;
101
102 union {
103 struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
104 struct mbuf *m;
105 struct nat64lsn_host *host;
106 struct nat64lsn_state *state;
107 uint32_t src6_hval;
108 uint32_t state_hval;
109 struct ipfw_flow_id f_id;
110 in_addr_t faddr;
111 uint16_t port;
112 uint8_t proto;
113 uint8_t done;
114 };
115 struct { /* used by JTYPE_DESTROY */
116 struct nat64lsn_hosts_slist hosts;
117 struct nat64lsn_pg_slist portgroups;
118 struct nat64lsn_pgchunk *pgchunk;
119 struct epoch_context epoch_ctx;
120 };
121 };
122 };
123
124 static struct mtx jmtx;
125 #define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
126 #define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
127 #define JQUEUE_LOCK() mtx_lock(&jmtx)
128 #define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
129
130 static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
131 struct nat64lsn_job_item *ji);
132 static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
133 struct nat64lsn_job_item *ji);
134 static struct nat64lsn_job_item *nat64lsn_create_job(
135 struct nat64lsn_cfg *cfg, int jtype);
136 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
137 struct nat64lsn_job_item *ji);
138 static void nat64lsn_job_destroy(epoch_context_t ctx);
139 static void nat64lsn_destroy_host(struct nat64lsn_host *host);
140 static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
141
142 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
143 const struct ipfw_flow_id *f_id, struct mbuf **mp);
144 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
145 struct ipfw_flow_id *f_id, struct mbuf **mp);
146 static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
147 struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
148
149 #define NAT64_BIT_TCP_FIN 0 /* FIN was seen */
150 #define NAT64_BIT_TCP_SYN 1 /* First syn in->out */
151 #define NAT64_BIT_TCP_ESTAB 2 /* Packet with Ack */
152 #define NAT64_BIT_READY_IPV4 6 /* state is ready for translate4 */
153 #define NAT64_BIT_STALE 7 /* state is going to be expired */
154
155 #define NAT64_FLAG_FIN (1 << NAT64_BIT_TCP_FIN)
156 #define NAT64_FLAG_SYN (1 << NAT64_BIT_TCP_SYN)
157 #define NAT64_FLAG_ESTAB (1 << NAT64_BIT_TCP_ESTAB)
158 #define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
159
160 #define NAT64_FLAG_READY (1 << NAT64_BIT_READY_IPV4)
161 #define NAT64_FLAG_STALE (1 << NAT64_BIT_STALE)
162
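/*
 * Map TCP header flags onto the state's NAT64_FLAG_* bits: TH_FIN (0x01)
 * and TH_SYN (0x02) already line up with bits 0 and 1, TH_RST (0x04) is
 * shifted down onto the FIN bit, and TH_ACK (0x10) is shifted down onto
 * the ESTAB bit, matching the comments below.
 */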
163 static inline uint8_t
164 convert_tcp_flags(uint8_t flags)
165 {
166 uint8_t result;
167
168 result = flags & (TH_FIN|TH_SYN);
169 result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */
170 result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */
171
172 return (result);
173 }
174
175 static void
176 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
177 struct nat64lsn_state *state)
178 {
179
180 memset(plog, 0, sizeof(*plog));
181 plog->length = PFLOG_REAL_HDRLEN;
182 plog->af = family;
183 plog->action = PF_NAT;
184 plog->dir = PF_IN;
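	/*
	 * pflog has no NAT64-specific fields, so the state details are
	 * packed into the rule number fields: rulenr carries the alias
	 * IPv4 address, and subrulenr packs the allocated port, the
	 * protocol and the low byte of the remote IPv4 address.
	 */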
185 plog->rulenr = htonl(state->ip_src);
186 plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
187 (state->proto << 8) | (state->ip_dst & 0xff));
188 plog->ruleset[0] = '\0';
189 strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
190 ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
191 }
192
193 #define HVAL(p, n, s) jenkins_hash32((const uint32_t *)(p), (n), (s))
194 #define HOST_HVAL(c, a) HVAL((a),\
195 sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
196 #define HOSTS(c, v) ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])
197
198 #define ALIASLINK_HVAL(c, f) HVAL(&(f)->dst_ip6,\
199 sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
200 #define ALIAS_BYHASH(c, v) \
201 ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
202 static struct nat64lsn_aliaslink*
203 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
204 struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
205 {
206
207 /*
208 * We could implement different algorithms for selecting
209 * an alias address.
210 * XXX: for now we use first available.
211 */
212 return (CK_SLIST_FIRST(&host->aliases));
213 }
214
215 static struct nat64lsn_alias*
216 nat64lsn_get_alias(struct nat64lsn_cfg *cfg,
217 const struct ipfw_flow_id *f_id __unused)
218 {
219 static uint32_t idx = 0;
220
221 /*
222 * We could choose an alias by the number of allocated PGs,
223 * by which aliases are not yet used by other hosts, or by a
224 * static assignment configured by the user.
225 * XXX: for now we choose it using round robin.
226 */
227 return (&ALIAS_BYHASH(cfg, idx++));
228 }
229
230 #define STATE_HVAL(c, d) HVAL((d), 2, (c)->hash_seed)
231 #define STATE_HASH(h, v) \
232 ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
233 #define STATES_CHUNK(p, v) \
234 ((p)->chunks_count == 1 ? (p)->states : \
235 ((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
236
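/*
 * Each portgroup keeps a 64-bit "freemask" per states chunk: one bit per
 * state/port, where a set bit means the slot is free.  On LP64 platforms
 * the mask is manipulated with 64-bit ck_pr operations; on 32-bit
 * platforms it is emulated with two 32-bit words, as sketched below.
 */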
237 #ifdef __LP64__
238 #define FREEMASK_FFSLL(pg, faddr) \
239 ffsll(*FREEMASK_CHUNK((pg), (faddr)))
240 #define FREEMASK_BTR(pg, faddr, bit) \
241 ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
242 #define FREEMASK_BTS(pg, faddr, bit) \
243 ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
244 #define FREEMASK_ISSET(pg, faddr, bit) \
245 ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
246 #define FREEMASK_COPY(pg, n, out) \
247 (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
248 #else
249 static inline int
250 freemask_ffsll(uint32_t *freemask)
251 {
252 int i;
253
254 if ((i = ffsl(freemask[0])) != 0)
255 return (i);
256 if ((i = ffsl(freemask[1])) != 0)
257 return (i + 32);
258 return (0);
259 }
260 #define FREEMASK_FFSLL(pg, faddr) \
261 freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
262 #define FREEMASK_BTR(pg, faddr, bit) \
263 ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
264 #define FREEMASK_BTS(pg, faddr, bit) \
265 ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
266 #define FREEMASK_ISSET(pg, faddr, bit) \
267 ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
268 #define FREEMASK_COPY(pg, n, out) \
269 (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
270 ((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
271 #endif /* !__LP64__ */
272
273
274 #define NAT64LSN_TRY_PGCNT 36
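/*
 * Lockless PG lookup for the 6-to-4 direction: start from the last used PG
 * index (pgidx), scan up to NAT64LSN_TRY_PGCNT portgroups, wrapping around
 * to index 0 at most once, and return the first live PG that still has
 * free states for the given foreign address.  The index hint is updated
 * with a best-effort compare-and-swap.
 */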
275 static struct nat64lsn_pg*
276 nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
277 struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, in_addr_t faddr)
278 {
279 struct nat64lsn_pg *pg;
280 uint32_t idx, oldidx;
281 int cnt;
282
283 /* First try last used PG. */
284 idx = oldidx = ck_pr_load_32(pgidx);
285 MPASS(idx < 1024);
286 cnt = 0;
287 do {
288 ck_pr_fence_load();
289 if (idx > 1023 || !ISSET32(*chunkmask, idx / 32)) {
290 /* If it is first try, reset idx to first PG */
291 idx = 0;
292 /* Stop if idx is out of range */
293 if (cnt > 0)
294 break;
295 }
296 if (ISSET32(pgmask[idx / 32], idx % 32)) {
297 pg = ck_pr_load_ptr(
298 &chunks[idx / 32]->pgptr[idx % 32]);
299 ck_pr_fence_load();
300 /*
301 * Make sure that pg did not become DEAD.
302 */
303 if ((pg->flags & NAT64LSN_DEADPG) == 0 &&
304 FREEMASK_BITCOUNT(pg, faddr) > 0) {
305 if (cnt > 0)
306 ck_pr_cas_32(pgidx, oldidx, idx);
307 return (pg);
308 }
309 }
310 idx++;
311 } while (++cnt < NAT64LSN_TRY_PGCNT);
312 if (oldidx != idx)
313 ck_pr_cas_32(pgidx, oldidx, idx);
314 return (NULL);
315 }
316
317 static struct nat64lsn_state*
318 nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
319 const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
320 uint16_t port, uint8_t proto)
321 {
322 struct nat64lsn_aliaslink *link;
323 struct nat64lsn_state *state;
324 struct nat64lsn_pg *pg;
325 int i, offset;
326
327 NAT64LSN_EPOCH_ASSERT();
328
329 /* Check whether we already have a state for the given arguments */
330 CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
331 if (state->proto == proto && state->ip_dst == faddr &&
332 state->sport == port && state->dport == f_id->dst_port)
333 return (state);
334 }
335
336 link = nat64lsn_get_aliaslink(cfg, host, f_id);
337 if (link == NULL)
338 return (NULL);
339
340 switch (proto) {
341 case IPPROTO_TCP:
342 pg = nat64lsn_get_pg(&link->alias->tcp_chunkmask,
343 link->alias->tcp_pgmask, link->alias->tcp,
344 &link->alias->tcp_pgidx, faddr);
345 break;
346 case IPPROTO_UDP:
347 pg = nat64lsn_get_pg(&link->alias->udp_chunkmask,
348 link->alias->udp_pgmask, link->alias->udp,
349 &link->alias->udp_pgidx, faddr);
350 break;
351 case IPPROTO_ICMP:
352 pg = nat64lsn_get_pg(&link->alias->icmp_chunkmask,
353 link->alias->icmp_pgmask, link->alias->icmp,
354 &link->alias->icmp_pgidx, faddr);
355 break;
356 default:
357 panic("%s: wrong proto %d", __func__, proto);
358 }
359 if (pg == NULL || (pg->flags & NAT64LSN_DEADPG) != 0)
360 return (NULL);
361
362 /* Check that PG has some free states */
363 state = NULL;
364 i = FREEMASK_BITCOUNT(pg, faddr);
365 while (i-- > 0) {
366 offset = FREEMASK_FFSLL(pg, faddr);
367 if (offset == 0) {
368 /*
369 * We lost the race.
370 * No more free states in this PG.
371 */
372 break;
373 }
374
375 /* Let's try to grab the state atomically */
376 if (FREEMASK_BTR(pg, faddr, offset - 1)) {
377 state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
378 /* Initialize */
379 state->flags = proto != IPPROTO_TCP ? 0 :
380 convert_tcp_flags(f_id->_flags);
381 state->proto = proto;
382 state->aport = pg->base_port + offset - 1;
383 state->dport = f_id->dst_port;
384 state->sport = port;
385 state->ip6_dst = f_id->dst_ip6;
386 state->ip_dst = faddr;
387 state->ip_src = link->alias->addr;
388 state->hval = hval;
389 state->host = host;
390 SET_AGE(state->timestamp);
391
392 /* Insert new state into host's hash table */
393 HOST_LOCK(host);
394 SET_AGE(host->timestamp);
395 CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
396 state, entries);
397 host->states_count++;
398 HOST_UNLOCK(host);
399 NAT64STAT_INC(&cfg->base.stats, screated);
400 /* Mark the state as ready for translate4 */
401 ck_pr_fence_store();
402 ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
403 break;
404 }
405 }
406 return (state);
407 }
408
409 /*
410 * Inspect ICMP packets to see if the message contains a different
411 * packet header, in which case we need to alter @addr and @port.
412 */
413 static int
414 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
415 uint16_t *port)
416 {
417 struct icmp *icmp;
418 struct ip *ip;
419 int off;
420 uint8_t inner_proto;
421
422 ip = mtod(*mp, struct ip *); /* Outer IP header */
423 off = (ip->ip_hl << 2) + ICMP_MINLEN;
424 if ((*mp)->m_len < off)
425 *mp = m_pullup(*mp, off);
426 if (*mp == NULL)
427 return (ENOMEM);
428
429 ip = mtod(*mp, struct ip *); /* Outer IP header */
430 icmp = L3HDR(ip, struct icmp *);
431 switch (icmp->icmp_type) {
432 case ICMP_ECHO:
433 case ICMP_ECHOREPLY:
434 /* Use icmp ID as distinguisher */
435 *port = ntohs(icmp->icmp_id);
436 return (0);
437 case ICMP_UNREACH:
438 case ICMP_TIMXCEED:
439 break;
440 default:
441 return (EOPNOTSUPP);
442 }
443 /*
444 * ICMP_UNREACH and ICMP_TIMXCEED contain an IP header + 64 bits
445 * of ULP header.
446 */
447 if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
448 return (EINVAL);
449 if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
450 *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
451 if (*mp == NULL)
452 return (ENOMEM);
453 ip = mtodo(*mp, off); /* Inner IP header */
454 inner_proto = ip->ip_p;
455 off += ip->ip_hl << 2; /* Skip inner IP header */
456 *addr = ntohl(ip->ip_src.s_addr);
457 if ((*mp)->m_len < off + ICMP_MINLEN)
458 *mp = m_pullup(*mp, off + ICMP_MINLEN);
459 if (*mp == NULL)
460 return (ENOMEM);
461 switch (inner_proto) {
462 case IPPROTO_TCP:
463 case IPPROTO_UDP:
464 /* Copy source port from the header */
465 *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
466 *proto = inner_proto;
467 return (0);
468 case IPPROTO_ICMP:
469 /*
470 * We will translate only ICMP errors for our ICMP
471 * echo requests.
472 */
473 icmp = mtodo(*mp, off);
474 if (icmp->icmp_type != ICMP_ECHO)
475 return (EOPNOTSUPP);
476 *port = ntohs(icmp->icmp_id);
477 return (0);
478 };
479 return (EOPNOTSUPP);
480 }
481
482 static struct nat64lsn_state*
483 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
484 in_addr_t faddr, uint16_t port, uint8_t proto)
485 {
486 struct nat64lsn_state *state;
487 struct nat64lsn_pg *pg;
488 int chunk_idx, pg_idx, state_idx;
489
490 NAT64LSN_EPOCH_ASSERT();
491
492 if (port < NAT64_MIN_PORT)
493 return (NULL);
494 /*
495 * An alias keeps 32 pgchunks for each protocol.
496 * Each pgchunk has 32 pointers to portgroups.
497 * Each portgroup has 64 states for ports.
498 */
499 port -= NAT64_MIN_PORT;
500 chunk_idx = port / 2048;
501
502 port -= chunk_idx * 2048;
503 pg_idx = port / 64;
504 state_idx = port % 64;
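	/*
	 * Example, assuming NAT64_MIN_PORT is 1024: external port 12345
	 * maps to offset 11321, i.e. chunk_idx 5, pg_idx 16, state_idx 57;
	 * the matching PG has base_port 1024 + 64 * (5 * 32 + 16) = 12288,
	 * and 12288 + 57 = 12345.
	 */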
505
506 /*
507 * First check in proto_chunkmask that we have an allocated PG chunk.
508 * Then check in proto_pgmask that we have a valid PG pointer.
509 */
510 pg = NULL;
511 switch (proto) {
512 case IPPROTO_TCP:
513 if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
514 ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
515 pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
516 break;
517 }
518 return (NULL);
519 case IPPROTO_UDP:
520 if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
521 ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
522 pg = alias->udp[chunk_idx]->pgptr[pg_idx];
523 break;
524 }
525 return (NULL);
526 case IPPROTO_ICMP:
527 if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
528 ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
529 pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
530 break;
531 }
532 return (NULL);
533 default:
534 panic("%s: wrong proto %d", __func__, proto);
535 }
536 if (pg == NULL)
537 return (NULL);
538
539 if (FREEMASK_ISSET(pg, faddr, state_idx))
540 return (NULL);
541
542 state = &STATES_CHUNK(pg, faddr)->state[state_idx];
543 ck_pr_fence_load();
544 if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
545 return (state);
546 return (NULL);
547 }
548
549 /*
550 * Reassemble IPv4 fragments, do m_pullup() if needed, and get some ULP
551 * fields that might be unknown until reassembly is completed.
552 */
553 static struct mbuf*
554 nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
555 uint16_t *port)
556 {
557 struct ip *ip;
558 int len;
559
560 m = ip_reass(m);
561 if (m == NULL)
562 return (NULL);
563 /* IP header must be contiguous after ip_reass() */
564 ip = mtod(m, struct ip *);
565 len = ip->ip_hl << 2;
566 switch (ip->ip_p) {
567 case IPPROTO_ICMP:
568 len += ICMP_MINLEN;
569 break;
570 case IPPROTO_TCP:
571 len += sizeof(struct tcphdr);
572 break;
573 case IPPROTO_UDP:
574 len += sizeof(struct udphdr);
575 break;
576 default:
577 m_freem(m);
578 NAT64STAT_INC(&cfg->base.stats, noproto);
579 return (NULL);
580 }
581 if (m->m_len < len) {
582 m = m_pullup(m, len);
583 if (m == NULL) {
584 NAT64STAT_INC(&cfg->base.stats, nomem);
585 return (NULL);
586 }
587 ip = mtod(m, struct ip *);
588 }
589 switch (ip->ip_p) {
590 case IPPROTO_TCP:
591 *port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
592 break;
593 case IPPROTO_UDP:
594 *port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
595 break;
596 }
597 return (m);
598 }
599
600 static int
601 nat64lsn_translate4(struct nat64lsn_cfg *cfg,
602 const struct ipfw_flow_id *f_id, struct mbuf **mp)
603 {
604 struct pfloghdr loghdr, *logdata;
605 struct in6_addr src6;
606 struct nat64lsn_state *state;
607 struct nat64lsn_alias *alias;
608 uint32_t addr, flags;
609 uint16_t port, ts;
610 int ret;
611 uint8_t proto;
612
613 addr = f_id->dst_ip;
614 port = f_id->dst_port;
615 proto = f_id->proto;
616 if (addr < cfg->prefix4 || addr > cfg->pmask4) {
617 NAT64STAT_INC(&cfg->base.stats, nomatch4);
618 return (cfg->nomatch_verdict);
619 }
620
621 /* Reassemble fragments if needed */
622 ret = ntohs(mtod(*mp, struct ip *)->ip_off);
623 if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
624 *mp = nat64lsn_reassemble4(cfg, *mp, &port);
625 if (*mp == NULL)
626 return (IP_FW_DENY);
627 }
628
629 /* Check if protocol is supported */
630 switch (proto) {
631 case IPPROTO_ICMP:
632 ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
633 if (ret != 0) {
634 if (ret == ENOMEM) {
635 NAT64STAT_INC(&cfg->base.stats, nomem);
636 return (IP_FW_DENY);
637 }
638 NAT64STAT_INC(&cfg->base.stats, noproto);
639 return (cfg->nomatch_verdict);
640 }
641 if (addr < cfg->prefix4 || addr > cfg->pmask4) {
642 NAT64STAT_INC(&cfg->base.stats, nomatch4);
643 return (cfg->nomatch_verdict);
644 }
645 /* FALLTHROUGH */
646 case IPPROTO_TCP:
647 case IPPROTO_UDP:
648 break;
649 default:
650 NAT64STAT_INC(&cfg->base.stats, noproto);
651 return (cfg->nomatch_verdict);
652 }
653
654 alias = &ALIAS_BYHASH(cfg, addr);
655 MPASS(addr == alias->addr);
656
657 /* Check that we have state for this port */
658 state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
659 port, proto);
660 if (state == NULL) {
661 NAT64STAT_INC(&cfg->base.stats, nomatch4);
662 return (cfg->nomatch_verdict);
663 }
664
665 /* TODO: Check flags to see if we need to do some static mapping */
666
667 /* Update some state fields if needed */
668 SET_AGE(ts);
669 if (f_id->proto == IPPROTO_TCP)
670 flags = convert_tcp_flags(f_id->_flags);
671 else
672 flags = 0;
673 if (state->timestamp != ts)
674 state->timestamp = ts;
675 if ((state->flags & flags) != flags)
676 state->flags |= flags;
677
678 port = htons(state->sport);
679 src6 = state->ip6_dst;
680
681 if (cfg->base.flags & NAT64_LOG) {
682 logdata = &loghdr;
683 nat64lsn_log(logdata, *mp, AF_INET, state);
684 } else
685 logdata = NULL;
686
687 /*
688 * We already have src6 with an embedded address, but it is possible
689 * that src_ip differs from state->ip_dst; this is why we do the
690 * embedding again.
691 */
692 nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
693 ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
694 &cfg->base, logdata);
695 if (ret == NAT64SKIP)
696 return (cfg->nomatch_verdict);
697 if (ret == NAT64RETURN)
698 *mp = NULL;
699 return (IP_FW_DENY);
700 }
701
702 /*
703 * Check if particular state is stale and should be deleted.
704 * Return 1 if true, 0 otherwise.
705 */
706 static int
707 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
708 {
709 int age, ttl;
710
711 /* State was marked as stale in previous pass. */
712 if (ISSET32(state->flags, NAT64_BIT_STALE))
713 return (1);
714
715 /* State is not yet initialized, it is going to be READY */
716 if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
717 return (0);
718
719 age = GET_AGE(state->timestamp);
720 switch (state->proto) {
721 case IPPROTO_TCP:
722 if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
723 ttl = cfg->st_close_ttl;
724 else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
725 ttl = cfg->st_estab_ttl;
726 else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
727 ttl = cfg->st_syn_ttl;
728 else
729 ttl = cfg->st_syn_ttl;
730 if (age > ttl)
731 return (1);
732 break;
733 case IPPROTO_UDP:
734 if (age > cfg->st_udp_ttl)
735 return (1);
736 break;
737 case IPPROTO_ICMP:
738 if (age > cfg->st_icmp_ttl)
739 return (1);
740 break;
741 }
742 return (0);
743 }
744
745 #define PGCOUNT_ADD(alias, proto, value) \
746 switch (proto) { \
747 case IPPROTO_TCP: (alias)->tcp_pgcount += (value); break; \
748 case IPPROTO_UDP: (alias)->udp_pgcount += (value); break; \
749 case IPPROTO_ICMP: (alias)->icmp_pgcount += (value); break; \
750 }
751 #define PGCOUNT_INC(alias, proto) PGCOUNT_ADD(alias, proto, 1)
752 #define PGCOUNT_DEC(alias, proto) PGCOUNT_ADD(alias, proto, -1)
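/*
 * PGCOUNT_INC()/PGCOUNT_DEC() maintain per-protocol portgroup counters on
 * the alias; they are adjusted under ALIAS_LOCK() together with the alias'
 * portgroups list.
 */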
753
754 static inline void
755 nat64lsn_state_cleanup(struct nat64lsn_state *state)
756 {
757
758 /*
759 * Reset the READY flag and wait until it becomes
760 * safe for translate4.
761 */
762 ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
763 /*
764 * And set STALE flag for deferred deletion in the
765 * next pass of nat64lsn_maintain_pg().
766 */
767 ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
768 ck_pr_fence_store();
769 }
770
771 static int
772 nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
773 {
774 struct nat64lsn_state *state;
775 struct nat64lsn_host *host;
776 uint64_t freemask;
777 int c, i, update_age;
778
779 update_age = 0;
780 for (c = 0; c < pg->chunks_count; c++) {
781 FREEMASK_COPY(pg, c, freemask);
782 for (i = 0; i < 64; i++) {
783 if (ISSET64(freemask, i))
784 continue;
785 state = &STATES_CHUNK(pg, c)->state[i];
786 if (nat64lsn_check_state(cfg, state) == 0) {
787 update_age = 1;
788 continue;
789 }
790 /*
791 * Expire state:
792 * 1. Mark as STALE and unlink from host's hash.
793 * 2. Set bit in freemask.
794 */
795 if (ISSET32(state->flags, NAT64_BIT_STALE)) {
796 /*
797 * State was marked as STALE in previous
798 * pass. Now it is safe to release it.
799 */
800 state->flags = 0;
801 ck_pr_fence_store();
802 FREEMASK_BTS(pg, c, i);
803 NAT64STAT_INC(&cfg->base.stats, sdeleted);
804 continue;
805 }
806 MPASS(state->flags & NAT64_FLAG_READY);
807
808 host = state->host;
809 HOST_LOCK(host);
810 CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
811 state, nat64lsn_state, entries);
812 /*
813 * Now translate6 will not use this state.
814 */
815 host->states_count--;
816 HOST_UNLOCK(host);
817 nat64lsn_state_cleanup(state);
818 }
819 }
820
821 /*
822 * If we still have some live states, update the timestamp.
823 */
824 if (update_age)
825 SET_AGE(pg->timestamp);
826
827 if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
828 return (0);
829
830 return (1);
831 }
832
833 static void
834 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
835 struct nat64lsn_pg_slist *portgroups)
836 {
837 struct nat64lsn_alias *alias;
838 struct nat64lsn_pg *pg, *tpg;
839 uint32_t *pgmask, *pgidx;
840 int i, idx;
841
842 for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
843 alias = &cfg->aliases[i];
844 CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
845 if (nat64lsn_maintain_pg(cfg, pg) == 0)
846 continue;
847 /* Always keep first PG */
848 if (pg->base_port == NAT64_MIN_PORT)
849 continue;
850 /*
851 * PG expires in two passes:
852 * 1. Reset bit in pgmask, mark it as DEAD.
853 * 2. Unlink it and schedule for deferred destroying.
854 */
855 idx = (pg->base_port - NAT64_MIN_PORT) / 64;
856 switch (pg->proto) {
857 case IPPROTO_TCP:
858 pgmask = alias->tcp_pgmask;
859 pgidx = &alias->tcp_pgidx;
860 break;
861 case IPPROTO_UDP:
862 pgmask = alias->udp_pgmask;
863 pgidx = &alias->udp_pgidx;
864 break;
865 case IPPROTO_ICMP:
866 pgmask = alias->icmp_pgmask;
867 pgidx = &alias->icmp_pgidx;
868 break;
869 }
870 if (pg->flags & NAT64LSN_DEADPG) {
871 /* Unlink PG from alias's chain */
872 ALIAS_LOCK(alias);
873 CK_SLIST_REMOVE(&alias->portgroups, pg,
874 nat64lsn_pg, entries);
875 PGCOUNT_DEC(alias, pg->proto);
876 ALIAS_UNLOCK(alias);
877 /*
878 * Link it to job's chain for deferred
879 * destroying.
880 */
881 NAT64STAT_INC(&cfg->base.stats, spgdeleted);
882 CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
883 continue;
884 }
885
886 /* Reset the corresponding bit in pgmask array. */
887 ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
888 pg->flags |= NAT64LSN_DEADPG;
889 ck_pr_fence_store();
890 /* If last used PG points to this PG, reset it. */
891 ck_pr_cas_32(pgidx, idx, 0);
892 }
893 }
894 }
895
896 static void
897 nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
898 struct nat64lsn_hosts_slist *hosts)
899 {
900 struct nat64lsn_host *host, *tmp;
901 int i;
902
903 for (i = 0; i < cfg->hosts_hashsize; i++) {
904 CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
905 entries, tmp) {
906 /* Was the host marked in a previous call? */
907 if (host->flags & NAT64LSN_DEADHOST) {
908 if (host->states_count > 0 ||
909 GET_AGE(host->timestamp) <
910 cfg->host_delete_delay) {
911 host->flags &= ~NAT64LSN_DEADHOST;
912 continue;
913 }
914 /*
915 * Unlink host from hash table and schedule
916 * it for deferred destroying.
917 */
918 CFG_LOCK(cfg);
919 CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
920 nat64lsn_host, entries);
921 cfg->hosts_count--;
922 CFG_UNLOCK(cfg);
923 CK_SLIST_INSERT_HEAD(hosts, host, entries);
924 continue;
925 }
926 if (host->states_count > 0 ||
927 GET_AGE(host->timestamp) < cfg->host_delete_delay)
928 continue;
929 /* Mark host as going to be expired in next pass */
930 host->flags |= NAT64LSN_DEADHOST;
931 ck_pr_fence_store();
932 }
933 }
934 }
935
936 static struct nat64lsn_pgchunk*
937 nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
938 {
939 #if 0
940 struct nat64lsn_alias *alias;
941 struct nat64lsn_pgchunk *chunk;
942 uint32_t pgmask;
943 int i, c;
944
945 for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
946 alias = &cfg->aliases[i];
947 if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
948 continue;
949 /* Always keep single chunk allocated */
950 for (c = 1; c < 32; c++) {
951 if ((alias->tcp_chunkmask & (1 << c)) == 0)
952 break;
953 chunk = ck_pr_load_ptr(&alias->tcp[c]);
954 if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
955 continue;
956 ck_pr_btr_32(&alias->tcp_chunkmask, c);
957 ck_pr_fence_load();
958 if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
959 continue;
960 }
961 }
962 #endif
963 return (NULL);
964 }
965
966 #if 0
967 static void
968 nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
969 {
970 struct nat64lsn_host *h;
971 struct nat64lsn_states_slist *hash;
972 int i, j, hsize;
973
974 for (i = 0; i < cfg->hosts_hashsize; i++) {
975 CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
976 if (h->states_count / 2 < h->states_hashsize ||
977 h->states_hashsize >= NAT64LSN_MAX_HSIZE)
978 continue;
979 hsize = h->states_hashsize * 2;
980 hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN, M_NOWAIT);
981 if (hash == NULL)
982 continue;
983 for (j = 0; j < hsize; j++)
984 CK_SLIST_INIT(&hash[j]);
985
986 ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
987 }
988 }
989 }
990 #endif
991
992 /*
993 * This procedure is used to perform various maintenance
994 * on the dynamic hash list. Currently it is called every 4 seconds.
995 */
996 static void
997 nat64lsn_periodic(void *data)
998 {
999 struct nat64lsn_job_item *ji;
1000 struct nat64lsn_cfg *cfg;
1001
1002 cfg = (struct nat64lsn_cfg *) data;
1003 CURVNET_SET(cfg->vp);
1004 if (cfg->hosts_count > 0) {
1005 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1006 if (ji != NULL) {
1007 ji->jtype = JTYPE_DESTROY;
1008 CK_SLIST_INIT(&ji->hosts);
1009 CK_SLIST_INIT(&ji->portgroups);
1010 nat64lsn_expire_hosts(cfg, &ji->hosts);
1011 nat64lsn_expire_portgroups(cfg, &ji->portgroups);
1012 ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
1013 NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
1014 nat64lsn_job_destroy);
1015 } else
1016 NAT64STAT_INC(&cfg->base.stats, jnomem);
1017 }
1018 callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
1019 CURVNET_RESTORE();
1020 }
1021
1022 #define ALLOC_ERROR(stage, type) ((stage) ? 10 * (type) + (stage): 0)
1023 #define HOST_ERROR(stage) ALLOC_ERROR(stage, 1)
1024 #define PG_ERROR(stage) ALLOC_ERROR(stage, 2)
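/*
 * Allocation results are encoded as 10 * type + stage, so 0 always means
 * success, HOST_ERROR(n) yields 10 + n and PG_ERROR(n) yields 20 + n; the
 * stage number identifies which allocation step failed.
 */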
1025 static int
1026 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1027 {
1028 char a[INET6_ADDRSTRLEN];
1029 struct nat64lsn_aliaslink *link;
1030 struct nat64lsn_host *host;
1031 struct nat64lsn_state *state;
1032 uint32_t hval, data[2];
1033 int i;
1034
1035 /* Check that host was not yet added. */
1036 NAT64LSN_EPOCH_ASSERT();
1037 CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
1038 if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
1039 /* The host was allocated in previous call. */
1040 ji->host = host;
1041 goto get_state;
1042 }
1043 }
1044
1045 host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
1046 if (ji->host == NULL)
1047 return (HOST_ERROR(1));
1048
1049 host->states_hashsize = NAT64LSN_HSIZE;
1050 host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
1051 host->states_hashsize, M_NAT64LSN, M_NOWAIT);
1052 if (host->states_hash == NULL) {
1053 uma_zfree(nat64lsn_host_zone, host);
1054 return (HOST_ERROR(2));
1055 }
1056
1057 link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
1058 if (link == NULL) {
1059 free(host->states_hash, M_NAT64LSN);
1060 uma_zfree(nat64lsn_host_zone, host);
1061 return (HOST_ERROR(3));
1062 }
1063
1064 /* Initialize */
1065 HOST_LOCK_INIT(host);
1066 SET_AGE(host->timestamp);
1067 host->addr = ji->f_id.src_ip6;
1068 host->hval = ji->src6_hval;
1069 host->flags = 0;
1070 host->states_count = 0;
1071 CK_SLIST_INIT(&host->aliases);
1072 for (i = 0; i < host->states_hashsize; i++)
1073 CK_SLIST_INIT(&host->states_hash[i]);
1074
1075 link->alias = nat64lsn_get_alias(cfg, &ji->f_id);
1076 CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1077
1078 ALIAS_LOCK(link->alias);
1079 CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1080 link->alias->hosts_count++;
1081 ALIAS_UNLOCK(link->alias);
1082
1083 CFG_LOCK(cfg);
1084 CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
1085 cfg->hosts_count++;
1086 CFG_UNLOCK(cfg);
1087
1088 get_state:
1089 data[0] = ji->faddr;
1090 data[1] = (ji->f_id.dst_port << 16) | ji->port;
1091 ji->state_hval = hval = STATE_HVAL(cfg, data);
1092 state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1093 ji->faddr, ji->port, ji->proto);
1094 /*
1095 * We failed to obtain a new state; the alias in use needs a new PG.
1096 * XXX: or another alias should be used.
1097 */
1098 if (state == NULL) {
1099 /* Try to allocate new PG */
1100 if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1101 return (HOST_ERROR(4));
1102 /* We assume that nat64lsn_alloc_pg() got state */
1103 } else
1104 ji->state = state;
1105
1106 ji->done = 1;
1107 DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1108 inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1109 return (HOST_ERROR(0));
1110 }
1111
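/*
 * Find the index of the first zero bit in a 32 x 32-bit mask (i.e. the
 * first unused PG slot out of 1024), or -1 if every slot is taken.
 */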
1112 static int
1113 nat64lsn_find_pg_place(uint32_t *data)
1114 {
1115 int i;
1116
1117 for (i = 0; i < 32; i++) {
1118 if (~data[i] == 0)
1119 continue;
1120 return (i * 32 + ffs(~data[i]) - 1);
1121 }
1122 return (-1);
1123 }
1124
1125 static int
1126 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1127 struct nat64lsn_alias *alias, uint32_t *chunkmask, uint32_t *pgmask,
1128 struct nat64lsn_pgchunk **chunks, uint32_t *pgidx, uint8_t proto)
1129 {
1130 struct nat64lsn_pg *pg;
1131 int i, pg_idx, chunk_idx;
1132
1133 /* Find place in pgchunk where PG can be added */
1134 pg_idx = nat64lsn_find_pg_place(pgmask);
1135 if (pg_idx < 0) /* no more PGs */
1136 return (PG_ERROR(1));
1137 /* Check that we have allocated pgchunk for given PG index */
1138 chunk_idx = pg_idx / 32;
1139 if (!ISSET32(*chunkmask, chunk_idx)) {
1140 chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1141 M_NOWAIT);
1142 if (chunks[chunk_idx] == NULL)
1143 return (PG_ERROR(2));
1144 ck_pr_bts_32(chunkmask, chunk_idx);
1145 ck_pr_fence_store();
1146 }
1147 /* Allocate PG and states chunks */
1148 pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1149 if (pg == NULL)
1150 return (PG_ERROR(3));
1151 pg->chunks_count = cfg->states_chunks;
1152 if (pg->chunks_count > 1) {
1153 pg->freemask_chunk = malloc(pg->chunks_count *
1154 sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1155 if (pg->freemask_chunk == NULL) {
1156 uma_zfree(nat64lsn_pg_zone, pg);
1157 return (PG_ERROR(4));
1158 }
1159 pg->states_chunk = malloc(pg->chunks_count *
1160 sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1161 M_NOWAIT | M_ZERO);
1162 if (pg->states_chunk == NULL) {
1163 free(pg->freemask_chunk, M_NAT64LSN);
1164 uma_zfree(nat64lsn_pg_zone, pg);
1165 return (PG_ERROR(5));
1166 }
1167 for (i = 0; i < pg->chunks_count; i++) {
1168 pg->states_chunk[i] = uma_zalloc(
1169 nat64lsn_state_zone, M_NOWAIT);
1170 if (pg->states_chunk[i] == NULL)
1171 goto states_failed;
1172 }
1173 memset(pg->freemask_chunk, 0xff,
1174 sizeof(uint64_t) * pg->chunks_count);
1175 } else {
1176 pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1177 if (pg->states == NULL) {
1178 uma_zfree(nat64lsn_pg_zone, pg);
1179 return (PG_ERROR(6));
1180 }
1181 memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1182 }
1183
1184 /* Initialize PG and hook it to pgchunk */
1185 SET_AGE(pg->timestamp);
1186 pg->flags = 0;
1187 pg->proto = proto;
1188 pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
1189 ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1190 ck_pr_fence_store();
1191
1192 /* Set bit in pgmask and set index of last used PG */
1193 ck_pr_bts_32(&pgmask[chunk_idx], pg_idx % 32);
1194 ck_pr_store_32(pgidx, pg_idx);
1195
1196 ALIAS_LOCK(alias);
1197 CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1198 SET_AGE(alias->timestamp);
1199 PGCOUNT_INC(alias, proto);
1200 ALIAS_UNLOCK(alias);
1201 NAT64STAT_INC(&cfg->base.stats, spgcreated);
1202 return (PG_ERROR(0));
1203
1204 states_failed:
1205 for (i = 0; i < pg->chunks_count; i++)
1206 uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1207 free(pg->freemask_chunk, M_NAT64LSN);
1208 free(pg->states_chunk, M_NAT64LSN);
1209 uma_zfree(nat64lsn_pg_zone, pg);
1210 return (PG_ERROR(7));
1211 }
1212
1213 static int
1214 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1215 {
1216 struct nat64lsn_aliaslink *link;
1217 struct nat64lsn_alias *alias;
1218 int ret;
1219
1220 link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1221 if (link == NULL)
1222 return (PG_ERROR(1));
1223
1224 /*
1225 * TODO: check that we did not already allocate a PG in a
1226 * previous call.
1227 */
1228
1229 ret = 0;
1230 alias = link->alias;
1231 /* Find place in pgchunk where PG can be added */
1232 switch (ji->proto) {
1233 case IPPROTO_TCP:
1234 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1235 &alias->tcp_chunkmask, alias->tcp_pgmask,
1236 alias->tcp, &alias->tcp_pgidx, ji->proto);
1237 break;
1238 case IPPROTO_UDP:
1239 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1240 &alias->udp_chunkmask, alias->udp_pgmask,
1241 alias->udp, &alias->udp_pgidx, ji->proto);
1242 break;
1243 case IPPROTO_ICMP:
1244 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1245 &alias->icmp_chunkmask, alias->icmp_pgmask,
1246 alias->icmp, &alias->icmp_pgidx, ji->proto);
1247 break;
1248 default:
1249 panic("%s: wrong proto %d", __func__, ji->proto);
1250 }
1251 if (ret == PG_ERROR(1)) {
1252 /*
1253 * PG_ERROR(1) means that alias lacks free PGs
1254 * XXX: try next alias.
1255 */
1256 printf("NAT64LSN: %s: failed to obtain PG\n",
1257 __func__);
1258 return (ret);
1259 }
1260 if (ret == PG_ERROR(0)) {
1261 ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1262 ji->state_hval, ji->faddr, ji->port, ji->proto);
1263 if (ji->state == NULL)
1264 ret = PG_ERROR(8);
1265 else
1266 ji->done = 1;
1267 }
1268 return (ret);
1269 }
1270
1271 static void
1272 nat64lsn_do_request(void *data)
1273 {
1274 struct epoch_tracker et;
1275 struct nat64lsn_job_head jhead;
1276 struct nat64lsn_job_item *ji, *ji2;
1277 struct nat64lsn_cfg *cfg;
1278 int jcount;
1279 uint8_t flags;
1280
1281 cfg = (struct nat64lsn_cfg *)data;
1282 if (cfg->jlen == 0)
1283 return;
1284
1285 CURVNET_SET(cfg->vp);
1286 STAILQ_INIT(&jhead);
1287
1288 /* Grab queue */
1289 JQUEUE_LOCK();
1290 STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1291 jcount = cfg->jlen;
1292 cfg->jlen = 0;
1293 JQUEUE_UNLOCK();
1294
1295 /* TODO: check if we need to resize hash */
1296
1297 NAT64STAT_INC(&cfg->base.stats, jcalls);
1298 DPRINTF(DP_JQUEUE, "count=%d", jcount);
1299
1300 /*
1301 * TODO:
1302 * What we should do here is to build a hash
1303 * to ensure we don't have lots of duplicate requests.
1304 * Skip this for now.
1305 *
1306 * TODO: Limit per-call number of items
1307 */
1308
1309 NAT64LSN_EPOCH_ENTER(et);
1310 STAILQ_FOREACH(ji, &jhead, entries) {
1311 switch (ji->jtype) {
1312 case JTYPE_NEWHOST:
1313 if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1314 NAT64STAT_INC(&cfg->base.stats, jhostfails);
1315 break;
1316 case JTYPE_NEWPORTGROUP:
1317 if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1318 NAT64STAT_INC(&cfg->base.stats, jportfails);
1319 break;
1320 default:
1321 continue;
1322 }
1323 if (ji->done != 0) {
1324 flags = ji->proto != IPPROTO_TCP ? 0 :
1325 convert_tcp_flags(ji->f_id._flags);
1326 nat64lsn_translate6_internal(cfg, &ji->m,
1327 ji->state, flags);
1328 NAT64STAT_INC(&cfg->base.stats, jreinjected);
1329 }
1330 }
1331 NAT64LSN_EPOCH_EXIT(et);
1332
1333 ji = STAILQ_FIRST(&jhead);
1334 while (ji != NULL) {
1335 ji2 = STAILQ_NEXT(ji, entries);
1336 /*
1337 * In any case we must free the mbuf if the
1338 * translator did not consume it.
1339 */
1340 m_freem(ji->m);
1341 uma_zfree(nat64lsn_job_zone, ji);
1342 ji = ji2;
1343 }
1344 CURVNET_RESTORE();
1345 }
1346
1347 static struct nat64lsn_job_item *
1348 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1349 {
1350 struct nat64lsn_job_item *ji;
1351
1352 /*
1353 * Do not try to lock a possibly contested mutex if we're near the
1354 * limit. Drop the packet instead.
1355 */
1356 ji = NULL;
1357 if (cfg->jlen >= cfg->jmaxlen)
1358 NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1359 else {
1360 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1361 if (ji == NULL)
1362 NAT64STAT_INC(&cfg->base.stats, jnomem);
1363 }
1364 if (ji == NULL) {
1365 NAT64STAT_INC(&cfg->base.stats, dropped);
1366 DPRINTF(DP_DROPS, "failed to create job");
1367 } else {
1368 ji->jtype = jtype;
1369 ji->done = 0;
1370 }
1371 return (ji);
1372 }
1373
1374 static void
1375 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1376 {
1377
1378 JQUEUE_LOCK();
1379 STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1380 NAT64STAT_INC(&cfg->base.stats, jrequests);
1381 cfg->jlen++;
1382
1383 if (callout_pending(&cfg->jcallout) == 0)
1384 callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1385 JQUEUE_UNLOCK();
1386 }
1387
1388 /*
1389 * This function is used to clean up the result of an unlikely
1390 * race condition, when a host object was deleted, but some translation
1391 * state was created before it was destroyed.
1392 *
1393 * Since state expiration removes a state from the host's hash table,
1394 * we need to be sure that there will not be any states that are linked
1395 * with this host entry.
1396 */
1397 static void
1398 nat64lsn_host_cleanup(struct nat64lsn_host *host)
1399 {
1400 struct nat64lsn_state *state, *ts;
1401 int i;
1402
1403 printf("NAT64LSN: %s: race condition has been detected for host %p\n",
1404 __func__, host);
1405 for (i = 0; i < host->states_hashsize; i++) {
1406 CK_SLIST_FOREACH_SAFE(state, &host->states_hash[i],
1407 entries, ts) {
1408 /*
1409 * We can remove the state without lock,
1410 * because this host entry is unlinked and will
1411 * be destroyed.
1412 */
1413 CK_SLIST_REMOVE(&host->states_hash[i], state,
1414 nat64lsn_state, entries);
1415 host->states_count--;
1416 nat64lsn_state_cleanup(state);
1417 }
1418 }
1419 MPASS(host->states_count == 0);
1420 }
1421
1422 /*
1423 * This function is used to clean up the result of an unlikely
1424 * race condition, when a portgroup was deleted, but some translation state
1425 * was created before it was destroyed.
1426 *
1427 * Since state entries are accessible via the host's hash table, we need
1428 * to be sure that there will not be any states from this PG that are
1429 * linked with any host entries.
1430 */
1431 static void
1432 nat64lsn_pg_cleanup(struct nat64lsn_pg *pg)
1433 {
1434 struct nat64lsn_state *state;
1435 uint64_t usedmask;
1436 int c, i;
1437
1438 printf("NAT64LSN: %s: race condition has been detected for pg %p\n",
1439 __func__, pg);
1440 for (c = 0; c < pg->chunks_count; c++) {
1441 /*
1442 * Use the inverted freemask to find which states were created.
1443 */
1444 usedmask = ~(*FREEMASK_CHUNK(pg, c));
1445 if (usedmask == 0)
1446 continue;
1447 for (i = 0; i < 64; i++) {
1448 if (!ISSET64(usedmask, i))
1449 continue;
1450 state = &STATES_CHUNK(pg, c)->state[i];
1451 /*
1452 * If the STALE bit is set, this means that the state
1453 * is already unlinked from the host's hash table.
1454 * Thus we can just set the bit in the freemask and
1455 * schedule destroying in the next epoch call.
1456 */
1457 if (ISSET32(state->flags, NAT64_BIT_STALE)) {
1458 FREEMASK_BTS(pg, c, i);
1459 continue;
1460 }
1461 /*
1462 * There is a small window when the bit has been
1463 * grabbed from the freemask, but the state is not yet
1464 * linked into the host's hash table.
1465 * Check for the READY flag, it is set just after
1466 * linking. If it is not set, defer cleanup
1467 * until the next call.
1468 */
1469 if (ISSET32(state->flags, NAT64_BIT_READY_IPV4)) {
1470 struct nat64lsn_host *host;
1471
1472 host = state->host;
1473 HOST_LOCK(host);
1474 CK_SLIST_REMOVE(&STATE_HASH(host,
1475 state->hval), state, nat64lsn_state,
1476 entries);
1477 host->states_count--;
1478 HOST_UNLOCK(host);
1479 nat64lsn_state_cleanup(state);
1480 }
1481 }
1482 }
1483 }
1484
1485 static void
1486 nat64lsn_job_destroy(epoch_context_t ctx)
1487 {
1488 struct nat64lsn_hosts_slist hosts;
1489 struct nat64lsn_pg_slist portgroups;
1490 struct nat64lsn_job_item *ji;
1491 struct nat64lsn_host *host;
1492 struct nat64lsn_pg *pg;
1493 int i;
1494
1495 CK_SLIST_INIT(&hosts);
1496 CK_SLIST_INIT(&portgroups);
1497 ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1498 MPASS(ji->jtype == JTYPE_DESTROY);
1499 while (!CK_SLIST_EMPTY(&ji->hosts)) {
1500 host = CK_SLIST_FIRST(&ji->hosts);
1501 CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1502 if (host->states_count > 0) {
1503 /*
1504 * The state has been created during host deletion.
1505 */
1506 printf("NAT64LSN: %s: destroying host with %d "
1507 "states\n", __func__, host->states_count);
1508 /*
1509 * We need to cleanup these states to avoid
1510 * possible access to already deleted host in
1511 * the state expiration code.
1512 */
1513 nat64lsn_host_cleanup(host);
1514 CK_SLIST_INSERT_HEAD(&hosts, host, entries);
1515 /*
1516 * Keep the host entry for the next deferred destroying.
1517 * In the next epoch its states will not be
1518 * accessible.
1519 */
1520 continue;
1521 }
1522 nat64lsn_destroy_host(host);
1523 }
1524 while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1525 pg = CK_SLIST_FIRST(&ji->portgroups);
1526 CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1527 for (i = 0; i < pg->chunks_count; i++) {
1528 if (FREEMASK_BITCOUNT(pg, i) != 64) {
1529 /*
1530 * A state has been created during
1531 * PG deletion.
1532 */
1533 printf("NAT64LSN: %s: destroying PG %p "
1534 "with non-empty chunk %d\n", __func__,
1535 pg, i);
1536 nat64lsn_pg_cleanup(pg);
1537 CK_SLIST_INSERT_HEAD(&portgroups,
1538 pg, entries);
1539 i = -1;
1540 break;
1541 }
1542 }
1543 if (i != -1)
1544 nat64lsn_destroy_pg(pg);
1545 }
1546 if (CK_SLIST_EMPTY(&hosts) &&
1547 CK_SLIST_EMPTY(&portgroups)) {
1548 uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1549 uma_zfree(nat64lsn_job_zone, ji);
1550 return;
1551 }
1552
1553 /* Schedule job item again */
1554 CK_SLIST_MOVE(&ji->hosts, &hosts, entries);
1555 CK_SLIST_MOVE(&ji->portgroups, &portgroups, entries);
1556 NAT64LSN_EPOCH_CALL(&ji->epoch_ctx, nat64lsn_job_destroy);
1557 }
1558
1559 static int
1560 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1561 const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1562 in_addr_t faddr, uint16_t port, uint8_t proto)
1563 {
1564 struct nat64lsn_job_item *ji;
1565
1566 ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1567 if (ji != NULL) {
1568 ji->m = *mp;
1569 ji->f_id = *f_id;
1570 ji->faddr = faddr;
1571 ji->port = port;
1572 ji->proto = proto;
1573 ji->src6_hval = hval;
1574
1575 nat64lsn_enqueue_job(cfg, ji);
1576 NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1577 *mp = NULL;
1578 }
1579 return (IP_FW_DENY);
1580 }
1581
1582 static int
1583 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1584 const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1585 in_addr_t faddr, uint16_t port, uint8_t proto)
1586 {
1587 struct nat64lsn_job_item *ji;
1588
1589 ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1590 if (ji != NULL) {
1591 ji->m = *mp;
1592 ji->f_id = *f_id;
1593 ji->faddr = faddr;
1594 ji->port = port;
1595 ji->proto = proto;
1596 ji->state_hval = hval;
1597 ji->host = host;
1598
1599 nat64lsn_enqueue_job(cfg, ji);
1600 NAT64STAT_INC(&cfg->base.stats, jportreq);
1601 *mp = NULL;
1602 }
1603 return (IP_FW_DENY);
1604 }
1605
1606 static int
1607 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1608 struct nat64lsn_state *state, uint8_t flags)
1609 {
1610 struct pfloghdr loghdr, *logdata;
1611 int ret;
1612 uint16_t ts;
1613
1614 /* Update timestamp and flags if needed */
1615 SET_AGE(ts);
1616 if (state->timestamp != ts)
1617 state->timestamp = ts;
1618 if ((state->flags & flags) != flags)
1619 state->flags |= flags;
1620
1621 if (cfg->base.flags & NAT64_LOG) {
1622 logdata = &loghdr;
1623 nat64lsn_log(logdata, *mp, AF_INET6, state);
1624 } else
1625 logdata = NULL;
1626
1627 ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1628 htons(state->aport), &cfg->base, logdata);
1629 if (ret == NAT64SKIP)
1630 return (cfg->nomatch_verdict);
1631 if (ret == NAT64RETURN)
1632 *mp = NULL;
1633 return (IP_FW_DENY);
1634 }
1635
1636 static int
1637 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1638 struct mbuf **mp)
1639 {
1640 struct nat64lsn_state *state;
1641 struct nat64lsn_host *host;
1642 struct icmp6_hdr *icmp6;
1643 uint32_t addr, hval, data[2];
1644 int offset, proto;
1645 uint16_t port;
1646 uint8_t flags;
1647
1648 /* Check if protocol is supported */
1649 port = f_id->src_port;
1650 proto = f_id->proto;
1651 switch (f_id->proto) {
1652 case IPPROTO_ICMPV6:
1653 /*
1654 * For ICMPv6 echo reply/request we use icmp6_id as
1655 * local port.
1656 */
1657 offset = 0;
1658 proto = nat64_getlasthdr(*mp, &offset);
1659 if (proto < 0) {
1660 NAT64STAT_INC(&cfg->base.stats, dropped);
1661 DPRINTF(DP_DROPS, "mbuf isn't contiguous");
1662 return (IP_FW_DENY);
1663 }
1664 if (proto == IPPROTO_ICMPV6) {
1665 icmp6 = mtodo(*mp, offset);
1666 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1667 icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1668 port = ntohs(icmp6->icmp6_id);
1669 }
1670 proto = IPPROTO_ICMP;
1671 /* FALLTHROUGH */
1672 case IPPROTO_TCP:
1673 case IPPROTO_UDP:
1674 break;
1675 default:
1676 NAT64STAT_INC(&cfg->base.stats, noproto);
1677 return (cfg->nomatch_verdict);
1678 }
1679
1680 /* Extract IPv4 from destination IPv6 address */
1681 addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
1682 if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1683 char a[INET_ADDRSTRLEN];
1684
1685 NAT64STAT_INC(&cfg->base.stats, dropped);
1686 DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1687 inet_ntop(AF_INET, &addr, a, sizeof(a)));
1688 return (IP_FW_DENY); /* XXX: add extra stats? */
1689 }
1690
1691 /* Try to find host */
1692 hval = HOST_HVAL(cfg, &f_id->src_ip6);
1693 CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1694 if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1695 break;
1696 }
1697 /* We use IPv4 address in host byte order */
1698 addr = ntohl(addr);
1699 if (host == NULL)
1700 return (nat64lsn_request_host(cfg, f_id, mp,
1701 hval, addr, port, proto));
1702
1703 flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
1704
1705 data[0] = addr;
1706 data[1] = (f_id->dst_port << 16) | port;
1707 hval = STATE_HVAL(cfg, data);
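	/*
	 * The state hash key is the foreign IPv4 address (host byte order)
	 * combined with the remote and local ports packed into one word;
	 * the same key is rebuilt in nat64lsn_alloc_host() for the job path.
	 */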
1708 state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
1709 port, proto);
1710 if (state == NULL)
1711 return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1712 port, proto));
1713 return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1714 }
1715
1716 /*
1717 * Main dataplane entry point.
1718 */
1719 int
1720 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1721 ipfw_insn *cmd, int *done)
1722 {
1723 struct nat64lsn_instance *i;
1724 ipfw_insn *icmd;
1725 int ret;
1726
1727 IPFW_RLOCK_ASSERT(ch);
1728
1729 *done = 0; /* continue the search in case of failure */
1730 icmd = cmd + F_LEN(cmd);
1731 if (cmd->opcode != O_EXTERNAL_ACTION ||
1732 insntod(cmd, kidx)->kidx != V_nat64lsn_eid ||
1733 icmd->opcode != O_EXTERNAL_INSTANCE ||
1734 (i = NAT64_LOOKUP(ch, icmd)) == NULL)
1735 return (IP_FW_DENY);
1736
1737 *done = 1; /* terminate the search */
1738
1739 switch (args->f_id.addr_type) {
1740 case 4:
1741 ret = nat64lsn_translate4(i->cfg, &args->f_id, &args->m);
1742 break;
1743 case 6:
1744 /*
1745 * Check that destination IPv6 address matches our prefix6.
1746 */
1747 if ((i->cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1748 memcmp(&args->f_id.dst_ip6, &i->cfg->base.plat_prefix,
1749 i->cfg->base.plat_plen / 8) != 0) {
1750 ret = i->cfg->nomatch_verdict;
1751 break;
1752 }
1753 ret = nat64lsn_translate6(i->cfg, &args->f_id, &args->m);
1754 break;
1755 default:
1756 ret = i->cfg->nomatch_verdict;
1757 }
1758
1759 if (ret != IP_FW_PASS && args->m != NULL) {
1760 m_freem(args->m);
1761 args->m = NULL;
1762 }
1763 return (ret);
1764 }
1765
1766 static int
1767 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1768 {
1769 struct nat64lsn_states_chunk *chunk;
1770 int i;
1771
1772 chunk = (struct nat64lsn_states_chunk *)mem;
1773 for (i = 0; i < 64; i++)
1774 chunk->state[i].flags = 0;
1775 return (0);
1776 }
1777
1778 void
1779 nat64lsn_init_internal(void)
1780 {
1781
1782 nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1783 sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1784 UMA_ALIGN_PTR, 0);
1785 nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1786 sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1787 UMA_ALIGN_PTR, 0);
1788 nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1789 sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1790 UMA_ALIGN_PTR, 0);
1791 nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1792 sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
1793 UMA_ALIGN_PTR, 0);
1794 nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1795 sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1796 NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1797 nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1798 sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
1799 UMA_ALIGN_PTR, 0);
1800 JQUEUE_LOCK_INIT();
1801 }
1802
1803 void
1804 nat64lsn_uninit_internal(void)
1805 {
1806
1807 /* XXX: epoch_task drain */
1808 JQUEUE_LOCK_DESTROY();
1809 uma_zdestroy(nat64lsn_host_zone);
1810 uma_zdestroy(nat64lsn_pgchunk_zone);
1811 uma_zdestroy(nat64lsn_pg_zone);
1812 uma_zdestroy(nat64lsn_aliaslink_zone);
1813 uma_zdestroy(nat64lsn_state_zone);
1814 uma_zdestroy(nat64lsn_job_zone);
1815 }
1816
1817 void
1818 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1819 {
1820
1821 CALLOUT_LOCK(cfg);
1822 callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1823 nat64lsn_periodic, cfg);
1824 CALLOUT_UNLOCK(cfg);
1825 }
1826
1827 struct nat64lsn_cfg *
1828 nat64lsn_init_config(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1829 {
1830 struct nat64lsn_cfg *cfg;
1831 struct nat64lsn_alias *alias;
1832 int i, naddr;
1833
1834 cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1835 M_WAITOK | M_ZERO);
1836
1837 CFG_LOCK_INIT(cfg);
1838 CALLOUT_LOCK_INIT(cfg);
1839 STAILQ_INIT(&cfg->jhead);
1840 cfg->vp = curvnet;
1841 COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1842
1843 cfg->hash_seed = arc4random();
1844 cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1845 cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1846 cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1847 for (i = 0; i < cfg->hosts_hashsize; i++)
1848 CK_SLIST_INIT(&cfg->hosts_hash[i]);
1849
1850 naddr = 1 << (32 - plen);
1851 cfg->prefix4 = prefix;
1852 cfg->pmask4 = prefix | (naddr - 1);
1853 cfg->plen4 = plen;
1854 cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1855 M_NAT64LSN, M_WAITOK | M_ZERO);
1856 for (i = 0; i < naddr; i++) {
1857 alias = &cfg->aliases[i];
1858 alias->addr = prefix + i; /* host byte order */
1859 CK_SLIST_INIT(&alias->hosts);
1860 ALIAS_LOCK_INIT(alias);
1861 }
1862
1863 callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1864 callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1865
1866 return (cfg);
1867 }
1868
1869 static void
1870 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1871 {
1872 int i;
1873
1874 if (pg->chunks_count == 1) {
1875 uma_zfree(nat64lsn_state_zone, pg->states);
1876 } else {
1877 for (i = 0; i < pg->chunks_count; i++)
1878 uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1879 free(pg->states_chunk, M_NAT64LSN);
1880 free(pg->freemask_chunk, M_NAT64LSN);
1881 }
1882 uma_zfree(nat64lsn_pg_zone, pg);
1883 }
1884
1885 static void
1886 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1887 struct nat64lsn_alias *alias)
1888 {
1889 struct nat64lsn_pg *pg;
1890 int i;
1891
1892 while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1893 pg = CK_SLIST_FIRST(&alias->portgroups);
1894 CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1895 nat64lsn_destroy_pg(pg);
1896 }
1897 for (i = 0; i < 32; i++) {
1898 if (ISSET32(alias->tcp_chunkmask, i))
1899 uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1900 if (ISSET32(alias->udp_chunkmask, i))
1901 uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1902 if (ISSET32(alias->icmp_chunkmask, i))
1903 uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1904 }
1905 ALIAS_LOCK_DESTROY(alias);
1906 }
1907
1908 static void
1909 nat64lsn_destroy_host(struct nat64lsn_host *host)
1910 {
1911 struct nat64lsn_aliaslink *link;
1912
1913 while (!CK_SLIST_EMPTY(&host->aliases)) {
1914 link = CK_SLIST_FIRST(&host->aliases);
1915 CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
1916
1917 ALIAS_LOCK(link->alias);
1918 CK_SLIST_REMOVE(&link->alias->hosts, link,
1919 nat64lsn_aliaslink, alias_entries);
1920 link->alias->hosts_count--;
1921 ALIAS_UNLOCK(link->alias);
1922
1923 uma_zfree(nat64lsn_aliaslink_zone, link);
1924 }
1925 HOST_LOCK_DESTROY(host);
1926 free(host->states_hash, M_NAT64LSN);
1927 uma_zfree(nat64lsn_host_zone, host);
1928 }
1929
1930 void
1931 nat64lsn_destroy_config(struct nat64lsn_cfg *cfg)
1932 {
1933 struct nat64lsn_host *host;
1934 int i;
1935
1936 CALLOUT_LOCK(cfg);
1937 callout_drain(&cfg->periodic);
1938 CALLOUT_UNLOCK(cfg);
1939 callout_drain(&cfg->jcallout);
1940
1941 for (i = 0; i < cfg->hosts_hashsize; i++) {
1942 while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1943 host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1944 CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1945 nat64lsn_destroy_host(host);
1946 }
1947 }
1948
1949 for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1950 nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1951
1952 CALLOUT_LOCK_DESTROY(cfg);
1953 CFG_LOCK_DESTROY(cfg);
1954 COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1955 free(cfg->hosts_hash, M_NAT64LSN);
1956 free(cfg->aliases, M_NAT64LSN);
1957 free(cfg, M_NAT64LSN);
1958 }
1959
1960