1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2019 Yandex LLC
5 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6 * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/counter.h>
33 #include <sys/ck.h>
34 #include <sys/epoch.h>
35 #include <sys/errno.h>
36 #include <sys/hash.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/rmlock.h>
43 #include <sys/socket.h>
44 #include <sys/syslog.h>
45 #include <sys/sysctl.h>
46
47 #include <net/if.h>
48 #include <net/if_var.h>
49 #include <net/if_pflog.h>
50 #include <net/pfil.h>
51
52 #include <netinet/in.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/ip_fw.h>
56 #include <netinet/ip6.h>
57 #include <netinet/icmp6.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 #include <netinet6/in6_var.h>
62 #include <netinet6/ip6_var.h>
63 #include <netinet6/ip_fw_nat64.h>
64
65 #include <netpfil/ipfw/ip_fw_private.h>
66 #include <netpfil/pf/pf.h>
67
68 #include "nat64lsn.h"
69
70 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
71
72 #define NAT64LSN_EPOCH_ENTER(et) NET_EPOCH_ENTER(et)
73 #define NAT64LSN_EPOCH_EXIT(et) NET_EPOCH_EXIT(et)
74 #define NAT64LSN_EPOCH_ASSERT() NET_EPOCH_ASSERT()
75 #define NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))
76
77 static uma_zone_t nat64lsn_host_zone;
78 static uma_zone_t nat64lsn_pgchunk_zone;
79 static uma_zone_t nat64lsn_pg_zone;
80 static uma_zone_t nat64lsn_aliaslink_zone;
81 static uma_zone_t nat64lsn_state_zone;
82 static uma_zone_t nat64lsn_job_zone;
83
/* Periodic maintenance callout handler, re-armed every PERIODIC_DELAY seconds. */
static void nat64lsn_periodic(void *data);
#define	PERIODIC_DELAY		4

/*
 * Resolve the nat64lsn instance referenced by an opcode: (cmd)->arg1 is
 * handed to SRV_OBJECT() together with @chain and the result is cast to
 * struct nat64lsn_cfg *.
 *
 * The whole expansion is parenthesized so that the macro can be followed
 * directly by "->" or embedded in a larger expression; without the outer
 * parentheses the cast would apply to the result of the postfix operator.
 */
#define	NAT64_LOOKUP(chain, cmd)	\
	((struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1))
/*
 * Delayed job queue, used to create new hosts and new portgroups, and to
 * destroy expired objects.  The job type selects which part of
 * struct nat64lsn_job_item is valid.
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,	/* allocate a new host */
	JTYPE_NEWPORTGROUP,	/* allocate a new portgroup */
	JTYPE_DESTROY,		/* deferred destroy of expired objects */
};
97
98 struct nat64lsn_job_item {
99 STAILQ_ENTRY(nat64lsn_job_item) entries;
100 enum nat64lsn_jtype jtype;
101
102 union {
103 struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
104 struct mbuf *m;
105 struct nat64lsn_host *host;
106 struct nat64lsn_state *state;
107 uint32_t src6_hval;
108 uint32_t state_hval;
109 struct ipfw_flow_id f_id;
110 in_addr_t faddr;
111 uint16_t port;
112 uint8_t proto;
113 uint8_t done;
114 };
115 struct { /* used by JTYPE_DESTROY */
116 struct nat64lsn_hosts_slist hosts;
117 struct nat64lsn_pg_slist portgroups;
118 struct nat64lsn_pgchunk *pgchunk;
119 struct epoch_context epoch_ctx;
120 };
121 };
122 };
123
124 static struct mtx jmtx;
125 #define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
126 #define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
127 #define JQUEUE_LOCK() mtx_lock(&jmtx)
128 #define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
129
130 static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
131 struct nat64lsn_job_item *ji);
132 static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
133 struct nat64lsn_job_item *ji);
134 static struct nat64lsn_job_item *nat64lsn_create_job(
135 struct nat64lsn_cfg *cfg, int jtype);
136 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
137 struct nat64lsn_job_item *ji);
138 static void nat64lsn_job_destroy(epoch_context_t ctx);
139 static void nat64lsn_destroy_host(struct nat64lsn_host *host);
140 static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
141
142 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
143 const struct ipfw_flow_id *f_id, struct mbuf **mp);
144 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
145 struct ipfw_flow_id *f_id, struct mbuf **mp);
146 static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
147 struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
148
/* Bit numbers used in the flags word of a state. */
#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

/* The same TCP bits expressed as masks. */
#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

/* Life cycle masks. */
#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)
162
/*
 * Convert TCP header flags (TH_*) into the TCP bits stored in a state.
 * FIN and SYN are taken over unchanged; RST and ACK are shifted right by
 * two so that RST is recorded as FIN and ACK as "established".
 * All other header flags are dropped.
 */
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	/* Treat RST as FIN */
	const uint8_t closing = (flags & TH_RST) >> 2;
	/* Treat ACK as estab */
	const uint8_t acked = (flags & TH_ACK) >> 2;

	return ((flags & (TH_FIN | TH_SYN)) | closing | acked);
}
174
175 static void
nat64lsn_log(struct pfloghdr * plog,struct mbuf * m,sa_family_t family,struct nat64lsn_state * state)176 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
177 struct nat64lsn_state *state)
178 {
179
180 memset(plog, 0, sizeof(*plog));
181 plog->length = PFLOG_HDRLEN;
182 plog->af = family;
183 plog->action = PF_NAT;
184 plog->dir = PF_IN;
185 plog->rulenr = htonl(state->ip_src);
186 plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
187 (state->proto << 8) | (state->ip_dst & 0xff));
188 plog->ruleset[0] = '\0';
189 strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
190 ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
191 }
192
/* Hash @n 32-bit words starting at @p with seed @s. */
#define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))

/* Hash of one IPv6 address @a, seeded with the instance's hash_seed. */
#define	HOST_HVAL(c, a)	HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
/*
 * Hosts hash bucket for hash value @v; the value is masked with
 * hosts_hashsize - 1, so the size is expected to be a power of two.
 */
#define	HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

/*
 * Hash over 2 * sizeof(struct in6_addr) bytes starting at (f)->dst_ip6.
 * NOTE(review): relies on the second address directly following dst_ip6
 * in struct ipfw_flow_id -- confirm against its definition.
 */
#define	ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
/* Alias selected by the low (32 - plen4) bits of @v. */
#define	ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
202 static struct nat64lsn_aliaslink*
nat64lsn_get_aliaslink(struct nat64lsn_cfg * cfg __unused,struct nat64lsn_host * host,const struct ipfw_flow_id * f_id __unused)203 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
204 struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
205 {
206
207 /*
208 * We can implement some different algorithms how
209 * select an alias address.
210 * XXX: for now we use first available.
211 */
212 return (CK_SLIST_FIRST(&host->aliases));
213 }
214
/* Hash of a two-word state key @d, seeded with the instance's hash_seed. */
#define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
/* States hash bucket of host @h for hash value @v. */
#define	STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
/*
 * States chunk of portgroup @p for @v: a PG with a single chunk uses
 * (p)->states directly, otherwise the chunk is picked with
 * CHUNK_BY_FADDR().
 */
#define	STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
221
222 #ifdef __LP64__
/*
 * LP64 variant: the free mask of a chunk is accessed as one 64-bit word.
 * A set bit marks a free state.
 */
/* 1-based index of the first free state, 0 if there is none. */
#define	FREEMASK_FFSLL(pg, faddr)		\
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
/* Atomically test and clear @bit (grab a state). */
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
/* Atomically test and set @bit (release a state). */
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
/* Non-zero if @bit is set, i.e. the state is free. */
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
/* Load the whole mask into the 64-bit lvalue @out. */
#define	FREEMASK_COPY(pg, n, out)		\
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
233 #else
/*
 * ffsll() replacement for a 64-bit mask kept as two 32-bit words, with
 * freemask[0] holding bits 0..31 and freemask[1] bits 32..63.
 * Returns the 1-based index of the lowest set bit, or 0 when both words
 * are zero.
 */
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int word, bit;

	for (word = 0; word < 2; word++) {
		bit = ffsl(freemask[word]);
		if (bit != 0)
			return (bit + word * 32);
	}
	return (0);
}
/*
 * 32-bit variant: the 64-bit free mask is accessed as two 32-bit words,
 * bit N lives in word N / 32 at position N % 32.
 */
/* 1-based index of the first free state, 0 if there is none. */
#define	FREEMASK_FFSLL(pg, faddr)		\
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
/* Atomically test and clear @bit (grab a state). */
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
/* Atomically test and set @bit (release a state). */
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
/* Non-zero if @bit is set, i.e. the state is free. */
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
/* Combine both words into the 64-bit lvalue @out (two separate loads). */
#define	FREEMASK_COPY(pg, n, out)		\
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) |	\
	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
256 #endif /* !__LP64__ */
257
258 #define NAT64LSN_TRY_PGCNT 32
259 static struct nat64lsn_pg*
nat64lsn_get_pg(uint32_t * chunkmask,uint32_t * pgmask,struct nat64lsn_pgchunk ** chunks,struct nat64lsn_pg ** pgptr,uint32_t * pgidx,in_addr_t faddr)260 nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
261 struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
262 uint32_t *pgidx, in_addr_t faddr)
263 {
264 struct nat64lsn_pg *pg, *oldpg;
265 uint32_t idx, oldidx;
266 int cnt;
267
268 cnt = 0;
269 /* First try last used PG */
270 oldpg = pg = ck_pr_load_ptr(pgptr);
271 idx = oldidx = ck_pr_load_32(pgidx);
272 /* If pgidx is out of range, reset it to the first pgchunk */
273 if (!ISSET32(*chunkmask, idx / 32))
274 idx = 0;
275 do {
276 ck_pr_fence_load();
277 if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
278 /*
279 * If last used PG has not free states,
280 * try to update pointer.
281 * NOTE: it can be already updated by jobs handler,
282 * thus we use CAS operation.
283 */
284 if (cnt > 0)
285 ck_pr_cas_ptr(pgptr, oldpg, pg);
286 return (pg);
287 }
288 /* Stop if idx is out of range */
289 if (!ISSET32(*chunkmask, idx / 32))
290 break;
291
292 if (ISSET32(pgmask[idx / 32], idx % 32))
293 pg = ck_pr_load_ptr(
294 &chunks[idx / 32]->pgptr[idx % 32]);
295 else
296 pg = NULL;
297
298 idx++;
299 } while (++cnt < NAT64LSN_TRY_PGCNT);
300
301 /* If pgidx is out of range, reset it to the first pgchunk */
302 if (!ISSET32(*chunkmask, idx / 32))
303 idx = 0;
304 ck_pr_cas_32(pgidx, oldidx, idx);
305 return (NULL);
306 }
307
308 static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg * cfg,struct nat64lsn_host * host,const struct ipfw_flow_id * f_id,uint32_t hval,in_addr_t faddr,uint16_t port,uint8_t proto)309 nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
310 const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
311 uint16_t port, uint8_t proto)
312 {
313 struct nat64lsn_aliaslink *link;
314 struct nat64lsn_state *state;
315 struct nat64lsn_pg *pg;
316 int i, offset;
317
318 NAT64LSN_EPOCH_ASSERT();
319
320 /* Check that we already have state for given arguments */
321 CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
322 if (state->proto == proto && state->ip_dst == faddr &&
323 state->sport == port && state->dport == f_id->dst_port)
324 return (state);
325 }
326
327 link = nat64lsn_get_aliaslink(cfg, host, f_id);
328 if (link == NULL)
329 return (NULL);
330
331 switch (proto) {
332 case IPPROTO_TCP:
333 pg = nat64lsn_get_pg(
334 &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
335 link->alias->tcp, &link->alias->tcp_pg,
336 &link->alias->tcp_pgidx, faddr);
337 break;
338 case IPPROTO_UDP:
339 pg = nat64lsn_get_pg(
340 &link->alias->udp_chunkmask, link->alias->udp_pgmask,
341 link->alias->udp, &link->alias->udp_pg,
342 &link->alias->udp_pgidx, faddr);
343 break;
344 case IPPROTO_ICMP:
345 pg = nat64lsn_get_pg(
346 &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
347 link->alias->icmp, &link->alias->icmp_pg,
348 &link->alias->icmp_pgidx, faddr);
349 break;
350 default:
351 panic("%s: wrong proto %d", __func__, proto);
352 }
353 if (pg == NULL)
354 return (NULL);
355
356 /* Check that PG has some free states */
357 state = NULL;
358 i = FREEMASK_BITCOUNT(pg, faddr);
359 while (i-- > 0) {
360 offset = FREEMASK_FFSLL(pg, faddr);
361 if (offset == 0) {
362 /*
363 * We lost the race.
364 * No more free states in this PG.
365 */
366 break;
367 }
368
369 /* Lets try to atomically grab the state */
370 if (FREEMASK_BTR(pg, faddr, offset - 1)) {
371 state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
372 /* Initialize */
373 state->flags = proto != IPPROTO_TCP ? 0 :
374 convert_tcp_flags(f_id->_flags);
375 state->proto = proto;
376 state->aport = pg->base_port + offset - 1;
377 state->dport = f_id->dst_port;
378 state->sport = port;
379 state->ip6_dst = f_id->dst_ip6;
380 state->ip_dst = faddr;
381 state->ip_src = link->alias->addr;
382 state->hval = hval;
383 state->host = host;
384 SET_AGE(state->timestamp);
385
386 /* Insert new state into host's hash table */
387 HOST_LOCK(host);
388 CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
389 state, entries);
390 host->states_count++;
391 /*
392 * XXX: In case if host is going to be expired,
393 * reset NAT64LSN_DEADHOST flag.
394 */
395 host->flags &= ~NAT64LSN_DEADHOST;
396 HOST_UNLOCK(host);
397 NAT64STAT_INC(&cfg->base.stats, screated);
398 /* Mark the state as ready for translate4 */
399 ck_pr_fence_store();
400 ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
401 break;
402 }
403 }
404 return (state);
405 }
406
407 /*
408 * Inspects icmp packets to see if the message contains different
409 * packet header so we need to alter @addr and @port.
410 */
411 static int
inspect_icmp_mbuf(struct mbuf ** mp,uint8_t * proto,uint32_t * addr,uint16_t * port)412 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
413 uint16_t *port)
414 {
415 struct icmp *icmp;
416 struct ip *ip;
417 int off;
418 uint8_t inner_proto;
419
420 ip = mtod(*mp, struct ip *); /* Outer IP header */
421 off = (ip->ip_hl << 2) + ICMP_MINLEN;
422 if ((*mp)->m_len < off)
423 *mp = m_pullup(*mp, off);
424 if (*mp == NULL)
425 return (ENOMEM);
426
427 ip = mtod(*mp, struct ip *); /* Outer IP header */
428 icmp = L3HDR(ip, struct icmp *);
429 switch (icmp->icmp_type) {
430 case ICMP_ECHO:
431 case ICMP_ECHOREPLY:
432 /* Use icmp ID as distinguisher */
433 *port = ntohs(icmp->icmp_id);
434 return (0);
435 case ICMP_UNREACH:
436 case ICMP_TIMXCEED:
437 break;
438 default:
439 return (EOPNOTSUPP);
440 }
441 /*
442 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
443 * of ULP header.
444 */
445 if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
446 return (EINVAL);
447 if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
448 *mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
449 if (*mp == NULL)
450 return (ENOMEM);
451 ip = mtodo(*mp, off); /* Inner IP header */
452 inner_proto = ip->ip_p;
453 off += ip->ip_hl << 2; /* Skip inner IP header */
454 *addr = ntohl(ip->ip_src.s_addr);
455 if ((*mp)->m_len < off + ICMP_MINLEN)
456 *mp = m_pullup(*mp, off + ICMP_MINLEN);
457 if (*mp == NULL)
458 return (ENOMEM);
459 switch (inner_proto) {
460 case IPPROTO_TCP:
461 case IPPROTO_UDP:
462 /* Copy source port from the header */
463 *port = ntohs(*((uint16_t *)mtodo(*mp, off)));
464 *proto = inner_proto;
465 return (0);
466 case IPPROTO_ICMP:
467 /*
468 * We will translate only ICMP errors for our ICMP
469 * echo requests.
470 */
471 icmp = mtodo(*mp, off);
472 if (icmp->icmp_type != ICMP_ECHO)
473 return (EOPNOTSUPP);
474 *port = ntohs(icmp->icmp_id);
475 return (0);
476 };
477 return (EOPNOTSUPP);
478 }
479
480 static struct nat64lsn_state*
nat64lsn_get_state4to6(struct nat64lsn_cfg * cfg,struct nat64lsn_alias * alias,in_addr_t faddr,uint16_t port,uint8_t proto)481 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
482 in_addr_t faddr, uint16_t port, uint8_t proto)
483 {
484 struct nat64lsn_state *state;
485 struct nat64lsn_pg *pg;
486 int chunk_idx, pg_idx, state_idx;
487
488 NAT64LSN_EPOCH_ASSERT();
489
490 if (port < NAT64_MIN_PORT)
491 return (NULL);
492 /*
493 * Alias keeps 32 pgchunks for each protocol.
494 * Each pgchunk has 32 pointers to portgroup.
495 * Each portgroup has 64 states for ports.
496 */
497 port -= NAT64_MIN_PORT;
498 chunk_idx = port / 2048;
499
500 port -= chunk_idx * 2048;
501 pg_idx = port / 64;
502 state_idx = port % 64;
503
504 /*
505 * First check in proto_chunkmask that we have allocated PG chunk.
506 * Then check in proto_pgmask that we have valid PG pointer.
507 */
508 pg = NULL;
509 switch (proto) {
510 case IPPROTO_TCP:
511 if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
512 ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
513 pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
514 break;
515 }
516 return (NULL);
517 case IPPROTO_UDP:
518 if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
519 ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
520 pg = alias->udp[chunk_idx]->pgptr[pg_idx];
521 break;
522 }
523 return (NULL);
524 case IPPROTO_ICMP:
525 if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
526 ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
527 pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
528 break;
529 }
530 return (NULL);
531 default:
532 panic("%s: wrong proto %d", __func__, proto);
533 }
534 if (pg == NULL)
535 return (NULL);
536
537 if (FREEMASK_ISSET(pg, faddr, state_idx))
538 return (NULL);
539
540 state = &STATES_CHUNK(pg, faddr)->state[state_idx];
541 ck_pr_fence_load();
542 if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
543 return (state);
544 return (NULL);
545 }
546
547 /*
548 * Reassemble IPv4 fragments, make PULLUP if needed, get some ULP fields
549 * that might be unknown until reassembling is completed.
550 */
551 static struct mbuf*
nat64lsn_reassemble4(struct nat64lsn_cfg * cfg,struct mbuf * m,uint16_t * port)552 nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
553 uint16_t *port)
554 {
555 struct ip *ip;
556 int len;
557
558 m = ip_reass(m);
559 if (m == NULL)
560 return (NULL);
561 /* IP header must be contigious after ip_reass() */
562 ip = mtod(m, struct ip *);
563 len = ip->ip_hl << 2;
564 switch (ip->ip_p) {
565 case IPPROTO_ICMP:
566 len += ICMP_MINLEN; /* Enough to get icmp_id */
567 break;
568 case IPPROTO_TCP:
569 len += sizeof(struct tcphdr);
570 break;
571 case IPPROTO_UDP:
572 len += sizeof(struct udphdr);
573 break;
574 default:
575 m_freem(m);
576 NAT64STAT_INC(&cfg->base.stats, noproto);
577 return (NULL);
578 }
579 if (m->m_len < len) {
580 m = m_pullup(m, len);
581 if (m == NULL) {
582 NAT64STAT_INC(&cfg->base.stats, nomem);
583 return (NULL);
584 }
585 ip = mtod(m, struct ip *);
586 }
587 switch (ip->ip_p) {
588 case IPPROTO_TCP:
589 *port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
590 break;
591 case IPPROTO_UDP:
592 *port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
593 break;
594 }
595 return (m);
596 }
597
598 static int
nat64lsn_translate4(struct nat64lsn_cfg * cfg,const struct ipfw_flow_id * f_id,struct mbuf ** mp)599 nat64lsn_translate4(struct nat64lsn_cfg *cfg,
600 const struct ipfw_flow_id *f_id, struct mbuf **mp)
601 {
602 struct pfloghdr loghdr, *logdata;
603 struct in6_addr src6;
604 struct nat64lsn_state *state;
605 struct nat64lsn_alias *alias;
606 uint32_t addr, flags;
607 uint16_t port, ts;
608 int ret;
609 uint8_t proto;
610
611 addr = f_id->dst_ip;
612 port = f_id->dst_port;
613 proto = f_id->proto;
614 if (addr < cfg->prefix4 || addr > cfg->pmask4) {
615 NAT64STAT_INC(&cfg->base.stats, nomatch4);
616 return (cfg->nomatch_verdict);
617 }
618
619 /* Reassemble fragments if needed */
620 ret = ntohs(mtod(*mp, struct ip *)->ip_off);
621 if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
622 *mp = nat64lsn_reassemble4(cfg, *mp, &port);
623 if (*mp == NULL)
624 return (IP_FW_DENY);
625 }
626
627 /* Check if protocol is supported */
628 switch (proto) {
629 case IPPROTO_ICMP:
630 ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
631 if (ret != 0) {
632 if (ret == ENOMEM) {
633 NAT64STAT_INC(&cfg->base.stats, nomem);
634 return (IP_FW_DENY);
635 }
636 NAT64STAT_INC(&cfg->base.stats, noproto);
637 return (cfg->nomatch_verdict);
638 }
639 if (addr < cfg->prefix4 || addr > cfg->pmask4) {
640 NAT64STAT_INC(&cfg->base.stats, nomatch4);
641 return (cfg->nomatch_verdict);
642 }
643 /* FALLTHROUGH */
644 case IPPROTO_TCP:
645 case IPPROTO_UDP:
646 break;
647 default:
648 NAT64STAT_INC(&cfg->base.stats, noproto);
649 return (cfg->nomatch_verdict);
650 }
651
652 alias = &ALIAS_BYHASH(cfg, addr);
653 MPASS(addr == alias->addr);
654
655 /* Check that we have state for this port */
656 state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
657 port, proto);
658 if (state == NULL) {
659 NAT64STAT_INC(&cfg->base.stats, nomatch4);
660 return (cfg->nomatch_verdict);
661 }
662
663 /* TODO: Check flags to see if we need to do some static mapping */
664
665 /* Update some state fields if need */
666 SET_AGE(ts);
667 if (f_id->proto == IPPROTO_TCP)
668 flags = convert_tcp_flags(f_id->_flags);
669 else
670 flags = 0;
671 if (state->timestamp != ts)
672 state->timestamp = ts;
673 if ((state->flags & flags) != flags)
674 state->flags |= flags;
675
676 port = htons(state->sport);
677 src6 = state->ip6_dst;
678
679 if (cfg->base.flags & NAT64_LOG) {
680 logdata = &loghdr;
681 nat64lsn_log(logdata, *mp, AF_INET, state);
682 } else
683 logdata = NULL;
684
685 /*
686 * We already have src6 with embedded address, but it is possible,
687 * that src_ip is different than state->ip_dst, this is why we
688 * do embedding again.
689 */
690 nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
691 ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
692 &cfg->base, logdata);
693 if (ret == NAT64SKIP)
694 return (cfg->nomatch_verdict);
695 if (ret == NAT64RETURN)
696 *mp = NULL;
697 return (IP_FW_DENY);
698 }
699
700 /*
701 * Check if particular state is stale and should be deleted.
702 * Return 1 if true, 0 otherwise.
703 */
704 static int
nat64lsn_check_state(struct nat64lsn_cfg * cfg,struct nat64lsn_state * state)705 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
706 {
707 int age, ttl;
708
709 /* State was marked as stale in previous pass. */
710 if (ISSET32(state->flags, NAT64_BIT_STALE))
711 return (1);
712
713 /* State is not yet initialized, it is going to be READY */
714 if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
715 return (0);
716
717 age = GET_AGE(state->timestamp);
718 switch (state->proto) {
719 case IPPROTO_TCP:
720 if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
721 ttl = cfg->st_close_ttl;
722 else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
723 ttl = cfg->st_estab_ttl;
724 else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
725 ttl = cfg->st_syn_ttl;
726 else
727 ttl = cfg->st_syn_ttl;
728 if (age > ttl)
729 return (1);
730 break;
731 case IPPROTO_UDP:
732 if (age > cfg->st_udp_ttl)
733 return (1);
734 break;
735 case IPPROTO_ICMP:
736 if (age > cfg->st_icmp_ttl)
737 return (1);
738 break;
739 }
740 return (0);
741 }
742
743 static int
nat64lsn_maintain_pg(struct nat64lsn_cfg * cfg,struct nat64lsn_pg * pg)744 nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
745 {
746 struct nat64lsn_state *state;
747 struct nat64lsn_host *host;
748 uint64_t freemask;
749 int c, i, update_age;
750
751 update_age = 0;
752 for (c = 0; c < pg->chunks_count; c++) {
753 FREEMASK_COPY(pg, c, freemask);
754 for (i = 0; i < 64; i++) {
755 if (ISSET64(freemask, i))
756 continue;
757 state = &STATES_CHUNK(pg, c)->state[i];
758 if (nat64lsn_check_state(cfg, state) == 0) {
759 update_age = 1;
760 continue;
761 }
762 /*
763 * Expire state:
764 * 1. Mark as STALE and unlink from host's hash.
765 * 2. Set bit in freemask.
766 */
767 if (ISSET32(state->flags, NAT64_BIT_STALE)) {
768 /*
769 * State was marked as STALE in previous
770 * pass. Now it is safe to release it.
771 */
772 state->flags = 0;
773 ck_pr_fence_store();
774 FREEMASK_BTS(pg, c, i);
775 NAT64STAT_INC(&cfg->base.stats, sdeleted);
776 continue;
777 }
778 MPASS(state->flags & NAT64_FLAG_READY);
779
780 host = state->host;
781 HOST_LOCK(host);
782 CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
783 state, nat64lsn_state, entries);
784 host->states_count--;
785 HOST_UNLOCK(host);
786
787 /* Reset READY flag */
788 ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
789 /* And set STALE flag */
790 ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
791 ck_pr_fence_store();
792 /*
793 * Now translate6 will not use this state, wait
794 * until it become safe for translate4, then mark
795 * state as free.
796 */
797 }
798 }
799
800 /*
801 * We have some alive states, update timestamp.
802 */
803 if (update_age)
804 SET_AGE(pg->timestamp);
805
806 if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
807 return (0);
808
809 return (1);
810 }
811
812 static void
nat64lsn_expire_portgroups(struct nat64lsn_cfg * cfg,struct nat64lsn_pg_slist * portgroups)813 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
814 struct nat64lsn_pg_slist *portgroups)
815 {
816 struct nat64lsn_alias *alias;
817 struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
818 uint32_t *pgmask, *pgidx;
819 int i, idx;
820
821 for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
822 alias = &cfg->aliases[i];
823 CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
824 if (nat64lsn_maintain_pg(cfg, pg) == 0)
825 continue;
826 /* Always keep first PG */
827 if (pg->base_port == NAT64_MIN_PORT)
828 continue;
829 /*
830 * PG is expired, unlink it and schedule for
831 * deferred destroying.
832 */
833 idx = (pg->base_port - NAT64_MIN_PORT) / 64;
834 switch (pg->proto) {
835 case IPPROTO_TCP:
836 pgmask = alias->tcp_pgmask;
837 pgptr = &alias->tcp_pg;
838 pgidx = &alias->tcp_pgidx;
839 firstpg = alias->tcp[0]->pgptr[0];
840 break;
841 case IPPROTO_UDP:
842 pgmask = alias->udp_pgmask;
843 pgptr = &alias->udp_pg;
844 pgidx = &alias->udp_pgidx;
845 firstpg = alias->udp[0]->pgptr[0];
846 break;
847 case IPPROTO_ICMP:
848 pgmask = alias->icmp_pgmask;
849 pgptr = &alias->icmp_pg;
850 pgidx = &alias->icmp_pgidx;
851 firstpg = alias->icmp[0]->pgptr[0];
852 break;
853 }
854 /* Reset the corresponding bit in pgmask array. */
855 ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
856 ck_pr_fence_store();
857 /* If last used PG points to this PG, reset it. */
858 ck_pr_cas_ptr(pgptr, pg, firstpg);
859 ck_pr_cas_32(pgidx, idx, 0);
860 /* Unlink PG from alias's chain */
861 ALIAS_LOCK(alias);
862 CK_SLIST_REMOVE(&alias->portgroups, pg,
863 nat64lsn_pg, entries);
864 alias->portgroups_count--;
865 ALIAS_UNLOCK(alias);
866 /* And link to job's chain for deferred destroying */
867 NAT64STAT_INC(&cfg->base.stats, spgdeleted);
868 CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
869 }
870 }
871 }
872
873 static void
nat64lsn_expire_hosts(struct nat64lsn_cfg * cfg,struct nat64lsn_hosts_slist * hosts)874 nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
875 struct nat64lsn_hosts_slist *hosts)
876 {
877 struct nat64lsn_host *host, *tmp;
878 int i;
879
880 for (i = 0; i < cfg->hosts_hashsize; i++) {
881 CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
882 entries, tmp) {
883 /* Is host was marked in previous call? */
884 if (host->flags & NAT64LSN_DEADHOST) {
885 if (host->states_count > 0) {
886 host->flags &= ~NAT64LSN_DEADHOST;
887 continue;
888 }
889 /*
890 * Unlink host from hash table and schedule
891 * it for deferred destroying.
892 */
893 CFG_LOCK(cfg);
894 CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
895 nat64lsn_host, entries);
896 cfg->hosts_count--;
897 CFG_UNLOCK(cfg);
898 CK_SLIST_INSERT_HEAD(hosts, host, entries);
899 continue;
900 }
901 if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
902 continue;
903 if (host->states_count > 0)
904 continue;
905 /* Mark host as going to be expired in next pass */
906 host->flags |= NAT64LSN_DEADHOST;
907 ck_pr_fence_store();
908 }
909 }
910 }
911
/*
 * Select a pgchunk that can be destroyed.
 *
 * The implementation is compiled out (#if 0) and incomplete, so at the
 * moment no chunk is ever selected and NULL is always returned.
 */
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}
941
#if 0
/*
 * Disabled draft of growing the per-host states hash.
 *
 * For every host whose states_count is at least twice its
 * states_hashsize (and whose hash is still below NAT64LSN_MAX_HSIZE), a
 * table of double size is allocated and initialized and the
 * NAT64LSN_GROWHASH bit is set in the host's flags.
 *
 * TODO: the new table is neither installed into the host nor freed, so
 * it is leaked as written; this has to be finished before the code can
 * be enabled.
 */
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			if (h->states_count / 2 < h->states_hashsize ||
			    h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				continue;
			hsize = h->states_hashsize * 2;
			/* Kernel malloc() needs the malloc type as well. */
			hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN,
			    M_NOWAIT);
			if (hash == NULL)
				continue;
			/* Index with j: i is the hosts bucket index. */
			for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[j]);

			ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif
967
968 /*
969 * This procedure is used to perform various maintenance
970 * on dynamic hash list. Currently it is called every 4 seconds.
971 */
972 static void
nat64lsn_periodic(void * data)973 nat64lsn_periodic(void *data)
974 {
975 struct nat64lsn_job_item *ji;
976 struct nat64lsn_cfg *cfg;
977
978 cfg = (struct nat64lsn_cfg *) data;
979 CURVNET_SET(cfg->vp);
980 if (cfg->hosts_count > 0) {
981 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
982 if (ji != NULL) {
983 ji->jtype = JTYPE_DESTROY;
984 CK_SLIST_INIT(&ji->hosts);
985 CK_SLIST_INIT(&ji->portgroups);
986 nat64lsn_expire_hosts(cfg, &ji->hosts);
987 nat64lsn_expire_portgroups(cfg, &ji->portgroups);
988 ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
989 NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
990 nat64lsn_job_destroy);
991 } else
992 NAT64STAT_INC(&cfg->base.stats, jnomem);
993 }
994 callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
995 CURVNET_RESTORE();
996 }
997
/*
 * Error codes of the allocation routines: 0 means success (stage 0),
 * otherwise the code is 10 * type + stage, i.e. 1x for host allocation
 * and 2x for portgroup allocation, where x is the stage that failed.
 */
#define	ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage): 0)
#define	HOST_ERROR(stage)		ALLOC_ERROR(stage, 1)
#define	PG_ERROR(stage)			ALLOC_ERROR(stage, 2)
1001 static int
nat64lsn_alloc_host(struct nat64lsn_cfg * cfg,struct nat64lsn_job_item * ji)1002 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1003 {
1004 char a[INET6_ADDRSTRLEN];
1005 struct nat64lsn_aliaslink *link;
1006 struct nat64lsn_host *host;
1007 struct nat64lsn_state *state;
1008 uint32_t hval, data[2];
1009 int i;
1010
1011 /* Check that host was not yet added. */
1012 NAT64LSN_EPOCH_ASSERT();
1013 CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
1014 if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
1015 /* The host was allocated in previous call. */
1016 ji->host = host;
1017 goto get_state;
1018 }
1019 }
1020
1021 host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
1022 if (ji->host == NULL)
1023 return (HOST_ERROR(1));
1024
1025 host->states_hashsize = NAT64LSN_HSIZE;
1026 host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
1027 host->states_hashsize, M_NAT64LSN, M_NOWAIT);
1028 if (host->states_hash == NULL) {
1029 uma_zfree(nat64lsn_host_zone, host);
1030 return (HOST_ERROR(2));
1031 }
1032
1033 link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
1034 if (link == NULL) {
1035 free(host->states_hash, M_NAT64LSN);
1036 uma_zfree(nat64lsn_host_zone, host);
1037 return (HOST_ERROR(3));
1038 }
1039
1040 /* Initialize */
1041 HOST_LOCK_INIT(host);
1042 SET_AGE(host->timestamp);
1043 host->addr = ji->f_id.src_ip6;
1044 host->hval = ji->src6_hval;
1045 host->flags = 0;
1046 host->states_count = 0;
1047 host->states_hashsize = NAT64LSN_HSIZE;
1048 CK_SLIST_INIT(&host->aliases);
1049 for (i = 0; i < host->states_hashsize; i++)
1050 CK_SLIST_INIT(&host->states_hash[i]);
1051
1052 /* Determine alias from flow hash. */
1053 hval = ALIASLINK_HVAL(cfg, &ji->f_id);
1054 link->alias = &ALIAS_BYHASH(cfg, hval);
1055 CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1056
1057 ALIAS_LOCK(link->alias);
1058 CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1059 link->alias->hosts_count++;
1060 ALIAS_UNLOCK(link->alias);
1061
1062 CFG_LOCK(cfg);
1063 CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
1064 cfg->hosts_count++;
1065 CFG_UNLOCK(cfg);
1066
1067 get_state:
1068 data[0] = ji->faddr;
1069 data[1] = (ji->f_id.dst_port << 16) | ji->port;
1070 ji->state_hval = hval = STATE_HVAL(cfg, data);
1071 state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1072 ji->faddr, ji->port, ji->proto);
1073 /*
1074 * We failed to obtain new state, used alias needs new PG.
1075 * XXX: or another alias should be used.
1076 */
1077 if (state == NULL) {
1078 /* Try to allocate new PG */
1079 if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1080 return (HOST_ERROR(4));
1081 /* We assume that nat64lsn_alloc_pg() got state */
1082 } else
1083 ji->state = state;
1084
1085 ji->done = 1;
1086 DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1087 inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1088 return (HOST_ERROR(0));
1089 }
1090
/*
 * Find the first zero bit in a bitmap made of 32 consecutive 32-bit
 * words.  Returns the bit index (word * 32 + bit within the word) or -1
 * when every bit is set.
 */
static int
nat64lsn_find_pg_place(uint32_t *data)
{
	uint32_t avail;
	int word;

	word = 0;
	while (word < 32) {
		/* Bits set in "avail" are the clear bits of this word. */
		avail = ~data[word];
		if (avail != 0)
			return (word * 32 + ffs(avail) - 1);
		word++;
	}
	return (-1);
}
1103
1104 static int
nat64lsn_alloc_proto_pg(struct nat64lsn_cfg * cfg,struct nat64lsn_alias * alias,uint32_t * chunkmask,uint32_t * pgmask,struct nat64lsn_pgchunk ** chunks,struct nat64lsn_pg ** pgptr,uint8_t proto)1105 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1106 struct nat64lsn_alias *alias, uint32_t *chunkmask,
1107 uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
1108 struct nat64lsn_pg **pgptr, uint8_t proto)
1109 {
1110 struct nat64lsn_pg *pg;
1111 int i, pg_idx, chunk_idx;
1112
1113 /* Find place in pgchunk where PG can be added */
1114 pg_idx = nat64lsn_find_pg_place(pgmask);
1115 if (pg_idx < 0) /* no more PGs */
1116 return (PG_ERROR(1));
1117 /* Check that we have allocated pgchunk for given PG index */
1118 chunk_idx = pg_idx / 32;
1119 if (!ISSET32(*chunkmask, chunk_idx)) {
1120 chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1121 M_NOWAIT);
1122 if (chunks[chunk_idx] == NULL)
1123 return (PG_ERROR(2));
1124 ck_pr_bts_32(chunkmask, chunk_idx);
1125 ck_pr_fence_store();
1126 }
1127 /* Allocate PG and states chunks */
1128 pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1129 if (pg == NULL)
1130 return (PG_ERROR(3));
1131 pg->chunks_count = cfg->states_chunks;
1132 if (pg->chunks_count > 1) {
1133 pg->freemask_chunk = malloc(pg->chunks_count *
1134 sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1135 if (pg->freemask_chunk == NULL) {
1136 uma_zfree(nat64lsn_pg_zone, pg);
1137 return (PG_ERROR(4));
1138 }
1139 pg->states_chunk = malloc(pg->chunks_count *
1140 sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1141 M_NOWAIT | M_ZERO);
1142 if (pg->states_chunk == NULL) {
1143 free(pg->freemask_chunk, M_NAT64LSN);
1144 uma_zfree(nat64lsn_pg_zone, pg);
1145 return (PG_ERROR(5));
1146 }
1147 for (i = 0; i < pg->chunks_count; i++) {
1148 pg->states_chunk[i] = uma_zalloc(
1149 nat64lsn_state_zone, M_NOWAIT);
1150 if (pg->states_chunk[i] == NULL)
1151 goto states_failed;
1152 }
1153 memset(pg->freemask_chunk, 0xff,
1154 sizeof(uint64_t) * pg->chunks_count);
1155 } else {
1156 pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1157 if (pg->states == NULL) {
1158 uma_zfree(nat64lsn_pg_zone, pg);
1159 return (PG_ERROR(6));
1160 }
1161 memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1162 }
1163
1164 /* Initialize PG and hook it to pgchunk */
1165 SET_AGE(pg->timestamp);
1166 pg->proto = proto;
1167 pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
1168 ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1169 ck_pr_fence_store();
1170 ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
1171 ck_pr_store_ptr(pgptr, pg);
1172
1173 ALIAS_LOCK(alias);
1174 CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1175 SET_AGE(alias->timestamp);
1176 alias->portgroups_count++;
1177 ALIAS_UNLOCK(alias);
1178 NAT64STAT_INC(&cfg->base.stats, spgcreated);
1179 return (PG_ERROR(0));
1180
1181 states_failed:
1182 for (i = 0; i < pg->chunks_count; i++)
1183 uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1184 free(pg->freemask_chunk, M_NAT64LSN);
1185 free(pg->states_chunk, M_NAT64LSN);
1186 uma_zfree(nat64lsn_pg_zone, pg);
1187 return (PG_ERROR(7));
1188 }
1189
1190 static int
nat64lsn_alloc_pg(struct nat64lsn_cfg * cfg,struct nat64lsn_job_item * ji)1191 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1192 {
1193 struct nat64lsn_aliaslink *link;
1194 struct nat64lsn_alias *alias;
1195 int ret;
1196
1197 link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1198 if (link == NULL)
1199 return (PG_ERROR(1));
1200
1201 /*
1202 * TODO: check that we did not already allocated PG in
1203 * previous call.
1204 */
1205
1206 ret = 0;
1207 alias = link->alias;
1208 /* Find place in pgchunk where PG can be added */
1209 switch (ji->proto) {
1210 case IPPROTO_TCP:
1211 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1212 &alias->tcp_chunkmask, alias->tcp_pgmask,
1213 alias->tcp, &alias->tcp_pg, ji->proto);
1214 break;
1215 case IPPROTO_UDP:
1216 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1217 &alias->udp_chunkmask, alias->udp_pgmask,
1218 alias->udp, &alias->udp_pg, ji->proto);
1219 break;
1220 case IPPROTO_ICMP:
1221 ret = nat64lsn_alloc_proto_pg(cfg, alias,
1222 &alias->icmp_chunkmask, alias->icmp_pgmask,
1223 alias->icmp, &alias->icmp_pg, ji->proto);
1224 break;
1225 default:
1226 panic("%s: wrong proto %d", __func__, ji->proto);
1227 }
1228 if (ret == PG_ERROR(1)) {
1229 /*
1230 * PG_ERROR(1) means that alias lacks free PGs
1231 * XXX: try next alias.
1232 */
1233 printf("NAT64LSN: %s: failed to obtain PG\n",
1234 __func__);
1235 return (ret);
1236 }
1237 if (ret == PG_ERROR(0)) {
1238 ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1239 ji->state_hval, ji->faddr, ji->port, ji->proto);
1240 if (ji->state == NULL)
1241 ret = PG_ERROR(8);
1242 else
1243 ji->done = 1;
1244 }
1245 return (ret);
1246 }
1247
1248 static void
nat64lsn_do_request(void * data)1249 nat64lsn_do_request(void *data)
1250 {
1251 struct epoch_tracker et;
1252 struct nat64lsn_job_head jhead;
1253 struct nat64lsn_job_item *ji, *ji2;
1254 struct nat64lsn_cfg *cfg;
1255 int jcount;
1256 uint8_t flags;
1257
1258 cfg = (struct nat64lsn_cfg *)data;
1259 if (cfg->jlen == 0)
1260 return;
1261
1262 CURVNET_SET(cfg->vp);
1263 STAILQ_INIT(&jhead);
1264
1265 /* Grab queue */
1266 JQUEUE_LOCK();
1267 STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1268 jcount = cfg->jlen;
1269 cfg->jlen = 0;
1270 JQUEUE_UNLOCK();
1271
1272 /* TODO: check if we need to resize hash */
1273
1274 NAT64STAT_INC(&cfg->base.stats, jcalls);
1275 DPRINTF(DP_JQUEUE, "count=%d", jcount);
1276
1277 /*
1278 * TODO:
1279 * What we should do here is to build a hash
1280 * to ensure we don't have lots of duplicate requests.
1281 * Skip this for now.
1282 *
1283 * TODO: Limit per-call number of items
1284 */
1285
1286 NAT64LSN_EPOCH_ENTER(et);
1287 STAILQ_FOREACH(ji, &jhead, entries) {
1288 switch (ji->jtype) {
1289 case JTYPE_NEWHOST:
1290 if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1291 NAT64STAT_INC(&cfg->base.stats, jhostfails);
1292 break;
1293 case JTYPE_NEWPORTGROUP:
1294 if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1295 NAT64STAT_INC(&cfg->base.stats, jportfails);
1296 break;
1297 default:
1298 continue;
1299 }
1300 if (ji->done != 0) {
1301 flags = ji->proto != IPPROTO_TCP ? 0 :
1302 convert_tcp_flags(ji->f_id._flags);
1303 nat64lsn_translate6_internal(cfg, &ji->m,
1304 ji->state, flags);
1305 NAT64STAT_INC(&cfg->base.stats, jreinjected);
1306 }
1307 }
1308 NAT64LSN_EPOCH_EXIT(et);
1309
1310 ji = STAILQ_FIRST(&jhead);
1311 while (ji != NULL) {
1312 ji2 = STAILQ_NEXT(ji, entries);
1313 /*
1314 * In any case we must free mbuf if
1315 * translator did not consumed it.
1316 */
1317 m_freem(ji->m);
1318 uma_zfree(nat64lsn_job_zone, ji);
1319 ji = ji2;
1320 }
1321 CURVNET_RESTORE();
1322 }
1323
1324 static struct nat64lsn_job_item *
nat64lsn_create_job(struct nat64lsn_cfg * cfg,int jtype)1325 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1326 {
1327 struct nat64lsn_job_item *ji;
1328
1329 /*
1330 * Do not try to lock possibly contested mutex if we're near the
1331 * limit. Drop packet instead.
1332 */
1333 ji = NULL;
1334 if (cfg->jlen >= cfg->jmaxlen)
1335 NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1336 else {
1337 ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1338 if (ji == NULL)
1339 NAT64STAT_INC(&cfg->base.stats, jnomem);
1340 }
1341 if (ji == NULL) {
1342 NAT64STAT_INC(&cfg->base.stats, dropped);
1343 DPRINTF(DP_DROPS, "failed to create job");
1344 } else {
1345 ji->jtype = jtype;
1346 ji->done = 0;
1347 }
1348 return (ji);
1349 }
1350
1351 static void
nat64lsn_enqueue_job(struct nat64lsn_cfg * cfg,struct nat64lsn_job_item * ji)1352 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1353 {
1354
1355 JQUEUE_LOCK();
1356 STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1357 NAT64STAT_INC(&cfg->base.stats, jrequests);
1358 cfg->jlen++;
1359
1360 if (callout_pending(&cfg->jcallout) == 0)
1361 callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1362 JQUEUE_UNLOCK();
1363 }
1364
1365 static void
nat64lsn_job_destroy(epoch_context_t ctx)1366 nat64lsn_job_destroy(epoch_context_t ctx)
1367 {
1368 struct nat64lsn_job_item *ji;
1369 struct nat64lsn_host *host;
1370 struct nat64lsn_pg *pg;
1371 int i;
1372
1373 ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1374 MPASS(ji->jtype == JTYPE_DESTROY);
1375 while (!CK_SLIST_EMPTY(&ji->hosts)) {
1376 host = CK_SLIST_FIRST(&ji->hosts);
1377 CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1378 if (host->states_count > 0) {
1379 /*
1380 * XXX: The state has been created
1381 * during host deletion.
1382 */
1383 printf("NAT64LSN: %s: destroying host with %d "
1384 "states\n", __func__, host->states_count);
1385 }
1386 nat64lsn_destroy_host(host);
1387 }
1388 while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1389 pg = CK_SLIST_FIRST(&ji->portgroups);
1390 CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1391 for (i = 0; i < pg->chunks_count; i++) {
1392 if (FREEMASK_BITCOUNT(pg, i) != 64) {
1393 /*
1394 * XXX: The state has been created during
1395 * PG deletion.
1396 */
1397 printf("NAT64LSN: %s: destroying PG %p "
1398 "with non-empty chunk %d\n", __func__,
1399 pg, i);
1400 }
1401 }
1402 nat64lsn_destroy_pg(pg);
1403 }
1404 uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1405 uma_zfree(nat64lsn_job_zone, ji);
1406 }
1407
1408 static int
nat64lsn_request_host(struct nat64lsn_cfg * cfg,const struct ipfw_flow_id * f_id,struct mbuf ** mp,uint32_t hval,in_addr_t faddr,uint16_t port,uint8_t proto)1409 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1410 const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1411 in_addr_t faddr, uint16_t port, uint8_t proto)
1412 {
1413 struct nat64lsn_job_item *ji;
1414
1415 ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1416 if (ji != NULL) {
1417 ji->m = *mp;
1418 ji->f_id = *f_id;
1419 ji->faddr = faddr;
1420 ji->port = port;
1421 ji->proto = proto;
1422 ji->src6_hval = hval;
1423
1424 nat64lsn_enqueue_job(cfg, ji);
1425 NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1426 *mp = NULL;
1427 }
1428 return (IP_FW_DENY);
1429 }
1430
1431 static int
nat64lsn_request_pg(struct nat64lsn_cfg * cfg,struct nat64lsn_host * host,const struct ipfw_flow_id * f_id,struct mbuf ** mp,uint32_t hval,in_addr_t faddr,uint16_t port,uint8_t proto)1432 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1433 const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1434 in_addr_t faddr, uint16_t port, uint8_t proto)
1435 {
1436 struct nat64lsn_job_item *ji;
1437
1438 ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1439 if (ji != NULL) {
1440 ji->m = *mp;
1441 ji->f_id = *f_id;
1442 ji->faddr = faddr;
1443 ji->port = port;
1444 ji->proto = proto;
1445 ji->state_hval = hval;
1446 ji->host = host;
1447
1448 nat64lsn_enqueue_job(cfg, ji);
1449 NAT64STAT_INC(&cfg->base.stats, jportreq);
1450 *mp = NULL;
1451 }
1452 return (IP_FW_DENY);
1453 }
1454
1455 static int
nat64lsn_translate6_internal(struct nat64lsn_cfg * cfg,struct mbuf ** mp,struct nat64lsn_state * state,uint8_t flags)1456 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1457 struct nat64lsn_state *state, uint8_t flags)
1458 {
1459 struct pfloghdr loghdr, *logdata;
1460 int ret;
1461 uint16_t ts;
1462
1463 /* Update timestamp and flags if needed */
1464 SET_AGE(ts);
1465 if (state->timestamp != ts)
1466 state->timestamp = ts;
1467 if ((state->flags & flags) != 0)
1468 state->flags |= flags;
1469
1470 if (cfg->base.flags & NAT64_LOG) {
1471 logdata = &loghdr;
1472 nat64lsn_log(logdata, *mp, AF_INET6, state);
1473 } else
1474 logdata = NULL;
1475
1476 ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1477 htons(state->aport), &cfg->base, logdata);
1478 if (ret == NAT64SKIP)
1479 return (cfg->nomatch_verdict);
1480 if (ret == NAT64RETURN)
1481 *mp = NULL;
1482 return (IP_FW_DENY);
1483 }
1484
1485 static int
nat64lsn_translate6(struct nat64lsn_cfg * cfg,struct ipfw_flow_id * f_id,struct mbuf ** mp)1486 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1487 struct mbuf **mp)
1488 {
1489 struct nat64lsn_state *state;
1490 struct nat64lsn_host *host;
1491 struct icmp6_hdr *icmp6;
1492 uint32_t addr, hval, data[2];
1493 int offset, proto;
1494 uint16_t port;
1495 uint8_t flags;
1496
1497 /* Check if protocol is supported */
1498 port = f_id->src_port;
1499 proto = f_id->proto;
1500 switch (f_id->proto) {
1501 case IPPROTO_ICMPV6:
1502 /*
1503 * For ICMPv6 echo reply/request we use icmp6_id as
1504 * local port.
1505 */
1506 offset = 0;
1507 proto = nat64_getlasthdr(*mp, &offset);
1508 if (proto < 0) {
1509 NAT64STAT_INC(&cfg->base.stats, dropped);
1510 DPRINTF(DP_DROPS, "mbuf isn't contigious");
1511 return (IP_FW_DENY);
1512 }
1513 if (proto == IPPROTO_ICMPV6) {
1514 icmp6 = mtodo(*mp, offset);
1515 if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1516 icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1517 port = ntohs(icmp6->icmp6_id);
1518 }
1519 proto = IPPROTO_ICMP;
1520 /* FALLTHROUGH */
1521 case IPPROTO_TCP:
1522 case IPPROTO_UDP:
1523 break;
1524 default:
1525 NAT64STAT_INC(&cfg->base.stats, noproto);
1526 return (cfg->nomatch_verdict);
1527 }
1528
1529 /* Extract IPv4 from destination IPv6 address */
1530 addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
1531 if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1532 char a[INET_ADDRSTRLEN];
1533
1534 NAT64STAT_INC(&cfg->base.stats, dropped);
1535 DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1536 inet_ntop(AF_INET, &addr, a, sizeof(a)));
1537 return (IP_FW_DENY); /* XXX: add extra stats? */
1538 }
1539
1540 /* Try to find host */
1541 hval = HOST_HVAL(cfg, &f_id->src_ip6);
1542 CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1543 if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1544 break;
1545 }
1546 /* We use IPv4 address in host byte order */
1547 addr = ntohl(addr);
1548 if (host == NULL)
1549 return (nat64lsn_request_host(cfg, f_id, mp,
1550 hval, addr, port, proto));
1551
1552 flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
1553
1554 data[0] = addr;
1555 data[1] = (f_id->dst_port << 16) | port;
1556 hval = STATE_HVAL(cfg, data);
1557 state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
1558 port, proto);
1559 if (state == NULL)
1560 return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1561 port, proto));
1562 return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1563 }
1564
1565 /*
1566 * Main dataplane entry point.
1567 */
1568 int
ipfw_nat64lsn(struct ip_fw_chain * ch,struct ip_fw_args * args,ipfw_insn * cmd,int * done)1569 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1570 ipfw_insn *cmd, int *done)
1571 {
1572 struct nat64lsn_cfg *cfg;
1573 ipfw_insn *icmd;
1574 int ret;
1575
1576 IPFW_RLOCK_ASSERT(ch);
1577
1578 *done = 0; /* continue the search in case of failure */
1579 icmd = cmd + 1;
1580 if (cmd->opcode != O_EXTERNAL_ACTION ||
1581 cmd->arg1 != V_nat64lsn_eid ||
1582 icmd->opcode != O_EXTERNAL_INSTANCE ||
1583 (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1584 return (IP_FW_DENY);
1585
1586 *done = 1; /* terminate the search */
1587
1588 switch (args->f_id.addr_type) {
1589 case 4:
1590 ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1591 break;
1592 case 6:
1593 /*
1594 * Check that destination IPv6 address matches our prefix6.
1595 */
1596 if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1597 memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
1598 cfg->base.plat_plen / 8) != 0) {
1599 ret = cfg->nomatch_verdict;
1600 break;
1601 }
1602 ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
1603 break;
1604 default:
1605 ret = cfg->nomatch_verdict;
1606 }
1607
1608 if (ret != IP_FW_PASS && args->m != NULL) {
1609 m_freem(args->m);
1610 args->m = NULL;
1611 }
1612 return (ret);
1613 }
1614
1615 static int
nat64lsn_state_ctor(void * mem,int size,void * arg,int flags)1616 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1617 {
1618 struct nat64lsn_states_chunk *chunk;
1619 int i;
1620
1621 chunk = (struct nat64lsn_states_chunk *)mem;
1622 for (i = 0; i < 64; i++)
1623 chunk->state[i].flags = 0;
1624 return (0);
1625 }
1626
1627 void
nat64lsn_init_internal(void)1628 nat64lsn_init_internal(void)
1629 {
1630
1631 nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1632 sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1633 UMA_ALIGN_PTR, 0);
1634 nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1635 sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1636 UMA_ALIGN_PTR, 0);
1637 nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1638 sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1639 UMA_ALIGN_PTR, 0);
1640 nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1641 sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
1642 UMA_ALIGN_PTR, 0);
1643 nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1644 sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1645 NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1646 nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1647 sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
1648 UMA_ALIGN_PTR, 0);
1649 JQUEUE_LOCK_INIT();
1650 }
1651
1652 void
nat64lsn_uninit_internal(void)1653 nat64lsn_uninit_internal(void)
1654 {
1655
1656 /* XXX: epoch_task drain */
1657 JQUEUE_LOCK_DESTROY();
1658 uma_zdestroy(nat64lsn_host_zone);
1659 uma_zdestroy(nat64lsn_pgchunk_zone);
1660 uma_zdestroy(nat64lsn_pg_zone);
1661 uma_zdestroy(nat64lsn_aliaslink_zone);
1662 uma_zdestroy(nat64lsn_state_zone);
1663 uma_zdestroy(nat64lsn_job_zone);
1664 }
1665
1666 void
nat64lsn_start_instance(struct nat64lsn_cfg * cfg)1667 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1668 {
1669
1670 CALLOUT_LOCK(cfg);
1671 callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1672 nat64lsn_periodic, cfg);
1673 CALLOUT_UNLOCK(cfg);
1674 }
1675
1676 struct nat64lsn_cfg *
nat64lsn_init_instance(struct ip_fw_chain * ch,in_addr_t prefix,int plen)1677 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1678 {
1679 struct nat64lsn_cfg *cfg;
1680 struct nat64lsn_alias *alias;
1681 int i, naddr;
1682
1683 cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1684 M_WAITOK | M_ZERO);
1685
1686 CFG_LOCK_INIT(cfg);
1687 CALLOUT_LOCK_INIT(cfg);
1688 STAILQ_INIT(&cfg->jhead);
1689 cfg->vp = curvnet;
1690 COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1691
1692 cfg->hash_seed = arc4random();
1693 cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1694 cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1695 cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1696 for (i = 0; i < cfg->hosts_hashsize; i++)
1697 CK_SLIST_INIT(&cfg->hosts_hash[i]);
1698
1699 naddr = 1 << (32 - plen);
1700 cfg->prefix4 = prefix;
1701 cfg->pmask4 = prefix | (naddr - 1);
1702 cfg->plen4 = plen;
1703 cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1704 M_NAT64LSN, M_WAITOK | M_ZERO);
1705 for (i = 0; i < naddr; i++) {
1706 alias = &cfg->aliases[i];
1707 alias->addr = prefix + i; /* host byte order */
1708 CK_SLIST_INIT(&alias->hosts);
1709 ALIAS_LOCK_INIT(alias);
1710 }
1711
1712 callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1713 callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1714
1715 return (cfg);
1716 }
1717
1718 static void
nat64lsn_destroy_pg(struct nat64lsn_pg * pg)1719 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1720 {
1721 int i;
1722
1723 if (pg->chunks_count == 1) {
1724 uma_zfree(nat64lsn_state_zone, pg->states);
1725 } else {
1726 for (i = 0; i < pg->chunks_count; i++)
1727 uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1728 free(pg->states_chunk, M_NAT64LSN);
1729 free(pg->freemask_chunk, M_NAT64LSN);
1730 }
1731 uma_zfree(nat64lsn_pg_zone, pg);
1732 }
1733
1734 static void
nat64lsn_destroy_alias(struct nat64lsn_cfg * cfg,struct nat64lsn_alias * alias)1735 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1736 struct nat64lsn_alias *alias)
1737 {
1738 struct nat64lsn_pg *pg;
1739 int i;
1740
1741 while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1742 pg = CK_SLIST_FIRST(&alias->portgroups);
1743 CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1744 nat64lsn_destroy_pg(pg);
1745 }
1746 for (i = 0; i < 32; i++) {
1747 if (ISSET32(alias->tcp_chunkmask, i))
1748 uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1749 if (ISSET32(alias->udp_chunkmask, i))
1750 uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1751 if (ISSET32(alias->icmp_chunkmask, i))
1752 uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1753 }
1754 ALIAS_LOCK_DESTROY(alias);
1755 }
1756
1757 static void
nat64lsn_destroy_host(struct nat64lsn_host * host)1758 nat64lsn_destroy_host(struct nat64lsn_host *host)
1759 {
1760 struct nat64lsn_aliaslink *link;
1761
1762 while (!CK_SLIST_EMPTY(&host->aliases)) {
1763 link = CK_SLIST_FIRST(&host->aliases);
1764 CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
1765
1766 ALIAS_LOCK(link->alias);
1767 CK_SLIST_REMOVE(&link->alias->hosts, link,
1768 nat64lsn_aliaslink, alias_entries);
1769 link->alias->hosts_count--;
1770 ALIAS_UNLOCK(link->alias);
1771
1772 uma_zfree(nat64lsn_aliaslink_zone, link);
1773 }
1774 HOST_LOCK_DESTROY(host);
1775 free(host->states_hash, M_NAT64LSN);
1776 uma_zfree(nat64lsn_host_zone, host);
1777 }
1778
1779 void
nat64lsn_destroy_instance(struct nat64lsn_cfg * cfg)1780 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1781 {
1782 struct nat64lsn_host *host;
1783 int i;
1784
1785 CALLOUT_LOCK(cfg);
1786 callout_drain(&cfg->periodic);
1787 CALLOUT_UNLOCK(cfg);
1788 callout_drain(&cfg->jcallout);
1789
1790 for (i = 0; i < cfg->hosts_hashsize; i++) {
1791 while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1792 host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1793 CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1794 nat64lsn_destroy_host(host);
1795 }
1796 }
1797
1798 for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1799 nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1800
1801 CALLOUT_LOCK_DESTROY(cfg);
1802 CFG_LOCK_DESTROY(cfg);
1803 COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1804 free(cfg->hosts_hash, M_NAT64LSN);
1805 free(cfg->aliases, M_NAT64LSN);
1806 free(cfg, M_NAT64LSN);
1807 }
1808