xref: /freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c (revision 731d06abf2105cc0873fa84e972178f9f37ca760)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
36 #include <sys/ck.h>
37 #include <sys/epoch.h>
38 #include <sys/errno.h>
39 #include <sys/hash.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/rmlock.h>
46 #include <sys/socket.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/if_pflog.h>
53 #include <net/pfil.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_fw.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/ip_icmp.h>
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6_var.h>
66 #include <netinet6/ip_fw_nat64.h>
67 
68 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <netpfil/pf/pf.h>
70 
71 #include "nat64lsn.h"
72 
73 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
74 
/*
 * Wrappers around the network epoch primitives; every epoch section and
 * deferred callback in this file goes through these names.
 */
#define	NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
#define	NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
#define	NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
/* Schedule callback f with context c via epoch_call() on net_epoch_preempt. */
#define	NAT64LSN_EPOCH_CALL(c, f) epoch_call(net_epoch_preempt, (c), (f))

/*
 * UMA zones used by this module.  The host, aliaslink and job zones are
 * allocated from further down in this file; the others presumably back
 * the objects named in their identifiers (confirm at the allocation sites).
 */
static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;

static void nat64lsn_periodic(void *data);
/* Multiplied by hz when nat64lsn_periodic() reschedules its callout. */
#define	PERIODIC_DELAY		4
/* Resolve the instance referenced by (cmd)->arg1 through SRV_OBJECT(). */
#define	NAT64_LOOKUP(chain, cmd)	\
	(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups.
 *
 * The job type selects which member of the anonymous union inside
 * struct nat64lsn_job_item is meaningful.
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,	/* handled by nat64lsn_alloc_host() */
	JTYPE_NEWPORTGROUP,	/* handled by nat64lsn_alloc_pg() */
	JTYPE_DESTROY,		/* built by nat64lsn_periodic() */
};
100 
/*
 * One job.  `jtype' tells which half of the union is in use:
 *  - NEWHOST/NEWPORTGROUP jobs carry the flow description (f_id, faddr,
 *    port, proto), the precomputed hash values and, once the job has been
 *    processed, the resulting host/state; `done' is set to 1 by
 *    nat64lsn_alloc_host() on success.
 *  - DESTROY jobs carry the lists of hosts and portgroups unlinked by the
 *    periodic handler, plus the epoch context used to defer their release.
 */
struct nat64lsn_job_item {
	STAILQ_ENTRY(nat64lsn_job_item)	entries;
	enum nat64lsn_jtype	jtype;

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			struct mbuf		*m;
			struct nat64lsn_host	*host;
			struct nat64lsn_state	*state;
			/* Index into the instance's hosts hash (see HOSTS()). */
			uint32_t		src6_hval;
			/* Index into the host's states hash (see STATE_HASH()). */
			uint32_t		state_hval;
			struct ipfw_flow_id	f_id;
			in_addr_t		faddr;
			uint16_t		port;
			uint8_t			proto;
			uint8_t			done;
		};
		struct { /* used by JTYPE_DESTROY */
			struct nat64lsn_hosts_slist	hosts;
			struct nat64lsn_pg_slist	portgroups;
			struct nat64lsn_pgchunk		*pgchunk;
			struct epoch_context		epoch_ctx;
		};
	};
};
126 
/*
 * File-wide mutex named "qlock"; presumably it serializes access to the
 * job queue (the users are outside this part of the file -- confirm).
 */
static struct mtx jmtx;
#define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define	JQUEUE_LOCK()		mtx_lock(&jmtx)
#define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)

/* Job handling and object life-cycle helpers. */
static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
    struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);

/* Packet translation entry points. */
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
    struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
151 
/*
 * Bit numbers inside state->flags.  The three TCP bits are what
 * convert_tcp_flags() produces; READY and STALE drive the state
 * life cycle (see nat64lsn_get_state6to4() and nat64lsn_maintain_pg()).
 */
#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

/* Mask forms of the TCP bits above. */
#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

/* Mask forms of the life-cycle bits above. */
#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)
165 
/*
 * Fold the TCP header flags into the per-state NAT64_FLAG_* bits.
 *
 * FIN and SYN are taken over unchanged.  RST and ACK are each moved two
 * bit positions down, so that RST is recorded as FIN and ACK is recorded
 * as "established".  All other header flags are dropped.
 */
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	uint8_t fin_syn, rst_as_fin, ack_as_estab;

	fin_syn = flags & (TH_FIN|TH_SYN);
	rst_as_fin = (flags & TH_RST) >> 2;
	ack_as_estab = (flags & TH_ACK) >> 2;

	return (fin_syn | rst_as_fin | ack_as_estab);
}
177 
178 static void
179 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
180     struct nat64lsn_state *state)
181 {
182 
183 	memset(plog, 0, sizeof(*plog));
184 	plog->length = PFLOG_REAL_HDRLEN;
185 	plog->af = family;
186 	plog->action = PF_NAT;
187 	plog->dir = PF_IN;
188 	plog->rulenr = htonl(state->ip_src);
189 	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
190 	    (state->proto << 8) | (state->ip_dst & 0xff));
191 	plog->ruleset[0] = '\0';
192 	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
193 	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
194 }
195 
/* Hash n 32-bit words starting at p with seed s. */
#define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
/* Hash of one IPv6 address (a) with the instance's (c) seed. */
#define	HOST_HVAL(c, a)	HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
/* Hosts hash bucket for hash value v; the mask assumes a power-of-2 size. */
#define	HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

/*
 * Hash over two IPv6 addresses' worth of words starting at f->dst_ip6
 * (presumably dst_ip6 immediately followed by src_ip6 -- confirm against
 * the layout of struct ipfw_flow_id).
 */
#define	ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
/* Alias slot selected by the low (32 - plen4) bits of v. */
#define	ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
205 static struct nat64lsn_aliaslink*
206 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
207     struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
208 {
209 
210 	/*
211 	 * We can implement some different algorithms how
212 	 * select an alias address.
213 	 * XXX: for now we use first available.
214 	 */
215 	return (CK_SLIST_FIRST(&host->aliases));
216 }
217 
/* Hash of the two 32-bit words at d, seeded with the instance's seed. */
#define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
/* States hash bucket of host h for hash value v (power-of-2 sized table). */
#define	STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
/*
 * States chunk of portgroup p to use for value v: the only chunk when
 * there is exactly one, otherwise the one picked by CHUNK_BY_FADDR().
 */
#define	STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))

/*
 * Accessors for a portgroup's 64-bit "free states" mask.  A set bit marks
 * a free state.
 *   FREEMASK_FFSLL  - 1-based index of the lowest set bit, 0 if none;
 *   FREEMASK_BTR    - atomic bit-test-and-reset (used to grab a state);
 *   FREEMASK_BTS    - atomic bit-test-and-set (used to release a state);
 *   FREEMASK_ISSET  - plain test of one bit;
 *   FREEMASK_COPY   - load the whole mask into a 64-bit variable.
 * On LP64 the mask is one 64-bit word; otherwise it is handled as two
 * 32-bit words, low word first.
 */
#ifdef __LP64__
#define	FREEMASK_FFSLL(pg, faddr)		\
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
/*
 * ffsll() replacement for a mask stored as two 32-bit words: returns the
 * 1-based index (1..64) of the lowest set bit, or 0 when both words are 0.
 */
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int i;

	if ((i = ffsl(freemask[0])) != 0)
		return (i);
	if ((i = ffsl(freemask[1])) != 0)
		return (i + 32);
	return (0);
}
#define	FREEMASK_FFSLL(pg, faddr)		\
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */
260 
261 
262 #define	NAT64LSN_TRY_PGCNT	32
263 static struct nat64lsn_pg*
264 nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
265     struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
266     uint32_t *pgidx, in_addr_t faddr)
267 {
268 	struct nat64lsn_pg *pg, *oldpg;
269 	uint32_t idx, oldidx;
270 	int cnt;
271 
272 	cnt = 0;
273 	/* First try last used PG */
274 	oldpg = pg = ck_pr_load_ptr(pgptr);
275 	idx = oldidx = ck_pr_load_32(pgidx);
276 	/* If pgidx is out of range, reset it to the first pgchunk */
277 	if (!ISSET32(*chunkmask, idx / 32))
278 		idx = 0;
279 	do {
280 		ck_pr_fence_load();
281 		if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
282 			/*
283 			 * If last used PG has not free states,
284 			 * try to update pointer.
285 			 * NOTE: it can be already updated by jobs handler,
286 			 *	 thus we use CAS operation.
287 			 */
288 			if (cnt > 0)
289 				ck_pr_cas_ptr(pgptr, oldpg, pg);
290 			return (pg);
291 		}
292 		/* Stop if idx is out of range */
293 		if (!ISSET32(*chunkmask, idx / 32))
294 			break;
295 
296 		if (ISSET32(pgmask[idx / 32], idx % 32))
297 			pg = ck_pr_load_ptr(
298 			    &chunks[idx / 32]->pgptr[idx % 32]);
299 		else
300 			pg = NULL;
301 
302 		idx++;
303 	} while (++cnt < NAT64LSN_TRY_PGCNT);
304 
305 	/* If pgidx is out of range, reset it to the first pgchunk */
306 	if (!ISSET32(*chunkmask, idx / 32))
307 		idx = 0;
308 	ck_pr_cas_32(pgidx, oldidx, idx);
309 	return (NULL);
310 }
311 
/*
 * Find or create the state for an IPv6->IPv4 flow of @host.
 *
 * @hval is the precomputed index into the host's states hash, @faddr the
 * foreign IPv4 address, @port the IPv6 side source port (or ICMP id) and
 * @proto the protocol.  An existing state is matched on proto, foreign
 * address, source port and destination port.
 *
 * If there is none, a free state is grabbed from a portgroup of the
 * host's alias, initialized, linked into the host's hash and finally
 * marked READY.  Returns NULL when the host has no alias link, when no
 * portgroup with free states was found, or when every free state was
 * taken by somebody else in the meantime.  Must be called inside the
 * network epoch.
 */
static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int i, offset;

	NAT64LSN_EPOCH_ASSERT();

	/* Check that we already have state for given arguments */
	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
		if (state->proto == proto && state->ip_dst == faddr &&
		    state->sport == port && state->dport == f_id->dst_port)
			return (state);
	}

	link = nat64lsn_get_aliaslink(cfg, host, f_id);
	if (link == NULL)
		return (NULL);

	/* Each protocol has its own portgroup table and lookup caches. */
	switch (proto) {
	case IPPROTO_TCP:
		pg = nat64lsn_get_pg(
		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
		    link->alias->tcp, &link->alias->tcp_pg,
		    &link->alias->tcp_pgidx, faddr);
		break;
	case IPPROTO_UDP:
		pg = nat64lsn_get_pg(
		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
		    link->alias->udp, &link->alias->udp_pg,
		    &link->alias->udp_pgidx, faddr);
		break;
	case IPPROTO_ICMP:
		pg = nat64lsn_get_pg(
		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
		    link->alias->icmp, &link->alias->icmp_pg,
		    &link->alias->icmp_pgidx, faddr);
		break;
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	/* Check that PG has some free states */
	state = NULL;
	/* The number of free bits seen now bounds the number of retries. */
	i = FREEMASK_BITCOUNT(pg, faddr);
	while (i-- > 0) {
		/* 1-based position of the lowest free bit, 0 if none. */
		offset = FREEMASK_FFSLL(pg, faddr);
		if (offset == 0) {
			/*
			 * We lost the race.
			 * No more free states in this PG.
			 */
			break;
		}

		/* Lets try to atomically grab the state */
		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
			/* Initialize */
			state->flags = proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(f_id->_flags);
			state->proto = proto;
			/* The alias port is derived from the bit position. */
			state->aport = pg->base_port + offset - 1;
			state->dport = f_id->dst_port;
			state->sport = port;
			state->ip6_dst = f_id->dst_ip6;
			state->ip_dst = faddr;
			state->ip_src = link->alias->addr;
			state->hval = hval;
			state->host = host;
			SET_AGE(state->timestamp);

			/* Insert new state into host's hash table */
			HOST_LOCK(host);
			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
			    state, entries);
			host->states_count++;
			/*
			 * XXX: In case if host is going to be expired,
			 * reset NAT64LSN_DEADHOST flag.
			 */
			host->flags &= ~NAT64LSN_DEADHOST;
			HOST_UNLOCK(host);
			NAT64STAT_INC(&cfg->base.stats, screated);
			/* Mark the state as ready for translate4 */
			/* The fence orders the stores above before READY. */
			ck_pr_fence_store();
			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
			break;
		}
	}
	return (state);
}
410 
411 /*
412  * Inspects icmp packets to see if the message contains different
413  * packet header so we need to alter @addr and @port.
414  */
415 static int
416 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
417     uint16_t *port)
418 {
419 	struct icmp *icmp;
420 	struct ip *ip;
421 	int off;
422 	uint8_t inner_proto;
423 
424 	ip = mtod(*mp, struct ip *); /* Outer IP header */
425 	off = (ip->ip_hl << 2) + ICMP_MINLEN;
426 	if ((*mp)->m_len < off)
427 		*mp = m_pullup(*mp, off);
428 	if (*mp == NULL)
429 		return (ENOMEM);
430 
431 	ip = mtod(*mp, struct ip *); /* Outer IP header */
432 	icmp = L3HDR(ip, struct icmp *);
433 	switch (icmp->icmp_type) {
434 	case ICMP_ECHO:
435 	case ICMP_ECHOREPLY:
436 		/* Use icmp ID as distinguisher */
437 		*port = ntohs(icmp->icmp_id);
438 		return (0);
439 	case ICMP_UNREACH:
440 	case ICMP_TIMXCEED:
441 		break;
442 	default:
443 		return (EOPNOTSUPP);
444 	}
445 	/*
446 	 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
447 	 * of ULP header.
448 	 */
449 	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
450 		return (EINVAL);
451 	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
452 		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
453 	if (*mp == NULL)
454 		return (ENOMEM);
455 	ip = mtodo(*mp, off); /* Inner IP header */
456 	inner_proto = ip->ip_p;
457 	off += ip->ip_hl << 2; /* Skip inner IP header */
458 	*addr = ntohl(ip->ip_src.s_addr);
459 	if ((*mp)->m_len < off + ICMP_MINLEN)
460 		*mp = m_pullup(*mp, off + ICMP_MINLEN);
461 	if (*mp == NULL)
462 		return (ENOMEM);
463 	switch (inner_proto) {
464 	case IPPROTO_TCP:
465 	case IPPROTO_UDP:
466 		/* Copy source port from the header */
467 		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
468 		*proto = inner_proto;
469 		return (0);
470 	case IPPROTO_ICMP:
471 		/*
472 		 * We will translate only ICMP errors for our ICMP
473 		 * echo requests.
474 		 */
475 		icmp = mtodo(*mp, off);
476 		if (icmp->icmp_type != ICMP_ECHO)
477 			return (EOPNOTSUPP);
478 		*port = ntohs(icmp->icmp_id);
479 		return (0);
480 	};
481 	return (EOPNOTSUPP);
482 }
483 
484 static struct nat64lsn_state*
485 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
486     in_addr_t faddr, uint16_t port, uint8_t proto)
487 {
488 	struct nat64lsn_state *state;
489 	struct nat64lsn_pg *pg;
490 	int chunk_idx, pg_idx, state_idx;
491 
492 	NAT64LSN_EPOCH_ASSERT();
493 
494 	if (port < NAT64_MIN_PORT)
495 		return (NULL);
496 	/*
497 	 * Alias keeps 32 pgchunks for each protocol.
498 	 * Each pgchunk has 32 pointers to portgroup.
499 	 * Each portgroup has 64 states for ports.
500 	 */
501 	port -= NAT64_MIN_PORT;
502 	chunk_idx = port / 2048;
503 
504 	port -= chunk_idx * 2048;
505 	pg_idx = port / 64;
506 	state_idx = port % 64;
507 
508 	/*
509 	 * First check in proto_chunkmask that we have allocated PG chunk.
510 	 * Then check in proto_pgmask that we have valid PG pointer.
511 	 */
512 	pg = NULL;
513 	switch (proto) {
514 	case IPPROTO_TCP:
515 		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
516 		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
517 			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
518 			break;
519 		}
520 		return (NULL);
521 	case IPPROTO_UDP:
522 		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
523 		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
524 			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
525 			break;
526 		}
527 		return (NULL);
528 	case IPPROTO_ICMP:
529 		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
530 		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
531 			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
532 			break;
533 		}
534 		return (NULL);
535 	default:
536 		panic("%s: wrong proto %d", __func__, proto);
537 	}
538 	if (pg == NULL)
539 		return (NULL);
540 
541 	if (FREEMASK_ISSET(pg, faddr, state_idx))
542 		return (NULL);
543 
544 	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
545 	ck_pr_fence_load();
546 	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
547 		return (state);
548 	return (NULL);
549 }
550 
551 static int
552 nat64lsn_translate4(struct nat64lsn_cfg *cfg,
553     const struct ipfw_flow_id *f_id, struct mbuf **mp)
554 {
555 	struct pfloghdr loghdr, *logdata;
556 	struct in6_addr src6;
557 	struct nat64lsn_state *state;
558 	struct nat64lsn_alias *alias;
559 	uint32_t addr, flags;
560 	uint16_t port, ts;
561 	int ret;
562 	uint8_t proto;
563 
564 	addr = f_id->dst_ip;
565 	port = f_id->dst_port;
566 	proto = f_id->proto;
567 	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
568 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
569 		return (cfg->nomatch_verdict);
570 	}
571 
572 	/* Check if protocol is supported */
573 	switch (proto) {
574 	case IPPROTO_ICMP:
575 		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
576 		if (ret != 0) {
577 			if (ret == ENOMEM) {
578 				NAT64STAT_INC(&cfg->base.stats, nomem);
579 				return (IP_FW_DENY);
580 			}
581 			NAT64STAT_INC(&cfg->base.stats, noproto);
582 			return (cfg->nomatch_verdict);
583 		}
584 		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
585 			NAT64STAT_INC(&cfg->base.stats, nomatch4);
586 			return (cfg->nomatch_verdict);
587 		}
588 		/* FALLTHROUGH */
589 	case IPPROTO_TCP:
590 	case IPPROTO_UDP:
591 		break;
592 	default:
593 		NAT64STAT_INC(&cfg->base.stats, noproto);
594 		return (cfg->nomatch_verdict);
595 	}
596 
597 	alias = &ALIAS_BYHASH(cfg, addr);
598 	MPASS(addr == alias->addr);
599 
600 	/* Check that we have state for this port */
601 	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
602 	    port, proto);
603 	if (state == NULL) {
604 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
605 		return (cfg->nomatch_verdict);
606 	}
607 
608 	/* TODO: Check flags to see if we need to do some static mapping */
609 
610 	/* Update some state fields if need */
611 	SET_AGE(ts);
612 	if (f_id->proto == IPPROTO_TCP)
613 		flags = convert_tcp_flags(f_id->_flags);
614 	else
615 		flags = 0;
616 	if (state->timestamp != ts)
617 		state->timestamp = ts;
618 	if ((state->flags & flags) != flags)
619 		state->flags |= flags;
620 
621 	port = htons(state->sport);
622 	src6 = state->ip6_dst;
623 
624 	if (cfg->base.flags & NAT64_LOG) {
625 		logdata = &loghdr;
626 		nat64lsn_log(logdata, *mp, AF_INET, state);
627 	} else
628 		logdata = NULL;
629 
630 	/*
631 	 * We already have src6 with embedded address, but it is possible,
632 	 * that src_ip is different than state->ip_dst, this is why we
633 	 * do embedding again.
634 	 */
635 	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
636 	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
637 	    &cfg->base, logdata);
638 	if (ret == NAT64SKIP)
639 		return (cfg->nomatch_verdict);
640 	if (ret == NAT64RETURN)
641 		*mp = NULL;
642 	return (IP_FW_DENY);
643 }
644 
645 /*
646  * Check if particular state is stale and should be deleted.
647  * Return 1 if true, 0 otherwise.
648  */
649 static int
650 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
651 {
652 	int age, ttl;
653 
654 	/* State was marked as stale in previous pass. */
655 	if (ISSET32(state->flags, NAT64_BIT_STALE))
656 		return (1);
657 
658 	/* State is not yet initialized, it is going to be READY */
659 	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
660 		return (0);
661 
662 	age = GET_AGE(state->timestamp);
663 	switch (state->proto) {
664 	case IPPROTO_TCP:
665 		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
666 			ttl = cfg->st_close_ttl;
667 		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
668 			ttl = cfg->st_estab_ttl;
669 		else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
670 			ttl = cfg->st_syn_ttl;
671 		else
672 			ttl = cfg->st_syn_ttl;
673 		if (age > ttl)
674 			return (1);
675 		break;
676 	case IPPROTO_UDP:
677 		if (age > cfg->st_udp_ttl)
678 			return (1);
679 		break;
680 	case IPPROTO_ICMP:
681 		if (age > cfg->st_icmp_ttl)
682 			return (1);
683 		break;
684 	}
685 	return (0);
686 }
687 
/*
 * Expire the states of one portgroup.
 *
 * Every state whose freemask bit is clear (i.e. which is in use) is
 * checked with nat64lsn_check_state().  Expiry takes two passes: in the
 * first one the state is unlinked from its host and marked STALE, in a
 * later one its flags are cleared and its freemask bit is set again.
 *
 * Returns 1 if the portgroup has not had a live state for at least
 * cfg->pg_delete_delay and thus may be deleted, 0 otherwise.
 */
static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	for (c = 0; c < pg->chunks_count; c++) {
		/* Work on a snapshot of this chunk's freemask. */
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			/* Free state, nothing to do. */
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				/* Still alive: keeps the PG alive too. */
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				/* Flags must be cleared before the bit is set. */
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			host->states_count--;
			HOST_UNLOCK(host);

			/* Reset READY flag */
			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
			/* And set STALE flag */
			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
			ck_pr_fence_store();
			/*
			 * Now translate6 will not use this state, wait
			 * until it become safe for translate4, then mark
			 * state as free.
			 */
		}
	}

	/*
	 * We have some alive states, update timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	/* Not idle for long enough yet. */
	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}
756 
757 static void
758 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
759     struct nat64lsn_pg_slist *portgroups)
760 {
761 	struct nat64lsn_alias *alias;
762 	struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
763 	uint32_t *pgmask, *pgidx;
764 	int i, idx;
765 
766 	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
767 		alias = &cfg->aliases[i];
768 		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
769 			if (nat64lsn_maintain_pg(cfg, pg) == 0)
770 				continue;
771 			/* Always keep first PG */
772 			if (pg->base_port == NAT64_MIN_PORT)
773 				continue;
774 			/*
775 			 * PG is expired, unlink it and schedule for
776 			 * deferred destroying.
777 			 */
778 			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
779 			switch (pg->proto) {
780 			case IPPROTO_TCP:
781 				pgmask = alias->tcp_pgmask;
782 				pgptr = &alias->tcp_pg;
783 				pgidx = &alias->tcp_pgidx;
784 				firstpg = alias->tcp[0]->pgptr[0];
785 				break;
786 			case IPPROTO_UDP:
787 				pgmask = alias->udp_pgmask;
788 				pgptr = &alias->udp_pg;
789 				pgidx = &alias->udp_pgidx;
790 				firstpg = alias->udp[0]->pgptr[0];
791 				break;
792 			case IPPROTO_ICMP:
793 				pgmask = alias->icmp_pgmask;
794 				pgptr = &alias->icmp_pg;
795 				pgidx = &alias->icmp_pgidx;
796 				firstpg = alias->icmp[0]->pgptr[0];
797 				break;
798 			}
799 			/* Reset the corresponding bit in pgmask array. */
800 			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
801 			ck_pr_fence_store();
802 			/* If last used PG points to this PG, reset it. */
803 			ck_pr_cas_ptr(pgptr, pg, firstpg);
804 			ck_pr_cas_32(pgidx, idx, 0);
805 			/* Unlink PG from alias's chain */
806 			ALIAS_LOCK(alias);
807 			CK_SLIST_REMOVE(&alias->portgroups, pg,
808 			    nat64lsn_pg, entries);
809 			alias->portgroups_count--;
810 			ALIAS_UNLOCK(alias);
811 			/* And link to job's chain for deferred destroying */
812 			NAT64STAT_INC(&cfg->base.stats, spgdeleted);
813 			CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
814 		}
815 	}
816 }
817 
/*
 * Collect hosts that are no longer in use.
 *
 * Expiry takes two passes.  A host without states whose age reached
 * cfg->host_delete_delay is first only marked NAT64LSN_DEADHOST.  If it
 * still has no states on a later pass it is unlinked from the hosts hash
 * and inserted into @hosts for deferred destruction; if it got states in
 * between, the mark is dropped again.
 */
static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
	struct nat64lsn_host *host, *tmp;
	int i;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
		    entries, tmp) {
			/* Is host was marked in previous call? */
			if (host->flags & NAT64LSN_DEADHOST) {
				if (host->states_count > 0) {
					/* In use again, give it another chance. */
					host->flags &= ~NAT64LSN_DEADHOST;
					continue;
				}
				/*
				 * Unlink host from hash table and schedule
				 * it for deferred destroying.
				 */
				CFG_LOCK(cfg);
				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
				    nat64lsn_host, entries);
				cfg->hosts_count--;
				CFG_UNLOCK(cfg);
				CK_SLIST_INSERT_HEAD(hosts, host, entries);
				continue;
			}
			/* Too young to be expired. */
			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
				continue;
			if (host->states_count > 0)
				continue;
			/* Mark host as going to be expired in next pass */
			host->flags |= NAT64LSN_DEADHOST;
			ck_pr_fence_store();
		}
	}
}
856 
/*
 * Placeholder for pgchunk expiration.
 *
 * The implementation is compiled out, so the function currently always
 * returns NULL, i.e. no pgchunk is ever handed over for destruction.
 * The disabled draft only looks at TCP chunks and never selects a chunk
 * to return.
 */
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}
886 
#if 0
/*
 * Disabled draft: grow the states hash of hosts that hold many states.
 *
 * For every host whose number of states is at least twice its hash size
 * (and whose hash is below NAT64LSN_MAX_HSIZE) a table of double size is
 * allocated and initialized and the host is flagged NAT64LSN_GROWHASH.
 *
 * XXX: the new table is neither installed into the host nor freed, so
 * this must not be enabled before the rehash step is written.
 */
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			 if (h->states_count / 2 < h->states_hashsize ||
			     h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				 continue;
			 hsize = h->states_hashsize * 2;
			 /* malloc(9) takes the malloc type before the flags. */
			 hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN,
			     M_NOWAIT);
			 if (hash == NULL)
				 continue;
			 /* Initialize every bucket of the new table. */
			 for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[j]);

			 ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif
912 
913 /*
914  * This procedure is used to perform various maintance
915  * on dynamic hash list. Currently it is called every 4 seconds.
916  */
917 static void
918 nat64lsn_periodic(void *data)
919 {
920 	struct nat64lsn_job_item *ji;
921 	struct nat64lsn_cfg *cfg;
922 
923 	cfg = (struct nat64lsn_cfg *) data;
924 	CURVNET_SET(cfg->vp);
925 	if (cfg->hosts_count > 0) {
926 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
927 		if (ji != NULL) {
928 			ji->jtype = JTYPE_DESTROY;
929 			CK_SLIST_INIT(&ji->hosts);
930 			CK_SLIST_INIT(&ji->portgroups);
931 			nat64lsn_expire_hosts(cfg, &ji->hosts);
932 			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
933 			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
934 			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
935 			    nat64lsn_job_destroy);
936 		} else
937 			NAT64STAT_INC(&cfg->base.stats, jnomem);
938 	}
939 	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
940 	CURVNET_RESTORE();
941 }
942 
943 #define	ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage): 0)
944 #define	HOST_ERROR(stage)		ALLOC_ERROR(stage, 1)
945 #define	PG_ERROR(stage)			ALLOC_ERROR(stage, 2)
946 static int
947 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
948 {
949 	char a[INET6_ADDRSTRLEN];
950 	struct nat64lsn_aliaslink *link;
951 	struct nat64lsn_host *host;
952 	struct nat64lsn_state *state;
953 	uint32_t hval, data[2];
954 	int i;
955 
956 	/* Check that host was not yet added. */
957 	NAT64LSN_EPOCH_ASSERT();
958 	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
959 		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
960 			/* The host was allocated in previous call. */
961 			ji->host = host;
962 			goto get_state;
963 		}
964 	}
965 
966 	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
967 	if (ji->host == NULL)
968 		return (HOST_ERROR(1));
969 
970 	host->states_hashsize = NAT64LSN_HSIZE;
971 	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
972 	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
973 	if (host->states_hash == NULL) {
974 		uma_zfree(nat64lsn_host_zone, host);
975 		return (HOST_ERROR(2));
976 	}
977 
978 	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
979 	if (link == NULL) {
980 		free(host->states_hash, M_NAT64LSN);
981 		uma_zfree(nat64lsn_host_zone, host);
982 		return (HOST_ERROR(3));
983 	}
984 
985 	/* Initialize */
986 	HOST_LOCK_INIT(host);
987 	SET_AGE(host->timestamp);
988 	host->addr = ji->f_id.src_ip6;
989 	host->hval = ji->src6_hval;
990 	host->flags = 0;
991 	host->states_count = 0;
992 	host->states_hashsize = NAT64LSN_HSIZE;
993 	CK_SLIST_INIT(&host->aliases);
994 	for (i = 0; i < host->states_hashsize; i++)
995 		CK_SLIST_INIT(&host->states_hash[i]);
996 
997 	/* Determine alias from flow hash. */
998 	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
999 	link->alias = &ALIAS_BYHASH(cfg, hval);
1000 	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1001 
1002 	ALIAS_LOCK(link->alias);
1003 	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1004 	link->alias->hosts_count++;
1005 	ALIAS_UNLOCK(link->alias);
1006 
1007 	CFG_LOCK(cfg);
1008 	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
1009 	cfg->hosts_count++;
1010 	CFG_UNLOCK(cfg);
1011 
1012 get_state:
1013 	data[0] = ji->faddr;
1014 	data[1] = (ji->f_id.dst_port << 16) | ji->port;
1015 	ji->state_hval = hval = STATE_HVAL(cfg, data);
1016 	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1017 	    ji->faddr, ji->port, ji->proto);
1018 	/*
1019 	 * We failed to obtain new state, used alias needs new PG.
1020 	 * XXX: or another alias should be used.
1021 	 */
1022 	if (state == NULL) {
1023 		/* Try to allocate new PG */
1024 		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1025 			return (HOST_ERROR(4));
1026 		/* We assume that nat64lsn_alloc_pg() got state */
1027 	} else
1028 		ji->state = state;
1029 
1030 	ji->done = 1;
1031 	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1032 	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1033 	return (HOST_ERROR(0));
1034 }
1035 
/*
 * Find the first clear bit in a bitmap made of 32 32-bit words.
 *
 * Words are scanned from data[0] upwards and bits within a word from the
 * least significant one.  Returns the zero-based bit index (0..1023) of
 * the first clear bit, or -1 when every bit is set.
 */
static int
nat64lsn_find_pg_place(uint32_t *data)
{
	uint32_t avail;
	int word;

	for (word = 0; word < 32; word++) {
		/* Set bits of 'avail' mark the free positions of this word. */
		avail = ~data[word];
		if (avail != 0)
			return (word * 32 + ffs(avail) - 1);
	}
	return (-1);
}
1048 
1049 static int
1050 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1051     struct nat64lsn_alias *alias, uint32_t *chunkmask,
1052     uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
1053     struct nat64lsn_pg **pgptr, uint8_t proto)
1054 {
1055 	struct nat64lsn_pg *pg;
1056 	int i, pg_idx, chunk_idx;
1057 
1058 	/* Find place in pgchunk where PG can be added */
1059 	pg_idx = nat64lsn_find_pg_place(pgmask);
1060 	if (pg_idx < 0)	/* no more PGs */
1061 		return (PG_ERROR(1));
1062 	/* Check that we have allocated pgchunk for given PG index */
1063 	chunk_idx = pg_idx / 32;
1064 	if (!ISSET32(*chunkmask, chunk_idx)) {
1065 		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1066 		    M_NOWAIT);
1067 		if (chunks[chunk_idx] == NULL)
1068 			return (PG_ERROR(2));
1069 		ck_pr_bts_32(chunkmask, chunk_idx);
1070 		ck_pr_fence_store();
1071 	}
1072 	/* Allocate PG and states chunks */
1073 	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1074 	if (pg == NULL)
1075 		return (PG_ERROR(3));
1076 	pg->chunks_count = cfg->states_chunks;
1077 	if (pg->chunks_count > 1) {
1078 		pg->freemask_chunk = malloc(pg->chunks_count *
1079 		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1080 		if (pg->freemask_chunk == NULL) {
1081 			uma_zfree(nat64lsn_pg_zone, pg);
1082 			return (PG_ERROR(4));
1083 		}
1084 		pg->states_chunk = malloc(pg->chunks_count *
1085 		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1086 		    M_NOWAIT | M_ZERO);
1087 		if (pg->states_chunk == NULL) {
1088 			free(pg->freemask_chunk, M_NAT64LSN);
1089 			uma_zfree(nat64lsn_pg_zone, pg);
1090 			return (PG_ERROR(5));
1091 		}
1092 		for (i = 0; i < pg->chunks_count; i++) {
1093 			pg->states_chunk[i] = uma_zalloc(
1094 			    nat64lsn_state_zone, M_NOWAIT);
1095 			if (pg->states_chunk[i] == NULL)
1096 				goto states_failed;
1097 		}
1098 		memset(pg->freemask_chunk, 0xff,
1099 		    sizeof(uint64_t) * pg->chunks_count);
1100 	} else {
1101 		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1102 		if (pg->states == NULL) {
1103 			uma_zfree(nat64lsn_pg_zone, pg);
1104 			return (PG_ERROR(6));
1105 		}
1106 		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1107 	}
1108 
1109 	/* Initialize PG and hook it to pgchunk */
1110 	SET_AGE(pg->timestamp);
1111 	pg->proto = proto;
1112 	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
1113 	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1114 	ck_pr_fence_store();
1115 	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
1116 	ck_pr_store_ptr(pgptr, pg);
1117 
1118 	ALIAS_LOCK(alias);
1119 	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1120 	SET_AGE(alias->timestamp);
1121 	alias->portgroups_count++;
1122 	ALIAS_UNLOCK(alias);
1123 	NAT64STAT_INC(&cfg->base.stats, spgcreated);
1124 	return (PG_ERROR(0));
1125 
1126 states_failed:
1127 	for (i = 0; i < pg->chunks_count; i++)
1128 		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1129 	free(pg->freemask_chunk, M_NAT64LSN);
1130 	free(pg->states_chunk, M_NAT64LSN);
1131 	uma_zfree(nat64lsn_pg_zone, pg);
1132 	return (PG_ERROR(7));
1133 }
1134 
1135 static int
1136 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1137 {
1138 	struct nat64lsn_aliaslink *link;
1139 	struct nat64lsn_alias *alias;
1140 	int ret;
1141 
1142 	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1143 	if (link == NULL)
1144 		return (PG_ERROR(1));
1145 
1146 	/*
1147 	 * TODO: check that we did not already allocated PG in
1148 	 *	 previous call.
1149 	 */
1150 
1151 	ret = 0;
1152 	alias = link->alias;
1153 	/* Find place in pgchunk where PG can be added */
1154 	switch (ji->proto) {
1155 	case IPPROTO_TCP:
1156 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1157 		    &alias->tcp_chunkmask, alias->tcp_pgmask,
1158 		    alias->tcp, &alias->tcp_pg, ji->proto);
1159 		break;
1160 	case IPPROTO_UDP:
1161 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1162 		    &alias->udp_chunkmask, alias->udp_pgmask,
1163 		    alias->udp, &alias->udp_pg, ji->proto);
1164 		break;
1165 	case IPPROTO_ICMP:
1166 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1167 		    &alias->icmp_chunkmask, alias->icmp_pgmask,
1168 		    alias->icmp, &alias->icmp_pg, ji->proto);
1169 		break;
1170 	default:
1171 		panic("%s: wrong proto %d", __func__, ji->proto);
1172 	}
1173 	if (ret == PG_ERROR(1)) {
1174 		/*
1175 		 * PG_ERROR(1) means that alias lacks free PGs
1176 		 * XXX: try next alias.
1177 		 */
1178 		printf("NAT64LSN: %s: failed to obtain PG\n",
1179 		    __func__);
1180 		return (ret);
1181 	}
1182 	if (ret == PG_ERROR(0)) {
1183 		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1184 		    ji->state_hval, ji->faddr, ji->port, ji->proto);
1185 		if (ji->state == NULL)
1186 			ret = PG_ERROR(8);
1187 		else
1188 			ji->done = 1;
1189 	}
1190 	return (ret);
1191 }
1192 
1193 static void
1194 nat64lsn_do_request(void *data)
1195 {
1196 	struct epoch_tracker et;
1197 	struct nat64lsn_job_head jhead;
1198 	struct nat64lsn_job_item *ji, *ji2;
1199 	struct nat64lsn_cfg *cfg;
1200 	int jcount;
1201 	uint8_t flags;
1202 
1203 	cfg = (struct nat64lsn_cfg *)data;
1204 	if (cfg->jlen == 0)
1205 		return;
1206 
1207 	CURVNET_SET(cfg->vp);
1208 	STAILQ_INIT(&jhead);
1209 
1210 	/* Grab queue */
1211 	JQUEUE_LOCK();
1212 	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1213 	jcount = cfg->jlen;
1214 	cfg->jlen = 0;
1215 	JQUEUE_UNLOCK();
1216 
1217 	/* TODO: check if we need to resize hash */
1218 
1219 	NAT64STAT_INC(&cfg->base.stats, jcalls);
1220 	DPRINTF(DP_JQUEUE, "count=%d", jcount);
1221 
1222 	/*
1223 	 * TODO:
1224 	 * What we should do here is to build a hash
1225 	 * to ensure we don't have lots of duplicate requests.
1226 	 * Skip this for now.
1227 	 *
1228 	 * TODO: Limit per-call number of items
1229 	 */
1230 
1231 	NAT64LSN_EPOCH_ENTER(et);
1232 	STAILQ_FOREACH(ji, &jhead, entries) {
1233 		switch (ji->jtype) {
1234 		case JTYPE_NEWHOST:
1235 			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1236 				NAT64STAT_INC(&cfg->base.stats, jhostfails);
1237 			break;
1238 		case JTYPE_NEWPORTGROUP:
1239 			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1240 				NAT64STAT_INC(&cfg->base.stats, jportfails);
1241 			break;
1242 		default:
1243 			continue;
1244 		}
1245 		if (ji->done != 0) {
1246 			flags = ji->proto != IPPROTO_TCP ? 0 :
1247 			    convert_tcp_flags(ji->f_id._flags);
1248 			nat64lsn_translate6_internal(cfg, &ji->m,
1249 			    ji->state, flags);
1250 			NAT64STAT_INC(&cfg->base.stats, jreinjected);
1251 		}
1252 	}
1253 	NAT64LSN_EPOCH_EXIT(et);
1254 
1255 	ji = STAILQ_FIRST(&jhead);
1256 	while (ji != NULL) {
1257 		ji2 = STAILQ_NEXT(ji, entries);
1258 		/*
1259 		 * In any case we must free mbuf if
1260 		 * translator did not consumed it.
1261 		 */
1262 		m_freem(ji->m);
1263 		uma_zfree(nat64lsn_job_zone, ji);
1264 		ji = ji2;
1265 	}
1266 	CURVNET_RESTORE();
1267 }
1268 
1269 static struct nat64lsn_job_item *
1270 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1271 {
1272 	struct nat64lsn_job_item *ji;
1273 
1274 	/*
1275 	 * Do not try to lock possibly contested mutex if we're near the
1276 	 * limit. Drop packet instead.
1277 	 */
1278 	ji = NULL;
1279 	if (cfg->jlen >= cfg->jmaxlen)
1280 		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1281 	else {
1282 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1283 		if (ji == NULL)
1284 			NAT64STAT_INC(&cfg->base.stats, jnomem);
1285 	}
1286 	if (ji == NULL) {
1287 		NAT64STAT_INC(&cfg->base.stats, dropped);
1288 		DPRINTF(DP_DROPS, "failed to create job");
1289 	} else {
1290 		ji->jtype = jtype;
1291 		ji->done = 0;
1292 	}
1293 	return (ji);
1294 }
1295 
1296 static void
1297 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1298 {
1299 
1300 	JQUEUE_LOCK();
1301 	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1302 	NAT64STAT_INC(&cfg->base.stats, jrequests);
1303 	cfg->jlen++;
1304 
1305 	if (callout_pending(&cfg->jcallout) == 0)
1306 		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1307 	JQUEUE_UNLOCK();
1308 }
1309 
1310 static void
1311 nat64lsn_job_destroy(epoch_context_t ctx)
1312 {
1313 	struct nat64lsn_job_item *ji;
1314 	struct nat64lsn_host *host;
1315 	struct nat64lsn_pg *pg;
1316 	int i;
1317 
1318 	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1319 	MPASS(ji->jtype == JTYPE_DESTROY);
1320 	while (!CK_SLIST_EMPTY(&ji->hosts)) {
1321 		host = CK_SLIST_FIRST(&ji->hosts);
1322 		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1323 		if (host->states_count > 0) {
1324 			/*
1325 			 * XXX: The state has been created
1326 			 * during host deletion.
1327 			 */
1328 			printf("NAT64LSN: %s: destroying host with %d "
1329 			    "states\n", __func__, host->states_count);
1330 		}
1331 		nat64lsn_destroy_host(host);
1332 	}
1333 	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1334 		pg = CK_SLIST_FIRST(&ji->portgroups);
1335 		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1336 		for (i = 0; i < pg->chunks_count; i++) {
1337 			if (FREEMASK_BITCOUNT(pg, i) != 64) {
1338 				/*
1339 				 * XXX: The state has been created during
1340 				 * PG deletion.
1341 				 */
1342 				printf("NAT64LSN: %s: destroying PG %p "
1343 				    "with non-empty chunk %d\n", __func__,
1344 				    pg, i);
1345 			}
1346 		}
1347 		nat64lsn_destroy_pg(pg);
1348 	}
1349 	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1350 	uma_zfree(nat64lsn_job_zone, ji);
1351 }
1352 
1353 static int
1354 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1355     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1356     in_addr_t faddr, uint16_t port, uint8_t proto)
1357 {
1358 	struct nat64lsn_job_item *ji;
1359 
1360 	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1361 	if (ji != NULL) {
1362 		ji->m = *mp;
1363 		ji->f_id = *f_id;
1364 		ji->faddr = faddr;
1365 		ji->port = port;
1366 		ji->proto = proto;
1367 		ji->src6_hval = hval;
1368 
1369 		nat64lsn_enqueue_job(cfg, ji);
1370 		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1371 		*mp = NULL;
1372 	}
1373 	return (IP_FW_DENY);
1374 }
1375 
1376 static int
1377 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1378     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1379     in_addr_t faddr, uint16_t port, uint8_t proto)
1380 {
1381 	struct nat64lsn_job_item *ji;
1382 
1383 	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1384 	if (ji != NULL) {
1385 		ji->m = *mp;
1386 		ji->f_id = *f_id;
1387 		ji->faddr = faddr;
1388 		ji->port = port;
1389 		ji->proto = proto;
1390 		ji->state_hval = hval;
1391 		ji->host = host;
1392 
1393 		nat64lsn_enqueue_job(cfg, ji);
1394 		NAT64STAT_INC(&cfg->base.stats, jportreq);
1395 		*mp = NULL;
1396 	}
1397 	return (IP_FW_DENY);
1398 }
1399 
1400 static int
1401 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1402     struct nat64lsn_state *state, uint8_t flags)
1403 {
1404 	struct pfloghdr loghdr, *logdata;
1405 	int ret;
1406 	uint16_t ts;
1407 
1408 	/* Update timestamp and flags if needed */
1409 	SET_AGE(ts);
1410 	if (state->timestamp != ts)
1411 		state->timestamp = ts;
1412 	if ((state->flags & flags) != 0)
1413 		state->flags |= flags;
1414 
1415 	if (cfg->base.flags & NAT64_LOG) {
1416 		logdata = &loghdr;
1417 		nat64lsn_log(logdata, *mp, AF_INET6, state);
1418 	} else
1419 		logdata = NULL;
1420 
1421 	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1422 	    htons(state->aport), &cfg->base, logdata);
1423 	if (ret == NAT64SKIP)
1424 		return (cfg->nomatch_verdict);
1425 	if (ret == NAT64RETURN)
1426 		*mp = NULL;
1427 	return (IP_FW_DENY);
1428 }
1429 
1430 static int
1431 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1432     struct mbuf **mp)
1433 {
1434 	struct nat64lsn_state *state;
1435 	struct nat64lsn_host *host;
1436 	struct icmp6_hdr *icmp6;
1437 	uint32_t addr, hval, data[2];
1438 	int offset, proto;
1439 	uint16_t port;
1440 	uint8_t flags;
1441 
1442 	/* Check if protocol is supported */
1443 	port = f_id->src_port;
1444 	proto = f_id->proto;
1445 	switch (f_id->proto) {
1446 	case IPPROTO_ICMPV6:
1447 		/*
1448 		 * For ICMPv6 echo reply/request we use icmp6_id as
1449 		 * local port.
1450 		 */
1451 		offset = 0;
1452 		proto = nat64_getlasthdr(*mp, &offset);
1453 		if (proto < 0) {
1454 			NAT64STAT_INC(&cfg->base.stats, dropped);
1455 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
1456 			return (IP_FW_DENY);
1457 		}
1458 		if (proto == IPPROTO_ICMPV6) {
1459 			icmp6 = mtodo(*mp, offset);
1460 			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1461 			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1462 				port = ntohs(icmp6->icmp6_id);
1463 		}
1464 		proto = IPPROTO_ICMP;
1465 		/* FALLTHROUGH */
1466 	case IPPROTO_TCP:
1467 	case IPPROTO_UDP:
1468 		break;
1469 	default:
1470 		NAT64STAT_INC(&cfg->base.stats, noproto);
1471 		return (cfg->nomatch_verdict);
1472 	}
1473 
1474 	/* Extract IPv4 from destination IPv6 address */
1475 	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
1476 	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1477 		char a[INET_ADDRSTRLEN];
1478 
1479 		NAT64STAT_INC(&cfg->base.stats, dropped);
1480 		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1481 		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
1482 		return (IP_FW_DENY); /* XXX: add extra stats? */
1483 	}
1484 
1485 	/* Try to find host */
1486 	hval = HOST_HVAL(cfg, &f_id->src_ip6);
1487 	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1488 		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1489 			break;
1490 	}
1491 	/* We use IPv4 address in host byte order */
1492 	addr = ntohl(addr);
1493 	if (host == NULL)
1494 		return (nat64lsn_request_host(cfg, f_id, mp,
1495 		    hval, addr, port, proto));
1496 
1497 	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
1498 
1499 	data[0] = addr;
1500 	data[1] = (f_id->dst_port << 16) | port;
1501 	hval = STATE_HVAL(cfg, data);
1502 	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
1503 	    port, proto);
1504 	if (state == NULL)
1505 		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1506 		    port, proto));
1507 	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1508 }
1509 
1510 /*
1511  * Main dataplane entry point.
1512  */
1513 int
1514 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1515     ipfw_insn *cmd, int *done)
1516 {
1517 	struct nat64lsn_cfg *cfg;
1518 	ipfw_insn *icmd;
1519 	int ret;
1520 
1521 	IPFW_RLOCK_ASSERT(ch);
1522 
1523 	*done = 0;	/* continue the search in case of failure */
1524 	icmd = cmd + 1;
1525 	if (cmd->opcode != O_EXTERNAL_ACTION ||
1526 	    cmd->arg1 != V_nat64lsn_eid ||
1527 	    icmd->opcode != O_EXTERNAL_INSTANCE ||
1528 	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1529 		return (IP_FW_DENY);
1530 
1531 	*done = 1;	/* terminate the search */
1532 
1533 	switch (args->f_id.addr_type) {
1534 	case 4:
1535 		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1536 		break;
1537 	case 6:
1538 		/*
1539 		 * Check that destination IPv6 address matches our prefix6.
1540 		 */
1541 		if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1542 		    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
1543 		    cfg->base.plat_plen / 8) != 0) {
1544 			ret = cfg->nomatch_verdict;
1545 			break;
1546 		}
1547 		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
1548 		break;
1549 	default:
1550 		ret = cfg->nomatch_verdict;
1551 	}
1552 
1553 	if (ret != IP_FW_PASS && args->m != NULL) {
1554 		m_freem(args->m);
1555 		args->m = NULL;
1556 	}
1557 	return (ret);
1558 }
1559 
1560 static int
1561 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1562 {
1563 	struct nat64lsn_states_chunk *chunk;
1564 	int i;
1565 
1566 	chunk = (struct nat64lsn_states_chunk *)mem;
1567 	for (i = 0; i < 64; i++)
1568 		chunk->state[i].flags = 0;
1569 	return (0);
1570 }
1571 
1572 void
1573 nat64lsn_init_internal(void)
1574 {
1575 
1576 	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1577 	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1578 	    UMA_ALIGN_PTR, 0);
1579 	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1580 	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1581 	    UMA_ALIGN_PTR, 0);
1582 	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1583 	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1584 	    UMA_ALIGN_PTR, 0);
1585 	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1586 	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
1587 	    UMA_ALIGN_PTR, 0);
1588 	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1589 	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1590 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1591 	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1592 	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
1593 	    UMA_ALIGN_PTR, 0);
1594 	JQUEUE_LOCK_INIT();
1595 }
1596 
1597 void
1598 nat64lsn_uninit_internal(void)
1599 {
1600 
1601 	/* XXX: epoch_task drain */
1602 	JQUEUE_LOCK_DESTROY();
1603 	uma_zdestroy(nat64lsn_host_zone);
1604 	uma_zdestroy(nat64lsn_pgchunk_zone);
1605 	uma_zdestroy(nat64lsn_pg_zone);
1606 	uma_zdestroy(nat64lsn_aliaslink_zone);
1607 	uma_zdestroy(nat64lsn_state_zone);
1608 	uma_zdestroy(nat64lsn_job_zone);
1609 }
1610 
1611 void
1612 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1613 {
1614 
1615 	CALLOUT_LOCK(cfg);
1616 	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1617 	    nat64lsn_periodic, cfg);
1618 	CALLOUT_UNLOCK(cfg);
1619 }
1620 
1621 struct nat64lsn_cfg *
1622 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1623 {
1624 	struct nat64lsn_cfg *cfg;
1625 	struct nat64lsn_alias *alias;
1626 	int i, naddr;
1627 
1628 	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1629 	    M_WAITOK | M_ZERO);
1630 
1631 	CFG_LOCK_INIT(cfg);
1632 	CALLOUT_LOCK_INIT(cfg);
1633 	STAILQ_INIT(&cfg->jhead);
1634 	cfg->vp = curvnet;
1635 	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1636 
1637 	cfg->hash_seed = arc4random();
1638 	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1639 	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1640 	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1641 	for (i = 0; i < cfg->hosts_hashsize; i++)
1642 		CK_SLIST_INIT(&cfg->hosts_hash[i]);
1643 
1644 	naddr = 1 << (32 - plen);
1645 	cfg->prefix4 = prefix;
1646 	cfg->pmask4 = prefix | (naddr - 1);
1647 	cfg->plen4 = plen;
1648 	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1649 	    M_NAT64LSN, M_WAITOK | M_ZERO);
1650 	for (i = 0; i < naddr; i++) {
1651 		alias = &cfg->aliases[i];
1652 		alias->addr = prefix + i; /* host byte order */
1653 		CK_SLIST_INIT(&alias->hosts);
1654 		ALIAS_LOCK_INIT(alias);
1655 	}
1656 
1657         callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1658         callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1659 
1660 	return (cfg);
1661 }
1662 
1663 static void
1664 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1665 {
1666 	int i;
1667 
1668 	if (pg->chunks_count == 1) {
1669 		uma_zfree(nat64lsn_state_zone, pg->states);
1670 	} else {
1671 		for (i = 0; i < pg->chunks_count; i++)
1672 			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1673 		free(pg->states_chunk, M_NAT64LSN);
1674 		free(pg->freemask_chunk, M_NAT64LSN);
1675 	}
1676 	uma_zfree(nat64lsn_pg_zone, pg);
1677 }
1678 
1679 static void
1680 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1681     struct nat64lsn_alias *alias)
1682 {
1683 	struct nat64lsn_pg *pg;
1684 	int i;
1685 
1686 	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1687 		pg = CK_SLIST_FIRST(&alias->portgroups);
1688 		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1689 		nat64lsn_destroy_pg(pg);
1690 	}
1691 	for (i = 0; i < 32; i++) {
1692 		if (ISSET32(alias->tcp_chunkmask, i))
1693 			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1694 		if (ISSET32(alias->udp_chunkmask, i))
1695 			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1696 		if (ISSET32(alias->icmp_chunkmask, i))
1697 			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1698 	}
1699 	ALIAS_LOCK_DESTROY(alias);
1700 }
1701 
1702 static void
1703 nat64lsn_destroy_host(struct nat64lsn_host *host)
1704 {
1705 	struct nat64lsn_aliaslink *link;
1706 
1707 	while (!CK_SLIST_EMPTY(&host->aliases)) {
1708 		link = CK_SLIST_FIRST(&host->aliases);
1709 		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
1710 
1711 		ALIAS_LOCK(link->alias);
1712 		CK_SLIST_REMOVE(&link->alias->hosts, link,
1713 		    nat64lsn_aliaslink, alias_entries);
1714 		link->alias->hosts_count--;
1715 		ALIAS_UNLOCK(link->alias);
1716 
1717 		uma_zfree(nat64lsn_aliaslink_zone, link);
1718 	}
1719 	HOST_LOCK_DESTROY(host);
1720 	free(host->states_hash, M_NAT64LSN);
1721 	uma_zfree(nat64lsn_host_zone, host);
1722 }
1723 
1724 void
1725 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1726 {
1727 	struct nat64lsn_host *host;
1728 	int i;
1729 
1730 	CALLOUT_LOCK(cfg);
1731 	callout_drain(&cfg->periodic);
1732 	CALLOUT_UNLOCK(cfg);
1733 	callout_drain(&cfg->jcallout);
1734 
1735 	for (i = 0; i < cfg->hosts_hashsize; i++) {
1736 		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1737 			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1738 			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1739 			nat64lsn_destroy_host(host);
1740 		}
1741 	}
1742 
1743 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1744 		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1745 
1746 	CALLOUT_LOCK_DESTROY(cfg);
1747 	CFG_LOCK_DESTROY(cfg);
1748 	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1749 	free(cfg->hosts_hash, M_NAT64LSN);
1750 	free(cfg->aliases, M_NAT64LSN);
1751 	free(cfg, M_NAT64LSN);
1752 }
1753 
1754