xref: /freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c (revision 3dd5524264095ed8612c28908e13f80668eff2f9)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
36 #include <sys/ck.h>
37 #include <sys/epoch.h>
38 #include <sys/errno.h>
39 #include <sys/hash.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/rmlock.h>
46 #include <sys/socket.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/if_pflog.h>
53 #include <net/pfil.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_fw.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/ip_icmp.h>
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64 #include <netinet6/in6_var.h>
65 #include <netinet6/ip6_var.h>
66 #include <netinet6/ip_fw_nat64.h>
67 
68 #include <netpfil/ipfw/ip_fw_private.h>
69 #include <netpfil/pf/pf.h>
70 
71 #include "nat64lsn.h"
72 
73 MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
74 
75 #define	NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
76 #define	NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
77 #define	NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
78 #define	NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))
79 
80 static uma_zone_t nat64lsn_host_zone;
81 static uma_zone_t nat64lsn_pgchunk_zone;
82 static uma_zone_t nat64lsn_pg_zone;
83 static uma_zone_t nat64lsn_aliaslink_zone;
84 static uma_zone_t nat64lsn_state_zone;
85 static uma_zone_t nat64lsn_job_zone;
86 
/* Periodic maintenance handler; it re-arms its own callout. */
static void nat64lsn_periodic(void *data);

/* Period of nat64lsn_periodic(), in seconds (multiplied by hz when used). */
#define	PERIODIC_DELAY		4

/*
 * Look up the NAT64LSN instance named by (cmd)->arg1 in the given chain.
 * The whole expansion is parenthesized so that the cast applies to the
 * looked up object even when the macro is followed by "->" or another
 * postfix operator.
 */
#define	NAT64_LOOKUP(chain, cmd)	\
	((struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1))
/*
 * Kinds of work that can be placed on the delayed job queue.
 * The queue is used to create new hosts and new portgroups, and to
 * destroy objects that have been unlinked.
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,	/* allocate a host */
	JTYPE_NEWPORTGROUP,	/* allocate a portgroup */
	JTYPE_DESTROY,		/* release unlinked objects */
};
100 
101 struct nat64lsn_job_item {
102 	STAILQ_ENTRY(nat64lsn_job_item)	entries;
103 	enum nat64lsn_jtype	jtype;
104 
105 	union {
106 		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
107 			struct mbuf		*m;
108 			struct nat64lsn_host	*host;
109 			struct nat64lsn_state	*state;
110 			uint32_t		src6_hval;
111 			uint32_t		state_hval;
112 			struct ipfw_flow_id	f_id;
113 			in_addr_t		faddr;
114 			uint16_t		port;
115 			uint8_t			proto;
116 			uint8_t			done;
117 		};
118 		struct { /* used by JTYPE_DESTROY */
119 			struct nat64lsn_hosts_slist	hosts;
120 			struct nat64lsn_pg_slist	portgroups;
121 			struct nat64lsn_pgchunk		*pgchunk;
122 			struct epoch_context		epoch_ctx;
123 		};
124 	};
125 };
126 
127 static struct mtx jmtx;
128 #define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
129 #define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
130 #define	JQUEUE_LOCK()		mtx_lock(&jmtx)
131 #define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)
132 
133 static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
134     struct nat64lsn_job_item *ji);
135 static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
136     struct nat64lsn_job_item *ji);
137 static struct nat64lsn_job_item *nat64lsn_create_job(
138     struct nat64lsn_cfg *cfg, int jtype);
139 static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
140     struct nat64lsn_job_item *ji);
141 static void nat64lsn_job_destroy(epoch_context_t ctx);
142 static void nat64lsn_destroy_host(struct nat64lsn_host *host);
143 static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);
144 
145 static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
146     const struct ipfw_flow_id *f_id, struct mbuf **mp);
147 static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
148     struct ipfw_flow_id *f_id, struct mbuf **mp);
149 static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
150     struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
151 
/* Bit positions inside the flags word of a state. */
#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

/* The same flags expressed as masks. */
#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
/* All TCP tracking flags together. */
#define	NAT64_FLAGS_TCP		\
    (NAT64_FLAG_SYN | NAT64_FLAG_ESTAB | NAT64_FLAG_FIN)

#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)
165 
/*
 * Fold TCP header flags into the low bits kept in a state's flags:
 * FIN and SYN are copied as is, RST is reported in the FIN position and
 * ACK in the position two bits below it (treated as "established").
 * All other header flags are dropped.
 */
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{

	/* Both RST and ACK move down by two bits, so shift them together. */
	return ((flags & (TH_FIN | TH_SYN)) |
	    ((flags & (TH_RST | TH_ACK)) >> 2));
}
177 
178 static void
179 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
180     struct nat64lsn_state *state)
181 {
182 
183 	memset(plog, 0, sizeof(*plog));
184 	plog->length = PFLOG_HDRLEN;
185 	plog->af = family;
186 	plog->action = PF_NAT;
187 	plog->dir = PF_IN;
188 	plog->rulenr = htonl(state->ip_src);
189 	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
190 	    (state->proto << 8) | (state->ip_dst & 0xff));
191 	plog->ruleset[0] = '\0';
192 	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
193 	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
194 }
195 
/* Hash n 32-bit words starting at p, using seed s. */
#define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))

/* Hash of one IPv6 address and the matching bucket of the hosts table. */
#define	HOST_HVAL(c, a)		\
    HVAL((a), sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
#define	HOSTS(c, v)		\
    ((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

/*
 * Hash over two consecutive IPv6 addresses starting at f->dst_ip6.
 * NOTE(review): presumably covers dst_ip6 followed by src_ip6; confirm
 * against the layout of struct ipfw_flow_id.
 */
#define	ALIASLINK_HVAL(c, f)	\
    HVAL(&(f)->dst_ip6, sizeof(struct in6_addr) * 2 / sizeof(uint32_t), \
	(c)->hash_seed)
/* Alias selected by the low (32 - plen4) bits of v. */
#define	ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
205 static struct nat64lsn_aliaslink*
206 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
207     struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
208 {
209 
210 	/*
211 	 * We can implement some different algorithms how
212 	 * select an alias address.
213 	 * XXX: for now we use first available.
214 	 */
215 	return (CK_SLIST_FIRST(&host->aliases));
216 }
217 
/* Hash of a two-word state key and the matching bucket of a host. */
#define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
#define	STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])

/*
 * States chunk of portgroup p used for value v: the only chunk when the
 * portgroup has just one, otherwise the one picked by CHUNK_BY_FADDR().
 */
#define	STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
224 
/*
 * Accessors for the 64-bit mask of free states of a portgroup chunk.
 * A set bit means the corresponding state is free.
 *
 *  FREEMASK_FFSLL  - 1-based index of the first free state, 0 if none.
 *  FREEMASK_BTR    - atomically clear a bit (grab a state).
 *  FREEMASK_BTS    - atomically set a bit (release a state).
 *  FREEMASK_ISSET  - test a bit.
 *  FREEMASK_COPY   - load the whole mask into a uint64_t.
 */
#ifdef __LP64__
/* The mask is a single 64-bit word. */
#define	FREEMASK_FFSLL(pg, faddr)		\
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
/*
 * The mask is a pair of 32-bit words; word 0 holds bits 0..31 and
 * word 1 holds bits 32..63.
 */
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int bit;

	bit = ffsl(freemask[0]);
	if (bit != 0)
		return (bit);
	bit = ffsl(freemask[1]);
	return (bit != 0 ? bit + 32 : 0);
}
#define	FREEMASK_FFSLL(pg, faddr)		\
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */
260 
261 #define	NAT64LSN_TRY_PGCNT	32
262 static struct nat64lsn_pg*
263 nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
264     struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
265     uint32_t *pgidx, in_addr_t faddr)
266 {
267 	struct nat64lsn_pg *pg, *oldpg;
268 	uint32_t idx, oldidx;
269 	int cnt;
270 
271 	cnt = 0;
272 	/* First try last used PG */
273 	oldpg = pg = ck_pr_load_ptr(pgptr);
274 	idx = oldidx = ck_pr_load_32(pgidx);
275 	/* If pgidx is out of range, reset it to the first pgchunk */
276 	if (!ISSET32(*chunkmask, idx / 32))
277 		idx = 0;
278 	do {
279 		ck_pr_fence_load();
280 		if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
281 			/*
282 			 * If last used PG has not free states,
283 			 * try to update pointer.
284 			 * NOTE: it can be already updated by jobs handler,
285 			 *	 thus we use CAS operation.
286 			 */
287 			if (cnt > 0)
288 				ck_pr_cas_ptr(pgptr, oldpg, pg);
289 			return (pg);
290 		}
291 		/* Stop if idx is out of range */
292 		if (!ISSET32(*chunkmask, idx / 32))
293 			break;
294 
295 		if (ISSET32(pgmask[idx / 32], idx % 32))
296 			pg = ck_pr_load_ptr(
297 			    &chunks[idx / 32]->pgptr[idx % 32]);
298 		else
299 			pg = NULL;
300 
301 		idx++;
302 	} while (++cnt < NAT64LSN_TRY_PGCNT);
303 
304 	/* If pgidx is out of range, reset it to the first pgchunk */
305 	if (!ISSET32(*chunkmask, idx / 32))
306 		idx = 0;
307 	ck_pr_cas_32(pgidx, oldidx, idx);
308 	return (NULL);
309 }
310 
311 static struct nat64lsn_state*
312 nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
313     const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
314     uint16_t port, uint8_t proto)
315 {
316 	struct nat64lsn_aliaslink *link;
317 	struct nat64lsn_state *state;
318 	struct nat64lsn_pg *pg;
319 	int i, offset;
320 
321 	NAT64LSN_EPOCH_ASSERT();
322 
323 	/* Check that we already have state for given arguments */
324 	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
325 		if (state->proto == proto && state->ip_dst == faddr &&
326 		    state->sport == port && state->dport == f_id->dst_port)
327 			return (state);
328 	}
329 
330 	link = nat64lsn_get_aliaslink(cfg, host, f_id);
331 	if (link == NULL)
332 		return (NULL);
333 
334 	switch (proto) {
335 	case IPPROTO_TCP:
336 		pg = nat64lsn_get_pg(
337 		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
338 		    link->alias->tcp, &link->alias->tcp_pg,
339 		    &link->alias->tcp_pgidx, faddr);
340 		break;
341 	case IPPROTO_UDP:
342 		pg = nat64lsn_get_pg(
343 		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
344 		    link->alias->udp, &link->alias->udp_pg,
345 		    &link->alias->udp_pgidx, faddr);
346 		break;
347 	case IPPROTO_ICMP:
348 		pg = nat64lsn_get_pg(
349 		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
350 		    link->alias->icmp, &link->alias->icmp_pg,
351 		    &link->alias->icmp_pgidx, faddr);
352 		break;
353 	default:
354 		panic("%s: wrong proto %d", __func__, proto);
355 	}
356 	if (pg == NULL)
357 		return (NULL);
358 
359 	/* Check that PG has some free states */
360 	state = NULL;
361 	i = FREEMASK_BITCOUNT(pg, faddr);
362 	while (i-- > 0) {
363 		offset = FREEMASK_FFSLL(pg, faddr);
364 		if (offset == 0) {
365 			/*
366 			 * We lost the race.
367 			 * No more free states in this PG.
368 			 */
369 			break;
370 		}
371 
372 		/* Lets try to atomically grab the state */
373 		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
374 			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
375 			/* Initialize */
376 			state->flags = proto != IPPROTO_TCP ? 0 :
377 			    convert_tcp_flags(f_id->_flags);
378 			state->proto = proto;
379 			state->aport = pg->base_port + offset - 1;
380 			state->dport = f_id->dst_port;
381 			state->sport = port;
382 			state->ip6_dst = f_id->dst_ip6;
383 			state->ip_dst = faddr;
384 			state->ip_src = link->alias->addr;
385 			state->hval = hval;
386 			state->host = host;
387 			SET_AGE(state->timestamp);
388 
389 			/* Insert new state into host's hash table */
390 			HOST_LOCK(host);
391 			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
392 			    state, entries);
393 			host->states_count++;
394 			/*
395 			 * XXX: In case if host is going to be expired,
396 			 * reset NAT64LSN_DEADHOST flag.
397 			 */
398 			host->flags &= ~NAT64LSN_DEADHOST;
399 			HOST_UNLOCK(host);
400 			NAT64STAT_INC(&cfg->base.stats, screated);
401 			/* Mark the state as ready for translate4 */
402 			ck_pr_fence_store();
403 			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
404 			break;
405 		}
406 	}
407 	return (state);
408 }
409 
410 /*
411  * Inspects icmp packets to see if the message contains different
412  * packet header so we need to alter @addr and @port.
413  */
414 static int
415 inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
416     uint16_t *port)
417 {
418 	struct icmp *icmp;
419 	struct ip *ip;
420 	int off;
421 	uint8_t inner_proto;
422 
423 	ip = mtod(*mp, struct ip *); /* Outer IP header */
424 	off = (ip->ip_hl << 2) + ICMP_MINLEN;
425 	if ((*mp)->m_len < off)
426 		*mp = m_pullup(*mp, off);
427 	if (*mp == NULL)
428 		return (ENOMEM);
429 
430 	ip = mtod(*mp, struct ip *); /* Outer IP header */
431 	icmp = L3HDR(ip, struct icmp *);
432 	switch (icmp->icmp_type) {
433 	case ICMP_ECHO:
434 	case ICMP_ECHOREPLY:
435 		/* Use icmp ID as distinguisher */
436 		*port = ntohs(icmp->icmp_id);
437 		return (0);
438 	case ICMP_UNREACH:
439 	case ICMP_TIMXCEED:
440 		break;
441 	default:
442 		return (EOPNOTSUPP);
443 	}
444 	/*
445 	 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
446 	 * of ULP header.
447 	 */
448 	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
449 		return (EINVAL);
450 	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
451 		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
452 	if (*mp == NULL)
453 		return (ENOMEM);
454 	ip = mtodo(*mp, off); /* Inner IP header */
455 	inner_proto = ip->ip_p;
456 	off += ip->ip_hl << 2; /* Skip inner IP header */
457 	*addr = ntohl(ip->ip_src.s_addr);
458 	if ((*mp)->m_len < off + ICMP_MINLEN)
459 		*mp = m_pullup(*mp, off + ICMP_MINLEN);
460 	if (*mp == NULL)
461 		return (ENOMEM);
462 	switch (inner_proto) {
463 	case IPPROTO_TCP:
464 	case IPPROTO_UDP:
465 		/* Copy source port from the header */
466 		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
467 		*proto = inner_proto;
468 		return (0);
469 	case IPPROTO_ICMP:
470 		/*
471 		 * We will translate only ICMP errors for our ICMP
472 		 * echo requests.
473 		 */
474 		icmp = mtodo(*mp, off);
475 		if (icmp->icmp_type != ICMP_ECHO)
476 			return (EOPNOTSUPP);
477 		*port = ntohs(icmp->icmp_id);
478 		return (0);
479 	};
480 	return (EOPNOTSUPP);
481 }
482 
483 static struct nat64lsn_state*
484 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
485     in_addr_t faddr, uint16_t port, uint8_t proto)
486 {
487 	struct nat64lsn_state *state;
488 	struct nat64lsn_pg *pg;
489 	int chunk_idx, pg_idx, state_idx;
490 
491 	NAT64LSN_EPOCH_ASSERT();
492 
493 	if (port < NAT64_MIN_PORT)
494 		return (NULL);
495 	/*
496 	 * Alias keeps 32 pgchunks for each protocol.
497 	 * Each pgchunk has 32 pointers to portgroup.
498 	 * Each portgroup has 64 states for ports.
499 	 */
500 	port -= NAT64_MIN_PORT;
501 	chunk_idx = port / 2048;
502 
503 	port -= chunk_idx * 2048;
504 	pg_idx = port / 64;
505 	state_idx = port % 64;
506 
507 	/*
508 	 * First check in proto_chunkmask that we have allocated PG chunk.
509 	 * Then check in proto_pgmask that we have valid PG pointer.
510 	 */
511 	pg = NULL;
512 	switch (proto) {
513 	case IPPROTO_TCP:
514 		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
515 		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
516 			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
517 			break;
518 		}
519 		return (NULL);
520 	case IPPROTO_UDP:
521 		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
522 		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
523 			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
524 			break;
525 		}
526 		return (NULL);
527 	case IPPROTO_ICMP:
528 		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
529 		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
530 			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
531 			break;
532 		}
533 		return (NULL);
534 	default:
535 		panic("%s: wrong proto %d", __func__, proto);
536 	}
537 	if (pg == NULL)
538 		return (NULL);
539 
540 	if (FREEMASK_ISSET(pg, faddr, state_idx))
541 		return (NULL);
542 
543 	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
544 	ck_pr_fence_load();
545 	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
546 		return (state);
547 	return (NULL);
548 }
549 
550 /*
551  * Reassemble IPv4 fragments, make PULLUP if needed, get some ULP fields
552  * that might be unknown until reassembling is completed.
553  */
554 static struct mbuf*
555 nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
556     uint16_t *port)
557 {
558 	struct ip *ip;
559 	int len;
560 
561 	m = ip_reass(m);
562 	if (m == NULL)
563 		return (NULL);
564 	/* IP header must be contigious after ip_reass() */
565 	ip = mtod(m, struct ip *);
566 	len = ip->ip_hl << 2;
567 	switch (ip->ip_p) {
568 	case IPPROTO_ICMP:
569 		len += ICMP_MINLEN; /* Enough to get icmp_id */
570 		break;
571 	case IPPROTO_TCP:
572 		len += sizeof(struct tcphdr);
573 		break;
574 	case IPPROTO_UDP:
575 		len += sizeof(struct udphdr);
576 		break;
577 	default:
578 		m_freem(m);
579 		NAT64STAT_INC(&cfg->base.stats, noproto);
580 		return (NULL);
581 	}
582 	if (m->m_len < len) {
583 		m = m_pullup(m, len);
584 		if (m == NULL) {
585 			NAT64STAT_INC(&cfg->base.stats, nomem);
586 			return (NULL);
587 		}
588 		ip = mtod(m, struct ip *);
589 	}
590 	switch (ip->ip_p) {
591 	case IPPROTO_TCP:
592 		*port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
593 		break;
594 	case IPPROTO_UDP:
595 		*port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
596 		break;
597 	}
598 	return (m);
599 }
600 
601 static int
602 nat64lsn_translate4(struct nat64lsn_cfg *cfg,
603     const struct ipfw_flow_id *f_id, struct mbuf **mp)
604 {
605 	struct pfloghdr loghdr, *logdata;
606 	struct in6_addr src6;
607 	struct nat64lsn_state *state;
608 	struct nat64lsn_alias *alias;
609 	uint32_t addr, flags;
610 	uint16_t port, ts;
611 	int ret;
612 	uint8_t proto;
613 
614 	addr = f_id->dst_ip;
615 	port = f_id->dst_port;
616 	proto = f_id->proto;
617 	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
618 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
619 		return (cfg->nomatch_verdict);
620 	}
621 
622 	/* Reassemble fragments if needed */
623 	ret = ntohs(mtod(*mp, struct ip *)->ip_off);
624 	if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
625 		*mp = nat64lsn_reassemble4(cfg, *mp, &port);
626 		if (*mp == NULL)
627 			return (IP_FW_DENY);
628 	}
629 
630 	/* Check if protocol is supported */
631 	switch (proto) {
632 	case IPPROTO_ICMP:
633 		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
634 		if (ret != 0) {
635 			if (ret == ENOMEM) {
636 				NAT64STAT_INC(&cfg->base.stats, nomem);
637 				return (IP_FW_DENY);
638 			}
639 			NAT64STAT_INC(&cfg->base.stats, noproto);
640 			return (cfg->nomatch_verdict);
641 		}
642 		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
643 			NAT64STAT_INC(&cfg->base.stats, nomatch4);
644 			return (cfg->nomatch_verdict);
645 		}
646 		/* FALLTHROUGH */
647 	case IPPROTO_TCP:
648 	case IPPROTO_UDP:
649 		break;
650 	default:
651 		NAT64STAT_INC(&cfg->base.stats, noproto);
652 		return (cfg->nomatch_verdict);
653 	}
654 
655 	alias = &ALIAS_BYHASH(cfg, addr);
656 	MPASS(addr == alias->addr);
657 
658 	/* Check that we have state for this port */
659 	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
660 	    port, proto);
661 	if (state == NULL) {
662 		NAT64STAT_INC(&cfg->base.stats, nomatch4);
663 		return (cfg->nomatch_verdict);
664 	}
665 
666 	/* TODO: Check flags to see if we need to do some static mapping */
667 
668 	/* Update some state fields if need */
669 	SET_AGE(ts);
670 	if (f_id->proto == IPPROTO_TCP)
671 		flags = convert_tcp_flags(f_id->_flags);
672 	else
673 		flags = 0;
674 	if (state->timestamp != ts)
675 		state->timestamp = ts;
676 	if ((state->flags & flags) != flags)
677 		state->flags |= flags;
678 
679 	port = htons(state->sport);
680 	src6 = state->ip6_dst;
681 
682 	if (cfg->base.flags & NAT64_LOG) {
683 		logdata = &loghdr;
684 		nat64lsn_log(logdata, *mp, AF_INET, state);
685 	} else
686 		logdata = NULL;
687 
688 	/*
689 	 * We already have src6 with embedded address, but it is possible,
690 	 * that src_ip is different than state->ip_dst, this is why we
691 	 * do embedding again.
692 	 */
693 	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
694 	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
695 	    &cfg->base, logdata);
696 	if (ret == NAT64SKIP)
697 		return (cfg->nomatch_verdict);
698 	if (ret == NAT64RETURN)
699 		*mp = NULL;
700 	return (IP_FW_DENY);
701 }
702 
703 /*
704  * Check if particular state is stale and should be deleted.
705  * Return 1 if true, 0 otherwise.
706  */
707 static int
708 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
709 {
710 	int age, ttl;
711 
712 	/* State was marked as stale in previous pass. */
713 	if (ISSET32(state->flags, NAT64_BIT_STALE))
714 		return (1);
715 
716 	/* State is not yet initialized, it is going to be READY */
717 	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
718 		return (0);
719 
720 	age = GET_AGE(state->timestamp);
721 	switch (state->proto) {
722 	case IPPROTO_TCP:
723 		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
724 			ttl = cfg->st_close_ttl;
725 		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
726 			ttl = cfg->st_estab_ttl;
727 		else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
728 			ttl = cfg->st_syn_ttl;
729 		else
730 			ttl = cfg->st_syn_ttl;
731 		if (age > ttl)
732 			return (1);
733 		break;
734 	case IPPROTO_UDP:
735 		if (age > cfg->st_udp_ttl)
736 			return (1);
737 		break;
738 	case IPPROTO_ICMP:
739 		if (age > cfg->st_icmp_ttl)
740 			return (1);
741 		break;
742 	}
743 	return (0);
744 }
745 
746 static int
747 nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
748 {
749 	struct nat64lsn_state *state;
750 	struct nat64lsn_host *host;
751 	uint64_t freemask;
752 	int c, i, update_age;
753 
754 	update_age = 0;
755 	for (c = 0; c < pg->chunks_count; c++) {
756 		FREEMASK_COPY(pg, c, freemask);
757 		for (i = 0; i < 64; i++) {
758 			if (ISSET64(freemask, i))
759 				continue;
760 			state = &STATES_CHUNK(pg, c)->state[i];
761 			if (nat64lsn_check_state(cfg, state) == 0) {
762 				update_age = 1;
763 				continue;
764 			}
765 			/*
766 			 * Expire state:
767 			 * 1. Mark as STALE and unlink from host's hash.
768 			 * 2. Set bit in freemask.
769 			 */
770 			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
771 				/*
772 				 * State was marked as STALE in previous
773 				 * pass. Now it is safe to release it.
774 				 */
775 				state->flags = 0;
776 				ck_pr_fence_store();
777 				FREEMASK_BTS(pg, c, i);
778 				NAT64STAT_INC(&cfg->base.stats, sdeleted);
779 				continue;
780 			}
781 			MPASS(state->flags & NAT64_FLAG_READY);
782 
783 			host = state->host;
784 			HOST_LOCK(host);
785 			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
786 			    state, nat64lsn_state, entries);
787 			host->states_count--;
788 			HOST_UNLOCK(host);
789 
790 			/* Reset READY flag */
791 			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
792 			/* And set STALE flag */
793 			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
794 			ck_pr_fence_store();
795 			/*
796 			 * Now translate6 will not use this state, wait
797 			 * until it become safe for translate4, then mark
798 			 * state as free.
799 			 */
800 		}
801 	}
802 
803 	/*
804 	 * We have some alive states, update timestamp.
805 	 */
806 	if (update_age)
807 		SET_AGE(pg->timestamp);
808 
809 	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
810 		return (0);
811 
812 	return (1);
813 }
814 
815 static void
816 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
817     struct nat64lsn_pg_slist *portgroups)
818 {
819 	struct nat64lsn_alias *alias;
820 	struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
821 	uint32_t *pgmask, *pgidx;
822 	int i, idx;
823 
824 	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
825 		alias = &cfg->aliases[i];
826 		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
827 			if (nat64lsn_maintain_pg(cfg, pg) == 0)
828 				continue;
829 			/* Always keep first PG */
830 			if (pg->base_port == NAT64_MIN_PORT)
831 				continue;
832 			/*
833 			 * PG is expired, unlink it and schedule for
834 			 * deferred destroying.
835 			 */
836 			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
837 			switch (pg->proto) {
838 			case IPPROTO_TCP:
839 				pgmask = alias->tcp_pgmask;
840 				pgptr = &alias->tcp_pg;
841 				pgidx = &alias->tcp_pgidx;
842 				firstpg = alias->tcp[0]->pgptr[0];
843 				break;
844 			case IPPROTO_UDP:
845 				pgmask = alias->udp_pgmask;
846 				pgptr = &alias->udp_pg;
847 				pgidx = &alias->udp_pgidx;
848 				firstpg = alias->udp[0]->pgptr[0];
849 				break;
850 			case IPPROTO_ICMP:
851 				pgmask = alias->icmp_pgmask;
852 				pgptr = &alias->icmp_pg;
853 				pgidx = &alias->icmp_pgidx;
854 				firstpg = alias->icmp[0]->pgptr[0];
855 				break;
856 			}
857 			/* Reset the corresponding bit in pgmask array. */
858 			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
859 			ck_pr_fence_store();
860 			/* If last used PG points to this PG, reset it. */
861 			ck_pr_cas_ptr(pgptr, pg, firstpg);
862 			ck_pr_cas_32(pgidx, idx, 0);
863 			/* Unlink PG from alias's chain */
864 			ALIAS_LOCK(alias);
865 			CK_SLIST_REMOVE(&alias->portgroups, pg,
866 			    nat64lsn_pg, entries);
867 			alias->portgroups_count--;
868 			ALIAS_UNLOCK(alias);
869 			/* And link to job's chain for deferred destroying */
870 			NAT64STAT_INC(&cfg->base.stats, spgdeleted);
871 			CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
872 		}
873 	}
874 }
875 
876 static void
877 nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
878     struct nat64lsn_hosts_slist *hosts)
879 {
880 	struct nat64lsn_host *host, *tmp;
881 	int i;
882 
883 	for (i = 0; i < cfg->hosts_hashsize; i++) {
884 		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
885 		    entries, tmp) {
886 			/* Is host was marked in previous call? */
887 			if (host->flags & NAT64LSN_DEADHOST) {
888 				if (host->states_count > 0) {
889 					host->flags &= ~NAT64LSN_DEADHOST;
890 					continue;
891 				}
892 				/*
893 				 * Unlink host from hash table and schedule
894 				 * it for deferred destroying.
895 				 */
896 				CFG_LOCK(cfg);
897 				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
898 				    nat64lsn_host, entries);
899 				cfg->hosts_count--;
900 				CFG_UNLOCK(cfg);
901 				CK_SLIST_INSERT_HEAD(hosts, host, entries);
902 				continue;
903 			}
904 			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
905 				continue;
906 			if (host->states_count > 0)
907 				continue;
908 			/* Mark host as going to be expired in next pass */
909 			host->flags |= NAT64LSN_DEADHOST;
910 			ck_pr_fence_store();
911 		}
912 	}
913 }
914 
915 static struct nat64lsn_pgchunk*
916 nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
917 {
918 #if 0
919 	struct nat64lsn_alias *alias;
920 	struct nat64lsn_pgchunk *chunk;
921 	uint32_t pgmask;
922 	int i, c;
923 
924 	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
925 		alias = &cfg->aliases[i];
926 		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
927 			continue;
928 		/* Always keep single chunk allocated */
929 		for (c = 1; c < 32; c++) {
930 			if ((alias->tcp_chunkmask & (1 << c)) == 0)
931 				break;
932 			chunk = ck_pr_load_ptr(&alias->tcp[c]);
933 			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
934 				continue;
935 			ck_pr_btr_32(&alias->tcp_chunkmask, c);
936 			ck_pr_fence_load();
937 			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
938 				continue;
939 		}
940 	}
941 #endif
942 	return (NULL);
943 }
944 
#if 0
/*
 * Disabled, unfinished: prepare a twice as large states hash for hosts
 * whose hash became too dense and flag them with NAT64LSN_GROWHASH.
 *
 * XXX: the new table is only allocated and initialized here; it is
 * neither installed into the host nor freed, so this must be completed
 * before the code is enabled.
 */
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			if (h->states_count / 2 < h->states_hashsize ||
			    h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				continue;
			hsize = h->states_hashsize * 2;
			/* malloc(9) takes the malloc type before flags. */
			hash = malloc(sizeof(*hash) * hsize, M_NAT64LSN,
			    M_NOWAIT);
			if (hash == NULL)
				continue;
			/* Initialize every bucket of the new table. */
			for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[j]);

			ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif
970 
971 /*
972  * This procedure is used to perform various maintenance
973  * on dynamic hash list. Currently it is called every 4 seconds.
974  */
975 static void
976 nat64lsn_periodic(void *data)
977 {
978 	struct nat64lsn_job_item *ji;
979 	struct nat64lsn_cfg *cfg;
980 
981 	cfg = (struct nat64lsn_cfg *) data;
982 	CURVNET_SET(cfg->vp);
983 	if (cfg->hosts_count > 0) {
984 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
985 		if (ji != NULL) {
986 			ji->jtype = JTYPE_DESTROY;
987 			CK_SLIST_INIT(&ji->hosts);
988 			CK_SLIST_INIT(&ji->portgroups);
989 			nat64lsn_expire_hosts(cfg, &ji->hosts);
990 			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
991 			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
992 			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
993 			    nat64lsn_job_destroy);
994 		} else
995 			NAT64STAT_INC(&cfg->base.stats, jnomem);
996 	}
997 	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
998 	CURVNET_RESTORE();
999 }
1000 
/*
 * Encode the place of an allocation failure as 10 * type + stage, where
 * type is 1 for hosts and 2 for portgroups; stage 0 means "no error"
 * and always yields 0.
 */
#define	ALLOC_ERROR(stage, type)	\
    ((stage) != 0 ? (type) * 10 + (stage) : 0)
#define	HOST_ERROR(stage)		ALLOC_ERROR((stage), 1)
#define	PG_ERROR(stage)			ALLOC_ERROR((stage), 2)
1004 static int
1005 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1006 {
1007 	char a[INET6_ADDRSTRLEN];
1008 	struct nat64lsn_aliaslink *link;
1009 	struct nat64lsn_host *host;
1010 	struct nat64lsn_state *state;
1011 	uint32_t hval, data[2];
1012 	int i;
1013 
1014 	/* Check that host was not yet added. */
1015 	NAT64LSN_EPOCH_ASSERT();
1016 	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
1017 		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
1018 			/* The host was allocated in previous call. */
1019 			ji->host = host;
1020 			goto get_state;
1021 		}
1022 	}
1023 
1024 	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
1025 	if (ji->host == NULL)
1026 		return (HOST_ERROR(1));
1027 
1028 	host->states_hashsize = NAT64LSN_HSIZE;
1029 	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
1030 	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
1031 	if (host->states_hash == NULL) {
1032 		uma_zfree(nat64lsn_host_zone, host);
1033 		return (HOST_ERROR(2));
1034 	}
1035 
1036 	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
1037 	if (link == NULL) {
1038 		free(host->states_hash, M_NAT64LSN);
1039 		uma_zfree(nat64lsn_host_zone, host);
1040 		return (HOST_ERROR(3));
1041 	}
1042 
1043 	/* Initialize */
1044 	HOST_LOCK_INIT(host);
1045 	SET_AGE(host->timestamp);
1046 	host->addr = ji->f_id.src_ip6;
1047 	host->hval = ji->src6_hval;
1048 	host->flags = 0;
1049 	host->states_count = 0;
1050 	host->states_hashsize = NAT64LSN_HSIZE;
1051 	CK_SLIST_INIT(&host->aliases);
1052 	for (i = 0; i < host->states_hashsize; i++)
1053 		CK_SLIST_INIT(&host->states_hash[i]);
1054 
1055 	/* Determine alias from flow hash. */
1056 	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
1057 	link->alias = &ALIAS_BYHASH(cfg, hval);
1058 	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1059 
1060 	ALIAS_LOCK(link->alias);
1061 	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1062 	link->alias->hosts_count++;
1063 	ALIAS_UNLOCK(link->alias);
1064 
1065 	CFG_LOCK(cfg);
1066 	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
1067 	cfg->hosts_count++;
1068 	CFG_UNLOCK(cfg);
1069 
1070 get_state:
1071 	data[0] = ji->faddr;
1072 	data[1] = (ji->f_id.dst_port << 16) | ji->port;
1073 	ji->state_hval = hval = STATE_HVAL(cfg, data);
1074 	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1075 	    ji->faddr, ji->port, ji->proto);
1076 	/*
1077 	 * We failed to obtain new state, used alias needs new PG.
1078 	 * XXX: or another alias should be used.
1079 	 */
1080 	if (state == NULL) {
1081 		/* Try to allocate new PG */
1082 		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1083 			return (HOST_ERROR(4));
1084 		/* We assume that nat64lsn_alloc_pg() got state */
1085 	} else
1086 		ji->state = state;
1087 
1088 	ji->done = 1;
1089 	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1090 	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1091 	return (HOST_ERROR(0));
1092 }
1093 
/*
 * Find the first clear bit in a bitmap made of 32 uint32_t words.
 *
 * data must point to 32 words.  The words are scanned in index order;
 * for the first word that is not all ones the result is
 * word_index * 32 + (zero-based position of its lowest clear bit).
 * Returns -1 when every bit in all 32 words is set.
 */
static int
nat64lsn_find_pg_place(uint32_t *data)
{
	uint32_t avail;
	int word;

	for (word = 0; word < 32; word++) {
		/* Invert so that clear bits become the ones ffs() finds. */
		avail = ~data[word];
		if (avail != 0)
			return (word * 32 + ffs(avail) - 1);
	}
	return (-1);
}
1106 
1107 static int
1108 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1109     struct nat64lsn_alias *alias, uint32_t *chunkmask,
1110     uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
1111     struct nat64lsn_pg **pgptr, uint8_t proto)
1112 {
1113 	struct nat64lsn_pg *pg;
1114 	int i, pg_idx, chunk_idx;
1115 
1116 	/* Find place in pgchunk where PG can be added */
1117 	pg_idx = nat64lsn_find_pg_place(pgmask);
1118 	if (pg_idx < 0)	/* no more PGs */
1119 		return (PG_ERROR(1));
1120 	/* Check that we have allocated pgchunk for given PG index */
1121 	chunk_idx = pg_idx / 32;
1122 	if (!ISSET32(*chunkmask, chunk_idx)) {
1123 		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1124 		    M_NOWAIT);
1125 		if (chunks[chunk_idx] == NULL)
1126 			return (PG_ERROR(2));
1127 		ck_pr_bts_32(chunkmask, chunk_idx);
1128 		ck_pr_fence_store();
1129 	}
1130 	/* Allocate PG and states chunks */
1131 	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1132 	if (pg == NULL)
1133 		return (PG_ERROR(3));
1134 	pg->chunks_count = cfg->states_chunks;
1135 	if (pg->chunks_count > 1) {
1136 		pg->freemask_chunk = malloc(pg->chunks_count *
1137 		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1138 		if (pg->freemask_chunk == NULL) {
1139 			uma_zfree(nat64lsn_pg_zone, pg);
1140 			return (PG_ERROR(4));
1141 		}
1142 		pg->states_chunk = malloc(pg->chunks_count *
1143 		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1144 		    M_NOWAIT | M_ZERO);
1145 		if (pg->states_chunk == NULL) {
1146 			free(pg->freemask_chunk, M_NAT64LSN);
1147 			uma_zfree(nat64lsn_pg_zone, pg);
1148 			return (PG_ERROR(5));
1149 		}
1150 		for (i = 0; i < pg->chunks_count; i++) {
1151 			pg->states_chunk[i] = uma_zalloc(
1152 			    nat64lsn_state_zone, M_NOWAIT);
1153 			if (pg->states_chunk[i] == NULL)
1154 				goto states_failed;
1155 		}
1156 		memset(pg->freemask_chunk, 0xff,
1157 		    sizeof(uint64_t) * pg->chunks_count);
1158 	} else {
1159 		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1160 		if (pg->states == NULL) {
1161 			uma_zfree(nat64lsn_pg_zone, pg);
1162 			return (PG_ERROR(6));
1163 		}
1164 		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1165 	}
1166 
1167 	/* Initialize PG and hook it to pgchunk */
1168 	SET_AGE(pg->timestamp);
1169 	pg->proto = proto;
1170 	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
1171 	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1172 	ck_pr_fence_store();
1173 	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
1174 	ck_pr_store_ptr(pgptr, pg);
1175 
1176 	ALIAS_LOCK(alias);
1177 	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1178 	SET_AGE(alias->timestamp);
1179 	alias->portgroups_count++;
1180 	ALIAS_UNLOCK(alias);
1181 	NAT64STAT_INC(&cfg->base.stats, spgcreated);
1182 	return (PG_ERROR(0));
1183 
1184 states_failed:
1185 	for (i = 0; i < pg->chunks_count; i++)
1186 		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1187 	free(pg->freemask_chunk, M_NAT64LSN);
1188 	free(pg->states_chunk, M_NAT64LSN);
1189 	uma_zfree(nat64lsn_pg_zone, pg);
1190 	return (PG_ERROR(7));
1191 }
1192 
1193 static int
1194 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1195 {
1196 	struct nat64lsn_aliaslink *link;
1197 	struct nat64lsn_alias *alias;
1198 	int ret;
1199 
1200 	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1201 	if (link == NULL)
1202 		return (PG_ERROR(1));
1203 
1204 	/*
1205 	 * TODO: check that we did not already allocated PG in
1206 	 *	 previous call.
1207 	 */
1208 
1209 	ret = 0;
1210 	alias = link->alias;
1211 	/* Find place in pgchunk where PG can be added */
1212 	switch (ji->proto) {
1213 	case IPPROTO_TCP:
1214 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1215 		    &alias->tcp_chunkmask, alias->tcp_pgmask,
1216 		    alias->tcp, &alias->tcp_pg, ji->proto);
1217 		break;
1218 	case IPPROTO_UDP:
1219 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1220 		    &alias->udp_chunkmask, alias->udp_pgmask,
1221 		    alias->udp, &alias->udp_pg, ji->proto);
1222 		break;
1223 	case IPPROTO_ICMP:
1224 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1225 		    &alias->icmp_chunkmask, alias->icmp_pgmask,
1226 		    alias->icmp, &alias->icmp_pg, ji->proto);
1227 		break;
1228 	default:
1229 		panic("%s: wrong proto %d", __func__, ji->proto);
1230 	}
1231 	if (ret == PG_ERROR(1)) {
1232 		/*
1233 		 * PG_ERROR(1) means that alias lacks free PGs
1234 		 * XXX: try next alias.
1235 		 */
1236 		printf("NAT64LSN: %s: failed to obtain PG\n",
1237 		    __func__);
1238 		return (ret);
1239 	}
1240 	if (ret == PG_ERROR(0)) {
1241 		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1242 		    ji->state_hval, ji->faddr, ji->port, ji->proto);
1243 		if (ji->state == NULL)
1244 			ret = PG_ERROR(8);
1245 		else
1246 			ji->done = 1;
1247 	}
1248 	return (ret);
1249 }
1250 
1251 static void
1252 nat64lsn_do_request(void *data)
1253 {
1254 	struct epoch_tracker et;
1255 	struct nat64lsn_job_head jhead;
1256 	struct nat64lsn_job_item *ji, *ji2;
1257 	struct nat64lsn_cfg *cfg;
1258 	int jcount;
1259 	uint8_t flags;
1260 
1261 	cfg = (struct nat64lsn_cfg *)data;
1262 	if (cfg->jlen == 0)
1263 		return;
1264 
1265 	CURVNET_SET(cfg->vp);
1266 	STAILQ_INIT(&jhead);
1267 
1268 	/* Grab queue */
1269 	JQUEUE_LOCK();
1270 	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1271 	jcount = cfg->jlen;
1272 	cfg->jlen = 0;
1273 	JQUEUE_UNLOCK();
1274 
1275 	/* TODO: check if we need to resize hash */
1276 
1277 	NAT64STAT_INC(&cfg->base.stats, jcalls);
1278 	DPRINTF(DP_JQUEUE, "count=%d", jcount);
1279 
1280 	/*
1281 	 * TODO:
1282 	 * What we should do here is to build a hash
1283 	 * to ensure we don't have lots of duplicate requests.
1284 	 * Skip this for now.
1285 	 *
1286 	 * TODO: Limit per-call number of items
1287 	 */
1288 
1289 	NAT64LSN_EPOCH_ENTER(et);
1290 	STAILQ_FOREACH(ji, &jhead, entries) {
1291 		switch (ji->jtype) {
1292 		case JTYPE_NEWHOST:
1293 			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1294 				NAT64STAT_INC(&cfg->base.stats, jhostfails);
1295 			break;
1296 		case JTYPE_NEWPORTGROUP:
1297 			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1298 				NAT64STAT_INC(&cfg->base.stats, jportfails);
1299 			break;
1300 		default:
1301 			continue;
1302 		}
1303 		if (ji->done != 0) {
1304 			flags = ji->proto != IPPROTO_TCP ? 0 :
1305 			    convert_tcp_flags(ji->f_id._flags);
1306 			nat64lsn_translate6_internal(cfg, &ji->m,
1307 			    ji->state, flags);
1308 			NAT64STAT_INC(&cfg->base.stats, jreinjected);
1309 		}
1310 	}
1311 	NAT64LSN_EPOCH_EXIT(et);
1312 
1313 	ji = STAILQ_FIRST(&jhead);
1314 	while (ji != NULL) {
1315 		ji2 = STAILQ_NEXT(ji, entries);
1316 		/*
1317 		 * In any case we must free mbuf if
1318 		 * translator did not consumed it.
1319 		 */
1320 		m_freem(ji->m);
1321 		uma_zfree(nat64lsn_job_zone, ji);
1322 		ji = ji2;
1323 	}
1324 	CURVNET_RESTORE();
1325 }
1326 
1327 static struct nat64lsn_job_item *
1328 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1329 {
1330 	struct nat64lsn_job_item *ji;
1331 
1332 	/*
1333 	 * Do not try to lock possibly contested mutex if we're near the
1334 	 * limit. Drop packet instead.
1335 	 */
1336 	ji = NULL;
1337 	if (cfg->jlen >= cfg->jmaxlen)
1338 		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1339 	else {
1340 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1341 		if (ji == NULL)
1342 			NAT64STAT_INC(&cfg->base.stats, jnomem);
1343 	}
1344 	if (ji == NULL) {
1345 		NAT64STAT_INC(&cfg->base.stats, dropped);
1346 		DPRINTF(DP_DROPS, "failed to create job");
1347 	} else {
1348 		ji->jtype = jtype;
1349 		ji->done = 0;
1350 	}
1351 	return (ji);
1352 }
1353 
1354 static void
1355 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1356 {
1357 
1358 	JQUEUE_LOCK();
1359 	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1360 	NAT64STAT_INC(&cfg->base.stats, jrequests);
1361 	cfg->jlen++;
1362 
1363 	if (callout_pending(&cfg->jcallout) == 0)
1364 		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1365 	JQUEUE_UNLOCK();
1366 }
1367 
1368 static void
1369 nat64lsn_job_destroy(epoch_context_t ctx)
1370 {
1371 	struct nat64lsn_job_item *ji;
1372 	struct nat64lsn_host *host;
1373 	struct nat64lsn_pg *pg;
1374 	int i;
1375 
1376 	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1377 	MPASS(ji->jtype == JTYPE_DESTROY);
1378 	while (!CK_SLIST_EMPTY(&ji->hosts)) {
1379 		host = CK_SLIST_FIRST(&ji->hosts);
1380 		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1381 		if (host->states_count > 0) {
1382 			/*
1383 			 * XXX: The state has been created
1384 			 * during host deletion.
1385 			 */
1386 			printf("NAT64LSN: %s: destroying host with %d "
1387 			    "states\n", __func__, host->states_count);
1388 		}
1389 		nat64lsn_destroy_host(host);
1390 	}
1391 	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1392 		pg = CK_SLIST_FIRST(&ji->portgroups);
1393 		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1394 		for (i = 0; i < pg->chunks_count; i++) {
1395 			if (FREEMASK_BITCOUNT(pg, i) != 64) {
1396 				/*
1397 				 * XXX: The state has been created during
1398 				 * PG deletion.
1399 				 */
1400 				printf("NAT64LSN: %s: destroying PG %p "
1401 				    "with non-empty chunk %d\n", __func__,
1402 				    pg, i);
1403 			}
1404 		}
1405 		nat64lsn_destroy_pg(pg);
1406 	}
1407 	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1408 	uma_zfree(nat64lsn_job_zone, ji);
1409 }
1410 
1411 static int
1412 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1413     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1414     in_addr_t faddr, uint16_t port, uint8_t proto)
1415 {
1416 	struct nat64lsn_job_item *ji;
1417 
1418 	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1419 	if (ji != NULL) {
1420 		ji->m = *mp;
1421 		ji->f_id = *f_id;
1422 		ji->faddr = faddr;
1423 		ji->port = port;
1424 		ji->proto = proto;
1425 		ji->src6_hval = hval;
1426 
1427 		nat64lsn_enqueue_job(cfg, ji);
1428 		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1429 		*mp = NULL;
1430 	}
1431 	return (IP_FW_DENY);
1432 }
1433 
1434 static int
1435 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1436     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1437     in_addr_t faddr, uint16_t port, uint8_t proto)
1438 {
1439 	struct nat64lsn_job_item *ji;
1440 
1441 	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1442 	if (ji != NULL) {
1443 		ji->m = *mp;
1444 		ji->f_id = *f_id;
1445 		ji->faddr = faddr;
1446 		ji->port = port;
1447 		ji->proto = proto;
1448 		ji->state_hval = hval;
1449 		ji->host = host;
1450 
1451 		nat64lsn_enqueue_job(cfg, ji);
1452 		NAT64STAT_INC(&cfg->base.stats, jportreq);
1453 		*mp = NULL;
1454 	}
1455 	return (IP_FW_DENY);
1456 }
1457 
1458 static int
1459 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1460     struct nat64lsn_state *state, uint8_t flags)
1461 {
1462 	struct pfloghdr loghdr, *logdata;
1463 	int ret;
1464 	uint16_t ts;
1465 
1466 	/* Update timestamp and flags if needed */
1467 	SET_AGE(ts);
1468 	if (state->timestamp != ts)
1469 		state->timestamp = ts;
1470 	if ((state->flags & flags) != 0)
1471 		state->flags |= flags;
1472 
1473 	if (cfg->base.flags & NAT64_LOG) {
1474 		logdata = &loghdr;
1475 		nat64lsn_log(logdata, *mp, AF_INET6, state);
1476 	} else
1477 		logdata = NULL;
1478 
1479 	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1480 	    htons(state->aport), &cfg->base, logdata);
1481 	if (ret == NAT64SKIP)
1482 		return (cfg->nomatch_verdict);
1483 	if (ret == NAT64RETURN)
1484 		*mp = NULL;
1485 	return (IP_FW_DENY);
1486 }
1487 
1488 static int
1489 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1490     struct mbuf **mp)
1491 {
1492 	struct nat64lsn_state *state;
1493 	struct nat64lsn_host *host;
1494 	struct icmp6_hdr *icmp6;
1495 	uint32_t addr, hval, data[2];
1496 	int offset, proto;
1497 	uint16_t port;
1498 	uint8_t flags;
1499 
1500 	/* Check if protocol is supported */
1501 	port = f_id->src_port;
1502 	proto = f_id->proto;
1503 	switch (f_id->proto) {
1504 	case IPPROTO_ICMPV6:
1505 		/*
1506 		 * For ICMPv6 echo reply/request we use icmp6_id as
1507 		 * local port.
1508 		 */
1509 		offset = 0;
1510 		proto = nat64_getlasthdr(*mp, &offset);
1511 		if (proto < 0) {
1512 			NAT64STAT_INC(&cfg->base.stats, dropped);
1513 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
1514 			return (IP_FW_DENY);
1515 		}
1516 		if (proto == IPPROTO_ICMPV6) {
1517 			icmp6 = mtodo(*mp, offset);
1518 			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1519 			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1520 				port = ntohs(icmp6->icmp6_id);
1521 		}
1522 		proto = IPPROTO_ICMP;
1523 		/* FALLTHROUGH */
1524 	case IPPROTO_TCP:
1525 	case IPPROTO_UDP:
1526 		break;
1527 	default:
1528 		NAT64STAT_INC(&cfg->base.stats, noproto);
1529 		return (cfg->nomatch_verdict);
1530 	}
1531 
1532 	/* Extract IPv4 from destination IPv6 address */
1533 	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
1534 	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1535 		char a[INET_ADDRSTRLEN];
1536 
1537 		NAT64STAT_INC(&cfg->base.stats, dropped);
1538 		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1539 		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
1540 		return (IP_FW_DENY); /* XXX: add extra stats? */
1541 	}
1542 
1543 	/* Try to find host */
1544 	hval = HOST_HVAL(cfg, &f_id->src_ip6);
1545 	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1546 		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1547 			break;
1548 	}
1549 	/* We use IPv4 address in host byte order */
1550 	addr = ntohl(addr);
1551 	if (host == NULL)
1552 		return (nat64lsn_request_host(cfg, f_id, mp,
1553 		    hval, addr, port, proto));
1554 
1555 	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
1556 
1557 	data[0] = addr;
1558 	data[1] = (f_id->dst_port << 16) | port;
1559 	hval = STATE_HVAL(cfg, data);
1560 	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
1561 	    port, proto);
1562 	if (state == NULL)
1563 		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1564 		    port, proto));
1565 	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1566 }
1567 
1568 /*
1569  * Main dataplane entry point.
1570  */
1571 int
1572 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1573     ipfw_insn *cmd, int *done)
1574 {
1575 	struct nat64lsn_cfg *cfg;
1576 	ipfw_insn *icmd;
1577 	int ret;
1578 
1579 	IPFW_RLOCK_ASSERT(ch);
1580 
1581 	*done = 0;	/* continue the search in case of failure */
1582 	icmd = cmd + 1;
1583 	if (cmd->opcode != O_EXTERNAL_ACTION ||
1584 	    cmd->arg1 != V_nat64lsn_eid ||
1585 	    icmd->opcode != O_EXTERNAL_INSTANCE ||
1586 	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1587 		return (IP_FW_DENY);
1588 
1589 	*done = 1;	/* terminate the search */
1590 
1591 	switch (args->f_id.addr_type) {
1592 	case 4:
1593 		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1594 		break;
1595 	case 6:
1596 		/*
1597 		 * Check that destination IPv6 address matches our prefix6.
1598 		 */
1599 		if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1600 		    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
1601 		    cfg->base.plat_plen / 8) != 0) {
1602 			ret = cfg->nomatch_verdict;
1603 			break;
1604 		}
1605 		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
1606 		break;
1607 	default:
1608 		ret = cfg->nomatch_verdict;
1609 	}
1610 
1611 	if (ret != IP_FW_PASS && args->m != NULL) {
1612 		m_freem(args->m);
1613 		args->m = NULL;
1614 	}
1615 	return (ret);
1616 }
1617 
1618 static int
1619 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1620 {
1621 	struct nat64lsn_states_chunk *chunk;
1622 	int i;
1623 
1624 	chunk = (struct nat64lsn_states_chunk *)mem;
1625 	for (i = 0; i < 64; i++)
1626 		chunk->state[i].flags = 0;
1627 	return (0);
1628 }
1629 
1630 void
1631 nat64lsn_init_internal(void)
1632 {
1633 
1634 	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1635 	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1636 	    UMA_ALIGN_PTR, 0);
1637 	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1638 	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1639 	    UMA_ALIGN_PTR, 0);
1640 	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1641 	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1642 	    UMA_ALIGN_PTR, 0);
1643 	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1644 	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
1645 	    UMA_ALIGN_PTR, 0);
1646 	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1647 	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1648 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1649 	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1650 	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
1651 	    UMA_ALIGN_PTR, 0);
1652 	JQUEUE_LOCK_INIT();
1653 }
1654 
1655 void
1656 nat64lsn_uninit_internal(void)
1657 {
1658 
1659 	/* XXX: epoch_task drain */
1660 	JQUEUE_LOCK_DESTROY();
1661 	uma_zdestroy(nat64lsn_host_zone);
1662 	uma_zdestroy(nat64lsn_pgchunk_zone);
1663 	uma_zdestroy(nat64lsn_pg_zone);
1664 	uma_zdestroy(nat64lsn_aliaslink_zone);
1665 	uma_zdestroy(nat64lsn_state_zone);
1666 	uma_zdestroy(nat64lsn_job_zone);
1667 }
1668 
1669 void
1670 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1671 {
1672 
1673 	CALLOUT_LOCK(cfg);
1674 	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1675 	    nat64lsn_periodic, cfg);
1676 	CALLOUT_UNLOCK(cfg);
1677 }
1678 
1679 struct nat64lsn_cfg *
1680 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1681 {
1682 	struct nat64lsn_cfg *cfg;
1683 	struct nat64lsn_alias *alias;
1684 	int i, naddr;
1685 
1686 	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1687 	    M_WAITOK | M_ZERO);
1688 
1689 	CFG_LOCK_INIT(cfg);
1690 	CALLOUT_LOCK_INIT(cfg);
1691 	STAILQ_INIT(&cfg->jhead);
1692 	cfg->vp = curvnet;
1693 	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1694 
1695 	cfg->hash_seed = arc4random();
1696 	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1697 	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1698 	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1699 	for (i = 0; i < cfg->hosts_hashsize; i++)
1700 		CK_SLIST_INIT(&cfg->hosts_hash[i]);
1701 
1702 	naddr = 1 << (32 - plen);
1703 	cfg->prefix4 = prefix;
1704 	cfg->pmask4 = prefix | (naddr - 1);
1705 	cfg->plen4 = plen;
1706 	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1707 	    M_NAT64LSN, M_WAITOK | M_ZERO);
1708 	for (i = 0; i < naddr; i++) {
1709 		alias = &cfg->aliases[i];
1710 		alias->addr = prefix + i; /* host byte order */
1711 		CK_SLIST_INIT(&alias->hosts);
1712 		ALIAS_LOCK_INIT(alias);
1713 	}
1714 
1715 	callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1716 	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1717 
1718 	return (cfg);
1719 }
1720 
1721 static void
1722 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1723 {
1724 	int i;
1725 
1726 	if (pg->chunks_count == 1) {
1727 		uma_zfree(nat64lsn_state_zone, pg->states);
1728 	} else {
1729 		for (i = 0; i < pg->chunks_count; i++)
1730 			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1731 		free(pg->states_chunk, M_NAT64LSN);
1732 		free(pg->freemask_chunk, M_NAT64LSN);
1733 	}
1734 	uma_zfree(nat64lsn_pg_zone, pg);
1735 }
1736 
1737 static void
1738 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1739     struct nat64lsn_alias *alias)
1740 {
1741 	struct nat64lsn_pg *pg;
1742 	int i;
1743 
1744 	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1745 		pg = CK_SLIST_FIRST(&alias->portgroups);
1746 		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1747 		nat64lsn_destroy_pg(pg);
1748 	}
1749 	for (i = 0; i < 32; i++) {
1750 		if (ISSET32(alias->tcp_chunkmask, i))
1751 			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1752 		if (ISSET32(alias->udp_chunkmask, i))
1753 			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1754 		if (ISSET32(alias->icmp_chunkmask, i))
1755 			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1756 	}
1757 	ALIAS_LOCK_DESTROY(alias);
1758 }
1759 
1760 static void
1761 nat64lsn_destroy_host(struct nat64lsn_host *host)
1762 {
1763 	struct nat64lsn_aliaslink *link;
1764 
1765 	while (!CK_SLIST_EMPTY(&host->aliases)) {
1766 		link = CK_SLIST_FIRST(&host->aliases);
1767 		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
1768 
1769 		ALIAS_LOCK(link->alias);
1770 		CK_SLIST_REMOVE(&link->alias->hosts, link,
1771 		    nat64lsn_aliaslink, alias_entries);
1772 		link->alias->hosts_count--;
1773 		ALIAS_UNLOCK(link->alias);
1774 
1775 		uma_zfree(nat64lsn_aliaslink_zone, link);
1776 	}
1777 	HOST_LOCK_DESTROY(host);
1778 	free(host->states_hash, M_NAT64LSN);
1779 	uma_zfree(nat64lsn_host_zone, host);
1780 }
1781 
1782 void
1783 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1784 {
1785 	struct nat64lsn_host *host;
1786 	int i;
1787 
1788 	CALLOUT_LOCK(cfg);
1789 	callout_drain(&cfg->periodic);
1790 	CALLOUT_UNLOCK(cfg);
1791 	callout_drain(&cfg->jcallout);
1792 
1793 	for (i = 0; i < cfg->hosts_hashsize; i++) {
1794 		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1795 			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1796 			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1797 			nat64lsn_destroy_host(host);
1798 		}
1799 	}
1800 
1801 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1802 		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1803 
1804 	CALLOUT_LOCK_DESTROY(cfg);
1805 	CFG_LOCK_DESTROY(cfg);
1806 	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1807 	free(cfg->hosts_hash, M_NAT64LSN);
1808 	free(cfg->aliases, M_NAT64LSN);
1809 	free(cfg, M_NAT64LSN);
1810 }
1811