xref: /freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/counter.h>
36 #include <sys/ck.h>
37 #include <sys/epoch.h>
38 #include <sys/errno.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/module.h>
44 #include <sys/rmlock.h>
45 #include <sys/rwlock.h>
46 #include <sys/socket.h>
47 #include <sys/sockopt.h>
48 
49 #include <net/if.h>
50 
51 #include <netinet/in.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/ip_fw.h>
55 #include <netinet6/ip_fw_nat64.h>
56 
57 #include <netpfil/ipfw/ip_fw_private.h>
58 
59 #include "nat64lsn.h"
60 
/*
 * Identifier of the "nat64lsn" external action, declared via VNET_DEFINE.
 * nat64lsn_init() stores the value returned by ipfw_add_eaction() here and
 * nat64lsn_uninit() resets it to zero.
 */
61 VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
62 
63 static struct nat64lsn_cfg *
64 nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
65 {
66 	struct nat64lsn_cfg *cfg;
67 
68 	cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
69 	    IPFW_TLV_NAT64LSN_NAME, name);
70 
71 	return (cfg);
72 }
73 
74 static void
75 nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
76 {
77 
78 	if (uc->jmaxlen == 0)
79 		uc->jmaxlen = NAT64LSN_JMAXLEN;
80 	if (uc->jmaxlen > 65536)
81 		uc->jmaxlen = 65536;
82 	if (uc->nh_delete_delay == 0)
83 		uc->nh_delete_delay = NAT64LSN_HOST_AGE;
84 	if (uc->pg_delete_delay == 0)
85 		uc->pg_delete_delay = NAT64LSN_PG_AGE;
86 	if (uc->st_syn_ttl == 0)
87 		uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
88 	if (uc->st_close_ttl == 0)
89 		uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
90 	if (uc->st_estab_ttl == 0)
91 		uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
92 	if (uc->st_udp_ttl == 0)
93 		uc->st_udp_ttl = NAT64LSN_UDP_AGE;
94 	if (uc->st_icmp_ttl == 0)
95 		uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
96 
97 	if (uc->states_chunks == 0)
98 		uc->states_chunks = 1;
99 	else if (uc->states_chunks >= 128)
100 		uc->states_chunks = 128;
101 	else if (!powerof2(uc->states_chunks))
102 		uc->states_chunks = 1 << fls(uc->states_chunks);
103 }
104 
105 /*
106  * Creates new nat64lsn instance.
107  * Data layout (v0)(current):
108  * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
109  *
110  * Returns 0 on success
111  */
112 static int
113 nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
114     struct sockopt_data *sd)
115 {
116 	ipfw_obj_lheader *olh;
117 	ipfw_nat64lsn_cfg *uc;
118 	struct nat64lsn_cfg *cfg;
119 	struct namedobj_instance *ni;
120 	uint32_t addr4, mask4;
121 
122 	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
123 		return (EINVAL);
124 
125 	olh = (ipfw_obj_lheader *)sd->kbuf;
126 	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
127 
128 	if (ipfw_check_object_name_generic(uc->name) != 0)
129 		return (EINVAL);
130 
131 	if (uc->set >= IPFW_MAX_SETS)
132 		return (EINVAL);
133 
134 	if (uc->plen4 > 32)
135 		return (EINVAL);
136 
137 	/*
138 	 * Unspecified address has special meaning. But it must
139 	 * have valid prefix length. This length will be used to
140 	 * correctly extract and embedd IPv4 address into IPv6.
141 	 */
142 	if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
143 	    IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
144 	    nat64_check_prefixlen(uc->plen6) != 0)
145 		return (EINVAL);
146 
147 	/* XXX: Check prefix4 to be global */
148 	addr4 = ntohl(uc->prefix4.s_addr);
149 	mask4 = ~((1 << (32 - uc->plen4)) - 1);
150 	if ((addr4 & mask4) != addr4)
151 		return (EINVAL);
152 
153 	nat64lsn_default_config(uc);
154 
155 	ni = CHAIN_TO_SRV(ch);
156 	IPFW_UH_RLOCK(ch);
157 	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
158 		IPFW_UH_RUNLOCK(ch);
159 		return (EEXIST);
160 	}
161 	IPFW_UH_RUNLOCK(ch);
162 
163 	cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
164 	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
165 	cfg->no.name = cfg->name;
166 	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
167 	cfg->no.set = uc->set;
168 
169 	cfg->base.plat_prefix = uc->prefix6;
170 	cfg->base.plat_plen = uc->plen6;
171 	cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
172 	if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
173 		cfg->base.flags |= NAT64_WKPFX;
174 	else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
175 		cfg->base.flags |= NAT64LSN_ANYPREFIX;
176 
177 	cfg->states_chunks = uc->states_chunks;
178 	cfg->jmaxlen = uc->jmaxlen;
179 	cfg->host_delete_delay = uc->nh_delete_delay;
180 	cfg->pg_delete_delay = uc->pg_delete_delay;
181 	cfg->st_syn_ttl = uc->st_syn_ttl;
182 	cfg->st_close_ttl = uc->st_close_ttl;
183 	cfg->st_estab_ttl = uc->st_estab_ttl;
184 	cfg->st_udp_ttl = uc->st_udp_ttl;
185 	cfg->st_icmp_ttl = uc->st_icmp_ttl;
186 
187 	cfg->nomatch_verdict = IP_FW_DENY;
188 
189 	IPFW_UH_WLOCK(ch);
190 
191 	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
192 		IPFW_UH_WUNLOCK(ch);
193 		nat64lsn_destroy_instance(cfg);
194 		return (EEXIST);
195 	}
196 
197 	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
198 		IPFW_UH_WUNLOCK(ch);
199 		nat64lsn_destroy_instance(cfg);
200 		return (ENOSPC);
201 	}
202 	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
203 
204 	/* Okay, let's link data */
205 	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
206 	nat64lsn_start_instance(cfg);
207 
208 	IPFW_UH_WUNLOCK(ch);
209 	return (0);
210 }
211 
212 static void
213 nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
214 {
215 
216 	IPFW_UH_WLOCK_ASSERT(ch);
217 
218 	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
219 	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
220 }
221 
222 /*
223  * Destroys nat64 instance.
224  * Data layout (v0)(current):
225  * Request: [ ipfw_obj_header ]
226  *
227  * Returns 0 on success
228  */
229 static int
230 nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
231     struct sockopt_data *sd)
232 {
233 	struct nat64lsn_cfg *cfg;
234 	ipfw_obj_header *oh;
235 
236 	if (sd->valsize != sizeof(*oh))
237 		return (EINVAL);
238 
239 	oh = (ipfw_obj_header *)op3;
240 
241 	IPFW_UH_WLOCK(ch);
242 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
243 	if (cfg == NULL) {
244 		IPFW_UH_WUNLOCK(ch);
245 		return (ENOENT);
246 	}
247 
248 	if (cfg->no.refcnt > 0) {
249 		IPFW_UH_WUNLOCK(ch);
250 		return (EBUSY);
251 	}
252 
253 	ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx);
254 	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
255 	nat64lsn_detach_config(ch, cfg);
256 	IPFW_UH_WUNLOCK(ch);
257 
258 	nat64lsn_destroy_instance(cfg);
259 	return (0);
260 }
261 
262 #define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
263 	(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)
264 static void
265 export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
266     struct ipfw_nat64lsn_stats *stats)
267 {
268 	struct nat64lsn_alias *alias;
269 	int i, j;
270 
271 	__COPY_STAT_FIELD(cfg, stats, opcnt64);
272 	__COPY_STAT_FIELD(cfg, stats, opcnt46);
273 	__COPY_STAT_FIELD(cfg, stats, ofrags);
274 	__COPY_STAT_FIELD(cfg, stats, ifrags);
275 	__COPY_STAT_FIELD(cfg, stats, oerrors);
276 	__COPY_STAT_FIELD(cfg, stats, noroute4);
277 	__COPY_STAT_FIELD(cfg, stats, noroute6);
278 	__COPY_STAT_FIELD(cfg, stats, nomatch4);
279 	__COPY_STAT_FIELD(cfg, stats, noproto);
280 	__COPY_STAT_FIELD(cfg, stats, nomem);
281 	__COPY_STAT_FIELD(cfg, stats, dropped);
282 
283 	__COPY_STAT_FIELD(cfg, stats, jcalls);
284 	__COPY_STAT_FIELD(cfg, stats, jrequests);
285 	__COPY_STAT_FIELD(cfg, stats, jhostsreq);
286 	__COPY_STAT_FIELD(cfg, stats, jportreq);
287 	__COPY_STAT_FIELD(cfg, stats, jhostfails);
288 	__COPY_STAT_FIELD(cfg, stats, jportfails);
289 	__COPY_STAT_FIELD(cfg, stats, jmaxlen);
290 	__COPY_STAT_FIELD(cfg, stats, jnomem);
291 	__COPY_STAT_FIELD(cfg, stats, jreinjected);
292 	__COPY_STAT_FIELD(cfg, stats, screated);
293 	__COPY_STAT_FIELD(cfg, stats, sdeleted);
294 	__COPY_STAT_FIELD(cfg, stats, spgcreated);
295 	__COPY_STAT_FIELD(cfg, stats, spgdeleted);
296 
297 	stats->hostcount = cfg->hosts_count;
298 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
299 		alias = &cfg->aliases[i];
300 		for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
301 			stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
302 		for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
303 			stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
304 		for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
305 			stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
306 	}
307 }
308 #undef	__COPY_STAT_FIELD
309 
310 static void
311 nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
312     ipfw_nat64lsn_cfg *uc)
313 {
314 
315 	uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
316 	uc->states_chunks = cfg->states_chunks;
317 	uc->jmaxlen = cfg->jmaxlen;
318 	uc->nh_delete_delay = cfg->host_delete_delay;
319 	uc->pg_delete_delay = cfg->pg_delete_delay;
320 	uc->st_syn_ttl = cfg->st_syn_ttl;
321 	uc->st_close_ttl = cfg->st_close_ttl;
322 	uc->st_estab_ttl = cfg->st_estab_ttl;
323 	uc->st_udp_ttl = cfg->st_udp_ttl;
324 	uc->st_icmp_ttl = cfg->st_icmp_ttl;
325 	uc->prefix4.s_addr = htonl(cfg->prefix4);
326 	uc->prefix6 = cfg->base.plat_prefix;
327 	uc->plen4 = cfg->plen4;
328 	uc->plen6 = cfg->base.plat_plen;
329 	uc->set = cfg->no.set;
330 	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
331 }
332 
/*
 * Context handed through ipfw_objhash_foreach_type() to export_config_cb()
 * while nat64lsn_list() walks the instances.
 */
333 struct nat64_dump_arg {
334 	struct ip_fw_chain *ch;	/* chain passed on to nat64lsn_export_config() */
335 	struct sockopt_data *sd;	/* reply buffer the records are taken from */
336 };
337 
338 static int
339 export_config_cb(struct namedobj_instance *ni, struct named_object *no,
340     void *arg)
341 {
342 	struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
343 	ipfw_nat64lsn_cfg *uc;
344 
345 	uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
346 	    sizeof(*uc));
347 	nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
348 	return (0);
349 }
350 
351 /*
352  * Lists all nat64 lsn instances currently available in kernel.
353  * Data layout (v0)(current):
354  * Request: [ ipfw_obj_lheader ]
355  * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
356  *
357  * Returns 0 on success
358  */
359 static int
360 nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
361     struct sockopt_data *sd)
362 {
363 	ipfw_obj_lheader *olh;
364 	struct nat64_dump_arg da;
365 
366 	/* Check minimum header size */
367 	if (sd->valsize < sizeof(ipfw_obj_lheader))
368 		return (EINVAL);
369 
370 	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
371 
372 	IPFW_UH_RLOCK(ch);
373 	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
374 	    IPFW_TLV_NAT64LSN_NAME);
375 	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
376 	olh->size = sizeof(*olh) + olh->count * olh->objsize;
377 
378 	if (sd->valsize < olh->size) {
379 		IPFW_UH_RUNLOCK(ch);
380 		return (ENOMEM);
381 	}
382 	memset(&da, 0, sizeof(da));
383 	da.ch = ch;
384 	da.sd = sd;
385 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
386 	    IPFW_TLV_NAT64LSN_NAME);
387 	IPFW_UH_RUNLOCK(ch);
388 
389 	return (0);
390 }
391 
392 /*
393  * Change existing nat64lsn instance configuration.
394  * Data layout (v0)(current):
395  * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
396  * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
397  *
398  * Returns 0 on success
399  */
400 static int
401 nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
402     struct sockopt_data *sd)
403 {
404 	ipfw_obj_header *oh;
405 	ipfw_nat64lsn_cfg *uc;
406 	struct nat64lsn_cfg *cfg;
407 	struct namedobj_instance *ni;
408 
409 	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
410 		return (EINVAL);
411 
412 	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
413 	    sizeof(*oh) + sizeof(*uc));
414 	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
415 
416 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
417 	    oh->ntlv.set >= IPFW_MAX_SETS)
418 		return (EINVAL);
419 
420 	ni = CHAIN_TO_SRV(ch);
421 	if (sd->sopt->sopt_dir == SOPT_GET) {
422 		IPFW_UH_RLOCK(ch);
423 		cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
424 		if (cfg == NULL) {
425 			IPFW_UH_RUNLOCK(ch);
426 			return (ENOENT);
427 		}
428 		nat64lsn_export_config(ch, cfg, uc);
429 		IPFW_UH_RUNLOCK(ch);
430 		return (0);
431 	}
432 
433 	nat64lsn_default_config(uc);
434 
435 	IPFW_UH_WLOCK(ch);
436 	cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
437 	if (cfg == NULL) {
438 		IPFW_UH_WUNLOCK(ch);
439 		return (ENOENT);
440 	}
441 
442 	/*
443 	 * For now allow to change only following values:
444 	 *  jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
445 	 *  tcp_est_age, udp_age, icmp_age, flags, states_chunks.
446 	 */
447 
448 	cfg->states_chunks = uc->states_chunks;
449 	cfg->jmaxlen = uc->jmaxlen;
450 	cfg->host_delete_delay = uc->nh_delete_delay;
451 	cfg->pg_delete_delay = uc->pg_delete_delay;
452 	cfg->st_syn_ttl = uc->st_syn_ttl;
453 	cfg->st_close_ttl = uc->st_close_ttl;
454 	cfg->st_estab_ttl = uc->st_estab_ttl;
455 	cfg->st_udp_ttl = uc->st_udp_ttl;
456 	cfg->st_icmp_ttl = uc->st_icmp_ttl;
457 	cfg->base.flags &= ~NAT64LSN_FLAGSMASK;
458 	cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;
459 
460 	IPFW_UH_WUNLOCK(ch);
461 
462 	return (0);
463 }
464 
465 /*
466  * Get nat64lsn statistics.
467  * Data layout (v0)(current):
468  * Request: [ ipfw_obj_header ]
469  * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
470  *
471  * Returns 0 on success
472  */
473 static int
474 nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
475     struct sockopt_data *sd)
476 {
477 	struct ipfw_nat64lsn_stats stats;
478 	struct nat64lsn_cfg *cfg;
479 	ipfw_obj_header *oh;
480 	ipfw_obj_ctlv *ctlv;
481 	size_t sz;
482 
483 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
484 	if (sd->valsize % sizeof(uint64_t))
485 		return (EINVAL);
486 	if (sd->valsize < sz)
487 		return (ENOMEM);
488 	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
489 	if (oh == NULL)
490 		return (EINVAL);
491 	memset(&stats, 0, sizeof(stats));
492 
493 	IPFW_UH_RLOCK(ch);
494 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
495 	if (cfg == NULL) {
496 		IPFW_UH_RUNLOCK(ch);
497 		return (ENOENT);
498 	}
499 
500 	export_stats(ch, cfg, &stats);
501 	IPFW_UH_RUNLOCK(ch);
502 
503 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
504 	memset(ctlv, 0, sizeof(*ctlv));
505 	ctlv->head.type = IPFW_TLV_COUNTERS;
506 	ctlv->head.length = sz - sizeof(ipfw_obj_header);
507 	ctlv->count = sizeof(stats) / sizeof(uint64_t);
508 	ctlv->objsize = sizeof(uint64_t);
509 	ctlv->version = IPFW_NAT64_VERSION;
510 	memcpy(ctlv + 1, &stats, sizeof(stats));
511 	return (0);
512 }
513 
514 /*
515  * Reset nat64lsn statistics.
516  * Data layout (v0)(current):
517  * Request: [ ipfw_obj_header ]
518  *
519  * Returns 0 on success
520  */
521 static int
522 nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
523     struct sockopt_data *sd)
524 {
525 	struct nat64lsn_cfg *cfg;
526 	ipfw_obj_header *oh;
527 
528 	if (sd->valsize != sizeof(*oh))
529 		return (EINVAL);
530 	oh = (ipfw_obj_header *)sd->kbuf;
531 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
532 	    oh->ntlv.set >= IPFW_MAX_SETS)
533 		return (EINVAL);
534 
535 	IPFW_UH_WLOCK(ch);
536 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
537 	if (cfg == NULL) {
538 		IPFW_UH_WUNLOCK(ch);
539 		return (ENOENT);
540 	}
541 	COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
542 	IPFW_UH_WUNLOCK(ch);
543 	return (0);
544 }
545 
546 #ifdef __LP64__
547 #define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n))
548 #else
549 #define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n)) | \
550     ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
551 #endif
552 /*
553  * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
554  *	ipfw_nat64lsn_state x count, ... ] ]
555  */
556 static int
557 nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
558     struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
559 {
560 	ipfw_nat64lsn_state_v1 *s;
561 	struct nat64lsn_state *state;
562 	uint64_t freemask;
563 	uint32_t i, count;
564 
565 	/* validate user input */
566 	if (idx->chunk > pg->chunks_count - 1)
567 		return (EINVAL);
568 
569 	FREEMASK_COPY(pg, idx->chunk, freemask);
570 	count = 64 - bitcount64(freemask);
571 	if (count == 0)
572 		return (0);	/* Try next PG/chunk */
573 
574 	DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
575 	    (uintmax_t)idx->index, count);
576 
577 	s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
578 	    count * sizeof(ipfw_nat64lsn_state_v1));
579 	if (s == NULL)
580 		return (ENOMEM);
581 
582 	for (i = 0; i < 64; i++) {
583 		if (ISSET64(freemask, i))
584 			continue;
585 		state = pg->chunks_count == 1 ? &pg->states->state[i] :
586 		    &pg->states_chunk[idx->chunk]->state[i];
587 
588 		s->host6 = state->host->addr;
589 		s->daddr.s_addr = htonl(state->ip_dst);
590 		s->dport = state->dport;
591 		s->sport = state->sport;
592 		s->aport = state->aport;
593 		s->flags = (uint8_t)(state->flags & 7);
594 		s->proto = state->proto;
595 		s->idle = GET_AGE(state->timestamp);
596 		s++;
597 	}
598 	*ret_count = count;
599 	return (0);
600 }
601 
602 #define	LAST_IDX	0xFF
603 static int
604 nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
605     union nat64lsn_pgidx *idx)
606 {
607 
608 	/* First iterate over chunks */
609 	if (pg != NULL) {
610 		if (idx->chunk < pg->chunks_count - 1) {
611 			idx->chunk++;
612 			return (0);
613 		}
614 	}
615 	idx->chunk = 0;
616 	/* Then over PGs */
617 	if (idx->port < UINT16_MAX - 64) {
618 		idx->port += 64;
619 		return (0);
620 	}
621 	idx->port = NAT64_MIN_PORT;
622 	/* Then over supported protocols */
623 	switch (idx->proto) {
624 	case IPPROTO_ICMP:
625 		idx->proto = IPPROTO_TCP;
626 		return (0);
627 	case IPPROTO_TCP:
628 		idx->proto = IPPROTO_UDP;
629 		return (0);
630 	default:
631 		idx->proto = IPPROTO_ICMP;
632 	}
633 	/* And then over IPv4 alias addresses */
634 	if (idx->addr < cfg->pmask4) {
635 		idx->addr++;
636 		return (1);	/* New states group is needed */
637 	}
638 	idx->index = LAST_IDX;
639 	return (-1);		/* No more states */
640 }
641 
642 static struct nat64lsn_pg*
643 nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
644 {
645 	struct nat64lsn_alias *alias;
646 	int pg_idx;
647 
648 	alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
649 	MPASS(alias->addr == idx->addr);
650 
651 	pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
652 	switch (idx->proto) {
653 	case IPPROTO_ICMP:
654 		if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
655 			return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
656 		break;
657 	case IPPROTO_TCP:
658 		if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
659 			return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
660 		break;
661 	case IPPROTO_UDP:
662 		if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
663 			return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
664 		break;
665 	}
666 	return (NULL);
667 }
668 
669 /*
670  * Lists nat64lsn states.
671  * Data layout (v0):
672  * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
673  * Reply: [ ipfw_obj_header ipfw_obj_data [
674  *		ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
675  *
676  * Returns 0 on success
677  */
678 static int
679 nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
680     struct sockopt_data *sd)
681 {
682 
683 	/* TODO: implement states listing for old ipfw(8) binaries  */
684 	return (EOPNOTSUPP);
685 }
686 
687 /*
688  * Lists nat64lsn states.
689  * Data layout (v1)(current):
690  * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
691  * Reply: [ ipfw_obj_header ipfw_obj_data [
692  *		ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
693  *
694  * Returns 0 on success
695  */
/*
 * The 64-bit value in the request is a listing cursor (union
 * nat64lsn_pgidx).  Zero means "start from the first alias address";
 * any other value is taken as the position to continue from (presumably
 * the "next" index of an earlier reply -- confirm against ipfw(8)).
 * States are appended group by group until an export fails, the buffer
 * has no room for another group or the cursor reaches LAST_IDX.  The
 * collected error is returned only when no state was exported and the
 * end of the listing was not reached.
 */
696 static int
697 nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
698     struct sockopt_data *sd)
699 {
700 	ipfw_obj_header *oh;
701 	ipfw_obj_data *od;
702 	ipfw_nat64lsn_stg_v1 *stg;
703 	struct nat64lsn_cfg *cfg;
704 	struct nat64lsn_pg *pg;
705 	union nat64lsn_pgidx idx;
706 	size_t sz;
707 	uint32_t count, total;
708 	int ret;
709 
	/* Size of the request: object header, data TLV and 64-bit cursor. */
710 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
711 	    sizeof(uint64_t);
712 	/* Check minimum header size */
713 	if (sd->valsize < sz)
714 		return (EINVAL);
715 
716 	oh = (ipfw_obj_header *)sd->kbuf;
717 	od = (ipfw_obj_data *)(oh + 1);
	/* The data TLV must have the expected type and cover the cursor. */
718 	if (od->head.type != IPFW_TLV_OBJDATA ||
719 	    od->head.length != sz - sizeof(ipfw_obj_header))
720 		return (EINVAL);
721 
	/*
	 * A non-zero cursor must name one of the supported protocols.
	 * LAST_IDX is the end marker set by nat64lsn_next_pgidx() and is
	 * not accepted as a starting point.
	 */
722 	idx.index = *(uint64_t *)(od + 1);
723 	if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
724 	    idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
725 		return (EINVAL);
726 	if (idx.index == LAST_IDX)
727 		return (EINVAL);
728 
729 	IPFW_UH_RLOCK(ch);
730 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
731 	if (cfg == NULL) {
732 		IPFW_UH_RUNLOCK(ch);
733 		return (ENOENT);
734 	}
735 	if (idx.index == 0) {	/* Fill in starting point */
736 		idx.addr = cfg->prefix4;
737 		idx.proto = IPPROTO_ICMP;
738 		idx.port = NAT64_MIN_PORT;
739 	}
	/* Reject cursors outside of the instance's address or port range. */
740 	if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
741 	    idx.port < NAT64_MIN_PORT) {
742 		IPFW_UH_RUNLOCK(ch);
743 		return (EINVAL);
744 	}
	/* The reply needs room for the headers and the first states group. */
745 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
746 	    sizeof(ipfw_nat64lsn_stg_v1);
747 	if (sd->valsize < sz) {
748 		IPFW_UH_RUNLOCK(ch);
749 		return (ENOMEM);
750 	}
751 	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
752 	od = (ipfw_obj_data *)(oh + 1);
753 	od->head.type = IPFW_TLV_OBJDATA;
754 	od->head.length = sz - sizeof(ipfw_obj_header);
755 	stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
756 	stg->count = total = 0;
757 	stg->next.index = idx.index;
758 	/*
759 	 * Acquire CALLOUT_LOCK to avoid races with expiration code.
760 	 * Thus states, hosts and PGs will not expire while we hold it.
761 	 */
762 	CALLOUT_LOCK(cfg);
763 	ret = 0;
764 	do {
		/* pg is NULL when the cursor does not hit an existing PG. */
765 		pg = nat64lsn_get_pg_byidx(cfg, &idx);
766 		if (pg != NULL) {
767 			count = 0;
768 			ret = nat64lsn_export_states_v1(cfg, &idx, pg,
769 			    sd, &count);
			/* This break leaves the do/while loop with ret set. */
770 			if (ret != 0)
771 				break;
772 			if (count > 0) {
773 				stg->count += count;
774 				total += count;
775 				/* Update total size of reply */
776 				od->head.length +=
777 				    count * sizeof(ipfw_nat64lsn_state_v1);
778 				sz += count * sizeof(ipfw_nat64lsn_state_v1);
779 			}
780 			stg->alias4.s_addr = htonl(idx.addr);
781 		}
782 		/* Determine new index */
783 		switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
784 		case -1:
785 			ret = ENOENT; /* End of search */
786 			break;
787 		case 1: /*
788 			 * Next alias address, new group may be needed.
789 			 * If states count is zero, use this group.
790 			 */
			/*
			 * The continue jumps straight to the loop condition,
			 * so the "next" index update below is skipped for
			 * this iteration.
			 */
791 			if (stg->count == 0)
792 				continue;
793 			/* Otherwise try to create new group */
794 			sz += sizeof(ipfw_nat64lsn_stg_v1);
795 			if (sd->valsize < sz) {
796 				ret = ENOMEM;
797 				break;
798 			}
799 			/* Save next index in current group */
800 			stg->next.index = idx.index;
801 			stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
802 			    sizeof(ipfw_nat64lsn_stg_v1));
803 			od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
804 			stg->count = 0;
805 			break;
806 		}
		/* Record the cursor position reached so far in the group. */
807 		stg->next.index = idx.index;
808 	} while (ret == 0);
809 	CALLOUT_UNLOCK(cfg);
810 	IPFW_UH_RUNLOCK(ch);
	/* A partial listing or a finished walk counts as success. */
811 	return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
812 }
813 
/*
 * Sockopt handlers of this module.  Each row appears to be
 * { opcode, version, direction, handler } -- confirm against the
 * declaration of struct ipfw_sopt_handler.  The table is passed to
 * IPFW_ADD_SOPT_HANDLER() in nat64lsn_init() and to
 * IPFW_DEL_SOPT_HANDLER() in nat64lsn_uninit().
 * IP_FW_NAT64LSN_LIST_STATES is listed twice; its first entry points to
 * nat64lsn_states_v0(), which only returns EOPNOTSUPP.
 */
814 static struct ipfw_sopt_handler	scodes[] = {
815 	{ IP_FW_NAT64LSN_CREATE, 0,	HDIR_BOTH,	nat64lsn_create },
816 	{ IP_FW_NAT64LSN_DESTROY,0,	HDIR_SET,	nat64lsn_destroy },
817 	{ IP_FW_NAT64LSN_CONFIG, 0,	HDIR_BOTH,	nat64lsn_config },
818 	{ IP_FW_NAT64LSN_LIST,	 0,	HDIR_GET,	nat64lsn_list },
819 	{ IP_FW_NAT64LSN_STATS,	 0,	HDIR_GET,	nat64lsn_stats },
820 	{ IP_FW_NAT64LSN_RESET_STATS,0,	HDIR_SET,	nat64lsn_reset_stats },
821 	{ IP_FW_NAT64LSN_LIST_STATES,0,	HDIR_GET,	nat64lsn_states_v0 },
822 	{ IP_FW_NAT64LSN_LIST_STATES,1,	HDIR_GET,	nat64lsn_states_v1 },
823 };
824 
825 static int
826 nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
827 {
828 	ipfw_insn *icmd;
829 
830 	icmd = cmd - 1;
831 	if (icmd->opcode != O_EXTERNAL_ACTION ||
832 	    icmd->arg1 != V_nat64lsn_eid)
833 		return (1);
834 
835 	*puidx = cmd->arg1;
836 	*ptype = 0;
837 	return (0);
838 }
839 
840 static void
841 nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
842 {
843 
844 	cmd->arg1 = idx;
845 }
846 
847 static int
848 nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
849     struct named_object **pno)
850 {
851 	int err;
852 
853 	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
854 	    IPFW_TLV_NAT64LSN_NAME, pno);
855 	return (err);
856 }
857 
/*
 * Kernel index lookup hook of the object rewriter.  The UH write lock
 * must be held (asserted below) and the object is expected to exist
 * (checked with KASSERT).
 */
static struct named_object *
nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
{
	struct named_object *no;

	IPFW_UH_WLOCK_ASSERT(ch);
	no = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx);
	KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx));

	return (no);
}
871 
872 static int
873 nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
874     enum ipfw_sets_cmd cmd)
875 {
876 
877 	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
878 	    set, new_set, cmd));
879 }
880 
881 static struct opcode_obj_rewrite opcodes[] = {
882 	{
883 		.opcode = O_EXTERNAL_INSTANCE,
884 		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
885 		.classifier = nat64lsn_classify,
886 		.update = nat64lsn_update_arg1,
887 		.find_byname = nat64lsn_findbyname,
888 		.find_bykidx = nat64lsn_findbykidx,
889 		.manage_sets = nat64lsn_manage_sets,
890 	},
891 };
892 
893 static int
894 destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
895     void *arg)
896 {
897 	struct nat64lsn_cfg *cfg;
898 	struct ip_fw_chain *ch;
899 
900 	ch = (struct ip_fw_chain *)arg;
901 	cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx);
902 	SRV_OBJECT(ch, no->kidx) = NULL;
903 	nat64lsn_detach_config(ch, cfg);
904 	nat64lsn_destroy_instance(cfg);
905 	return (0);
906 }
907 
908 int
909 nat64lsn_init(struct ip_fw_chain *ch, int first)
910 {
911 
912 	if (first != 0)
913 		nat64lsn_init_internal();
914 	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
915 	if (V_nat64lsn_eid == 0)
916 		return (ENXIO);
917 	IPFW_ADD_SOPT_HANDLER(first, scodes);
918 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
919 	return (0);
920 }
921 
922 void
923 nat64lsn_uninit(struct ip_fw_chain *ch, int last)
924 {
925 
926 	IPFW_DEL_OBJ_REWRITER(last, opcodes);
927 	IPFW_DEL_SOPT_HANDLER(last, scodes);
928 	ipfw_del_eaction(ch, V_nat64lsn_eid);
929 	/*
930 	 * Since we already have deregistered external action,
931 	 * our named objects become unaccessible via rules, because
932 	 * all rules were truncated by ipfw_del_eaction().
933 	 * So, we can unlink and destroy our named objects without holding
934 	 * IPFW_WLOCK().
935 	 */
936 	IPFW_UH_WLOCK(ch);
937 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
938 	    IPFW_TLV_NAT64LSN_NAME);
939 	V_nat64lsn_eid = 0;
940 	IPFW_UH_WUNLOCK(ch);
941 	if (last != 0)
942 		nat64lsn_uninit_internal();
943 }
944