xref: /freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/counter.h>
34 #include <sys/ck.h>
35 #include <sys/epoch.h>
36 #include <sys/errno.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/rmlock.h>
43 #include <sys/rwlock.h>
44 #include <sys/socket.h>
45 #include <sys/sockopt.h>
46 
47 #include <net/if.h>
48 
49 #include <netinet/in.h>
50 #include <netinet/ip.h>
51 #include <netinet/ip_var.h>
52 #include <netinet/ip_fw.h>
53 #include <netinet6/ip_fw_nat64.h>
54 
55 #include <netpfil/ipfw/ip_fw_private.h>
56 
57 #include "nat64lsn.h"
58 
59 VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
60 
61 static struct nat64lsn_cfg *
62 nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
63 {
64 	struct nat64lsn_cfg *cfg;
65 
66 	cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
67 	    IPFW_TLV_NAT64LSN_NAME, name);
68 
69 	return (cfg);
70 }
71 
72 static void
73 nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
74 {
75 
76 	if (uc->jmaxlen == 0)
77 		uc->jmaxlen = NAT64LSN_JMAXLEN;
78 	if (uc->jmaxlen > 65536)
79 		uc->jmaxlen = 65536;
80 	if (uc->nh_delete_delay == 0)
81 		uc->nh_delete_delay = NAT64LSN_HOST_AGE;
82 	if (uc->pg_delete_delay == 0)
83 		uc->pg_delete_delay = NAT64LSN_PG_AGE;
84 	if (uc->st_syn_ttl == 0)
85 		uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
86 	if (uc->st_close_ttl == 0)
87 		uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
88 	if (uc->st_estab_ttl == 0)
89 		uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
90 	if (uc->st_udp_ttl == 0)
91 		uc->st_udp_ttl = NAT64LSN_UDP_AGE;
92 	if (uc->st_icmp_ttl == 0)
93 		uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
94 
95 	if (uc->states_chunks == 0)
96 		uc->states_chunks = 1;
97 	else if (uc->states_chunks >= 128)
98 		uc->states_chunks = 128;
99 	else if (!powerof2(uc->states_chunks))
100 		uc->states_chunks = 1 << fls(uc->states_chunks);
101 }
102 
103 /*
104  * Creates new nat64lsn instance.
105  * Data layout (v0)(current):
106  * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
107  *
108  * Returns 0 on success
109  */
110 static int
111 nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
112     struct sockopt_data *sd)
113 {
114 	ipfw_obj_lheader *olh;
115 	ipfw_nat64lsn_cfg *uc;
116 	struct nat64lsn_cfg *cfg;
117 	struct namedobj_instance *ni;
118 	uint32_t addr4, mask4;
119 
120 	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
121 		return (EINVAL);
122 
123 	olh = (ipfw_obj_lheader *)sd->kbuf;
124 	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
125 
126 	if (ipfw_check_object_name_generic(uc->name) != 0)
127 		return (EINVAL);
128 
129 	if (uc->set >= IPFW_MAX_SETS)
130 		return (EINVAL);
131 
132 	if (uc->plen4 > 32)
133 		return (EINVAL);
134 
135 	/*
136 	 * Unspecified address has special meaning. But it must
137 	 * have valid prefix length. This length will be used to
138 	 * correctly extract and embedd IPv4 address into IPv6.
139 	 */
140 	if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
141 	    IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
142 	    nat64_check_prefixlen(uc->plen6) != 0)
143 		return (EINVAL);
144 
145 	/* XXX: Check prefix4 to be global */
146 	addr4 = ntohl(uc->prefix4.s_addr);
147 	mask4 = ~((1 << (32 - uc->plen4)) - 1);
148 	if ((addr4 & mask4) != addr4)
149 		return (EINVAL);
150 
151 	nat64lsn_default_config(uc);
152 
153 	ni = CHAIN_TO_SRV(ch);
154 	IPFW_UH_RLOCK(ch);
155 	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
156 		IPFW_UH_RUNLOCK(ch);
157 		return (EEXIST);
158 	}
159 	IPFW_UH_RUNLOCK(ch);
160 
161 	cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
162 	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
163 	cfg->no.name = cfg->name;
164 	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
165 	cfg->no.set = uc->set;
166 
167 	cfg->base.plat_prefix = uc->prefix6;
168 	cfg->base.plat_plen = uc->plen6;
169 	cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
170 	if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
171 		cfg->base.flags |= NAT64_WKPFX;
172 	else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
173 		cfg->base.flags |= NAT64LSN_ANYPREFIX;
174 
175 	cfg->states_chunks = uc->states_chunks;
176 	cfg->jmaxlen = uc->jmaxlen;
177 	cfg->host_delete_delay = uc->nh_delete_delay;
178 	cfg->pg_delete_delay = uc->pg_delete_delay;
179 	cfg->st_syn_ttl = uc->st_syn_ttl;
180 	cfg->st_close_ttl = uc->st_close_ttl;
181 	cfg->st_estab_ttl = uc->st_estab_ttl;
182 	cfg->st_udp_ttl = uc->st_udp_ttl;
183 	cfg->st_icmp_ttl = uc->st_icmp_ttl;
184 
185 	cfg->nomatch_verdict = IP_FW_DENY;
186 
187 	IPFW_UH_WLOCK(ch);
188 
189 	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
190 		IPFW_UH_WUNLOCK(ch);
191 		nat64lsn_destroy_instance(cfg);
192 		return (EEXIST);
193 	}
194 
195 	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
196 		IPFW_UH_WUNLOCK(ch);
197 		nat64lsn_destroy_instance(cfg);
198 		return (ENOSPC);
199 	}
200 	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
201 
202 	/* Okay, let's link data */
203 	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
204 	nat64lsn_start_instance(cfg);
205 
206 	IPFW_UH_WUNLOCK(ch);
207 	return (0);
208 }
209 
210 static void
211 nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
212 {
213 
214 	IPFW_UH_WLOCK_ASSERT(ch);
215 
216 	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
217 	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
218 }
219 
220 /*
221  * Destroys nat64 instance.
222  * Data layout (v0)(current):
223  * Request: [ ipfw_obj_header ]
224  *
225  * Returns 0 on success
226  */
227 static int
228 nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
229     struct sockopt_data *sd)
230 {
231 	struct nat64lsn_cfg *cfg;
232 	ipfw_obj_header *oh;
233 
234 	if (sd->valsize != sizeof(*oh))
235 		return (EINVAL);
236 
237 	oh = (ipfw_obj_header *)op3;
238 
239 	IPFW_UH_WLOCK(ch);
240 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
241 	if (cfg == NULL) {
242 		IPFW_UH_WUNLOCK(ch);
243 		return (ENOENT);
244 	}
245 
246 	if (cfg->no.refcnt > 0) {
247 		IPFW_UH_WUNLOCK(ch);
248 		return (EBUSY);
249 	}
250 
251 	ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx);
252 	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
253 	nat64lsn_detach_config(ch, cfg);
254 	IPFW_UH_WUNLOCK(ch);
255 
256 	nat64lsn_destroy_instance(cfg);
257 	return (0);
258 }
259 
260 #define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
261 	(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)
262 static void
263 export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
264     struct ipfw_nat64lsn_stats *stats)
265 {
266 	struct nat64lsn_alias *alias;
267 	int i, j;
268 
269 	__COPY_STAT_FIELD(cfg, stats, opcnt64);
270 	__COPY_STAT_FIELD(cfg, stats, opcnt46);
271 	__COPY_STAT_FIELD(cfg, stats, ofrags);
272 	__COPY_STAT_FIELD(cfg, stats, ifrags);
273 	__COPY_STAT_FIELD(cfg, stats, oerrors);
274 	__COPY_STAT_FIELD(cfg, stats, noroute4);
275 	__COPY_STAT_FIELD(cfg, stats, noroute6);
276 	__COPY_STAT_FIELD(cfg, stats, nomatch4);
277 	__COPY_STAT_FIELD(cfg, stats, noproto);
278 	__COPY_STAT_FIELD(cfg, stats, nomem);
279 	__COPY_STAT_FIELD(cfg, stats, dropped);
280 
281 	__COPY_STAT_FIELD(cfg, stats, jcalls);
282 	__COPY_STAT_FIELD(cfg, stats, jrequests);
283 	__COPY_STAT_FIELD(cfg, stats, jhostsreq);
284 	__COPY_STAT_FIELD(cfg, stats, jportreq);
285 	__COPY_STAT_FIELD(cfg, stats, jhostfails);
286 	__COPY_STAT_FIELD(cfg, stats, jportfails);
287 	__COPY_STAT_FIELD(cfg, stats, jmaxlen);
288 	__COPY_STAT_FIELD(cfg, stats, jnomem);
289 	__COPY_STAT_FIELD(cfg, stats, jreinjected);
290 	__COPY_STAT_FIELD(cfg, stats, screated);
291 	__COPY_STAT_FIELD(cfg, stats, sdeleted);
292 	__COPY_STAT_FIELD(cfg, stats, spgcreated);
293 	__COPY_STAT_FIELD(cfg, stats, spgdeleted);
294 
295 	stats->hostcount = cfg->hosts_count;
296 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
297 		alias = &cfg->aliases[i];
298 		for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
299 			stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
300 		for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
301 			stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
302 		for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
303 			stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
304 	}
305 }
306 #undef	__COPY_STAT_FIELD
307 
308 static void
309 nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
310     ipfw_nat64lsn_cfg *uc)
311 {
312 
313 	uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
314 	uc->states_chunks = cfg->states_chunks;
315 	uc->jmaxlen = cfg->jmaxlen;
316 	uc->nh_delete_delay = cfg->host_delete_delay;
317 	uc->pg_delete_delay = cfg->pg_delete_delay;
318 	uc->st_syn_ttl = cfg->st_syn_ttl;
319 	uc->st_close_ttl = cfg->st_close_ttl;
320 	uc->st_estab_ttl = cfg->st_estab_ttl;
321 	uc->st_udp_ttl = cfg->st_udp_ttl;
322 	uc->st_icmp_ttl = cfg->st_icmp_ttl;
323 	uc->prefix4.s_addr = htonl(cfg->prefix4);
324 	uc->prefix6 = cfg->base.plat_prefix;
325 	uc->plen4 = cfg->plen4;
326 	uc->plen6 = cfg->base.plat_plen;
327 	uc->set = cfg->no.set;
328 	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
329 }
330 
331 struct nat64_dump_arg {
332 	struct ip_fw_chain *ch;
333 	struct sockopt_data *sd;
334 };
335 
336 static int
337 export_config_cb(struct namedobj_instance *ni, struct named_object *no,
338     void *arg)
339 {
340 	struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
341 	ipfw_nat64lsn_cfg *uc;
342 
343 	uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
344 	    sizeof(*uc));
345 	nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
346 	return (0);
347 }
348 
349 /*
350  * Lists all nat64 lsn instances currently available in kernel.
351  * Data layout (v0)(current):
352  * Request: [ ipfw_obj_lheader ]
353  * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
354  *
355  * Returns 0 on success
356  */
357 static int
358 nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
359     struct sockopt_data *sd)
360 {
361 	ipfw_obj_lheader *olh;
362 	struct nat64_dump_arg da;
363 
364 	/* Check minimum header size */
365 	if (sd->valsize < sizeof(ipfw_obj_lheader))
366 		return (EINVAL);
367 
368 	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
369 
370 	IPFW_UH_RLOCK(ch);
371 	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
372 	    IPFW_TLV_NAT64LSN_NAME);
373 	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
374 	olh->size = sizeof(*olh) + olh->count * olh->objsize;
375 
376 	if (sd->valsize < olh->size) {
377 		IPFW_UH_RUNLOCK(ch);
378 		return (ENOMEM);
379 	}
380 	memset(&da, 0, sizeof(da));
381 	da.ch = ch;
382 	da.sd = sd;
383 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
384 	    IPFW_TLV_NAT64LSN_NAME);
385 	IPFW_UH_RUNLOCK(ch);
386 
387 	return (0);
388 }
389 
390 /*
391  * Change existing nat64lsn instance configuration.
392  * Data layout (v0)(current):
393  * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
394  * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
395  *
396  * Returns 0 on success
397  */
398 static int
399 nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
400     struct sockopt_data *sd)
401 {
402 	ipfw_obj_header *oh;
403 	ipfw_nat64lsn_cfg *uc;
404 	struct nat64lsn_cfg *cfg;
405 	struct namedobj_instance *ni;
406 
407 	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
408 		return (EINVAL);
409 
410 	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
411 	    sizeof(*oh) + sizeof(*uc));
412 	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
413 
414 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
415 	    oh->ntlv.set >= IPFW_MAX_SETS)
416 		return (EINVAL);
417 
418 	ni = CHAIN_TO_SRV(ch);
419 	if (sd->sopt->sopt_dir == SOPT_GET) {
420 		IPFW_UH_RLOCK(ch);
421 		cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
422 		if (cfg == NULL) {
423 			IPFW_UH_RUNLOCK(ch);
424 			return (ENOENT);
425 		}
426 		nat64lsn_export_config(ch, cfg, uc);
427 		IPFW_UH_RUNLOCK(ch);
428 		return (0);
429 	}
430 
431 	nat64lsn_default_config(uc);
432 
433 	IPFW_UH_WLOCK(ch);
434 	cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
435 	if (cfg == NULL) {
436 		IPFW_UH_WUNLOCK(ch);
437 		return (ENOENT);
438 	}
439 
440 	/*
441 	 * For now allow to change only following values:
442 	 *  jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
443 	 *  tcp_est_age, udp_age, icmp_age, flags, states_chunks.
444 	 */
445 
446 	cfg->states_chunks = uc->states_chunks;
447 	cfg->jmaxlen = uc->jmaxlen;
448 	cfg->host_delete_delay = uc->nh_delete_delay;
449 	cfg->pg_delete_delay = uc->pg_delete_delay;
450 	cfg->st_syn_ttl = uc->st_syn_ttl;
451 	cfg->st_close_ttl = uc->st_close_ttl;
452 	cfg->st_estab_ttl = uc->st_estab_ttl;
453 	cfg->st_udp_ttl = uc->st_udp_ttl;
454 	cfg->st_icmp_ttl = uc->st_icmp_ttl;
455 	cfg->base.flags &= ~NAT64LSN_FLAGSMASK;
456 	cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;
457 
458 	IPFW_UH_WUNLOCK(ch);
459 
460 	return (0);
461 }
462 
463 /*
464  * Get nat64lsn statistics.
465  * Data layout (v0)(current):
466  * Request: [ ipfw_obj_header ]
467  * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
468  *
469  * Returns 0 on success
470  */
471 static int
472 nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
473     struct sockopt_data *sd)
474 {
475 	struct ipfw_nat64lsn_stats stats;
476 	struct nat64lsn_cfg *cfg;
477 	ipfw_obj_header *oh;
478 	ipfw_obj_ctlv *ctlv;
479 	size_t sz;
480 
481 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
482 	if (sd->valsize % sizeof(uint64_t))
483 		return (EINVAL);
484 	if (sd->valsize < sz)
485 		return (ENOMEM);
486 	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
487 	if (oh == NULL)
488 		return (EINVAL);
489 	memset(&stats, 0, sizeof(stats));
490 
491 	IPFW_UH_RLOCK(ch);
492 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
493 	if (cfg == NULL) {
494 		IPFW_UH_RUNLOCK(ch);
495 		return (ENOENT);
496 	}
497 
498 	export_stats(ch, cfg, &stats);
499 	IPFW_UH_RUNLOCK(ch);
500 
501 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
502 	memset(ctlv, 0, sizeof(*ctlv));
503 	ctlv->head.type = IPFW_TLV_COUNTERS;
504 	ctlv->head.length = sz - sizeof(ipfw_obj_header);
505 	ctlv->count = sizeof(stats) / sizeof(uint64_t);
506 	ctlv->objsize = sizeof(uint64_t);
507 	ctlv->version = IPFW_NAT64_VERSION;
508 	memcpy(ctlv + 1, &stats, sizeof(stats));
509 	return (0);
510 }
511 
512 /*
513  * Reset nat64lsn statistics.
514  * Data layout (v0)(current):
515  * Request: [ ipfw_obj_header ]
516  *
517  * Returns 0 on success
518  */
519 static int
520 nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
521     struct sockopt_data *sd)
522 {
523 	struct nat64lsn_cfg *cfg;
524 	ipfw_obj_header *oh;
525 
526 	if (sd->valsize != sizeof(*oh))
527 		return (EINVAL);
528 	oh = (ipfw_obj_header *)sd->kbuf;
529 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
530 	    oh->ntlv.set >= IPFW_MAX_SETS)
531 		return (EINVAL);
532 
533 	IPFW_UH_WLOCK(ch);
534 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
535 	if (cfg == NULL) {
536 		IPFW_UH_WUNLOCK(ch);
537 		return (ENOENT);
538 	}
539 	COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
540 	IPFW_UH_WUNLOCK(ch);
541 	return (0);
542 }
543 
544 #ifdef __LP64__
545 #define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n))
546 #else
547 #define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n)) | \
548     ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
549 #endif
550 /*
551  * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
552  *	ipfw_nat64lsn_state x count, ... ] ]
553  */
554 static int
555 nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
556     struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
557 {
558 	ipfw_nat64lsn_state_v1 *s;
559 	struct nat64lsn_state *state;
560 	uint64_t freemask;
561 	uint32_t i, count;
562 
563 	/* validate user input */
564 	if (idx->chunk > pg->chunks_count - 1)
565 		return (EINVAL);
566 
567 	FREEMASK_COPY(pg, idx->chunk, freemask);
568 	count = 64 - bitcount64(freemask);
569 	if (count == 0)
570 		return (0);	/* Try next PG/chunk */
571 
572 	DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
573 	    (uintmax_t)idx->index, count);
574 
575 	s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
576 	    count * sizeof(ipfw_nat64lsn_state_v1));
577 	if (s == NULL)
578 		return (ENOMEM);
579 
580 	for (i = 0; i < 64; i++) {
581 		if (ISSET64(freemask, i))
582 			continue;
583 		state = pg->chunks_count == 1 ? &pg->states->state[i] :
584 		    &pg->states_chunk[idx->chunk]->state[i];
585 
586 		s->host6 = state->host->addr;
587 		s->daddr.s_addr = htonl(state->ip_dst);
588 		s->dport = state->dport;
589 		s->sport = state->sport;
590 		s->aport = state->aport;
591 		s->flags = (uint8_t)(state->flags & 7);
592 		s->proto = state->proto;
593 		s->idle = GET_AGE(state->timestamp);
594 		s++;
595 	}
596 	*ret_count = count;
597 	return (0);
598 }
599 
600 #define	LAST_IDX	0xFF
601 static int
602 nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
603     union nat64lsn_pgidx *idx)
604 {
605 
606 	/* First iterate over chunks */
607 	if (pg != NULL) {
608 		if (idx->chunk < pg->chunks_count - 1) {
609 			idx->chunk++;
610 			return (0);
611 		}
612 	}
613 	idx->chunk = 0;
614 	/* Then over PGs */
615 	if (idx->port < UINT16_MAX - 64) {
616 		idx->port += 64;
617 		return (0);
618 	}
619 	idx->port = NAT64_MIN_PORT;
620 	/* Then over supported protocols */
621 	switch (idx->proto) {
622 	case IPPROTO_ICMP:
623 		idx->proto = IPPROTO_TCP;
624 		return (0);
625 	case IPPROTO_TCP:
626 		idx->proto = IPPROTO_UDP;
627 		return (0);
628 	default:
629 		idx->proto = IPPROTO_ICMP;
630 	}
631 	/* And then over IPv4 alias addresses */
632 	if (idx->addr < cfg->pmask4) {
633 		idx->addr++;
634 		return (1);	/* New states group is needed */
635 	}
636 	idx->index = LAST_IDX;
637 	return (-1);		/* No more states */
638 }
639 
640 static struct nat64lsn_pg*
641 nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
642 {
643 	struct nat64lsn_alias *alias;
644 	int pg_idx;
645 
646 	alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
647 	MPASS(alias->addr == idx->addr);
648 
649 	pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
650 	switch (idx->proto) {
651 	case IPPROTO_ICMP:
652 		if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
653 			return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
654 		break;
655 	case IPPROTO_TCP:
656 		if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
657 			return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
658 		break;
659 	case IPPROTO_UDP:
660 		if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
661 			return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
662 		break;
663 	}
664 	return (NULL);
665 }
666 
667 /*
668  * Lists nat64lsn states.
669  * Data layout (v0):
670  * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
671  * Reply: [ ipfw_obj_header ipfw_obj_data [
672  *		ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
673  *
674  * Returns 0 on success
675  */
676 static int
677 nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
678     struct sockopt_data *sd)
679 {
680 
681 	/* TODO: implement states listing for old ipfw(8) binaries  */
682 	return (EOPNOTSUPP);
683 }
684 
685 /*
686  * Lists nat64lsn states.
687  * Data layout (v1)(current):
688  * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
689  * Reply: [ ipfw_obj_header ipfw_obj_data [
690  *		ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
691  *
692  * Returns 0 on success
693  */
694 static int
695 nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
696     struct sockopt_data *sd)
697 {
698 	ipfw_obj_header *oh;
699 	ipfw_obj_data *od;
700 	ipfw_nat64lsn_stg_v1 *stg;
701 	struct nat64lsn_cfg *cfg;
702 	struct nat64lsn_pg *pg;
703 	union nat64lsn_pgidx idx;
704 	size_t sz;
705 	uint32_t count, total;
706 	int ret;
707 
708 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
709 	    sizeof(uint64_t);
710 	/* Check minimum header size */
711 	if (sd->valsize < sz)
712 		return (EINVAL);
713 
714 	oh = (ipfw_obj_header *)sd->kbuf;
715 	od = (ipfw_obj_data *)(oh + 1);
716 	if (od->head.type != IPFW_TLV_OBJDATA ||
717 	    od->head.length != sz - sizeof(ipfw_obj_header))
718 		return (EINVAL);
719 
720 	idx.index = *(uint64_t *)(od + 1);
721 	if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
722 	    idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
723 		return (EINVAL);
724 	if (idx.index == LAST_IDX)
725 		return (EINVAL);
726 
727 	IPFW_UH_RLOCK(ch);
728 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
729 	if (cfg == NULL) {
730 		IPFW_UH_RUNLOCK(ch);
731 		return (ENOENT);
732 	}
733 	if (idx.index == 0) {	/* Fill in starting point */
734 		idx.addr = cfg->prefix4;
735 		idx.proto = IPPROTO_ICMP;
736 		idx.port = NAT64_MIN_PORT;
737 	}
738 	if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
739 	    idx.port < NAT64_MIN_PORT) {
740 		IPFW_UH_RUNLOCK(ch);
741 		return (EINVAL);
742 	}
743 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
744 	    sizeof(ipfw_nat64lsn_stg_v1);
745 	if (sd->valsize < sz) {
746 		IPFW_UH_RUNLOCK(ch);
747 		return (ENOMEM);
748 	}
749 	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
750 	od = (ipfw_obj_data *)(oh + 1);
751 	od->head.type = IPFW_TLV_OBJDATA;
752 	od->head.length = sz - sizeof(ipfw_obj_header);
753 	stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
754 	stg->count = total = 0;
755 	stg->next.index = idx.index;
756 	/*
757 	 * Acquire CALLOUT_LOCK to avoid races with expiration code.
758 	 * Thus states, hosts and PGs will not expire while we hold it.
759 	 */
760 	CALLOUT_LOCK(cfg);
761 	ret = 0;
762 	do {
763 		pg = nat64lsn_get_pg_byidx(cfg, &idx);
764 		if (pg != NULL) {
765 			count = 0;
766 			ret = nat64lsn_export_states_v1(cfg, &idx, pg,
767 			    sd, &count);
768 			if (ret != 0)
769 				break;
770 			if (count > 0) {
771 				stg->count += count;
772 				total += count;
773 				/* Update total size of reply */
774 				od->head.length +=
775 				    count * sizeof(ipfw_nat64lsn_state_v1);
776 				sz += count * sizeof(ipfw_nat64lsn_state_v1);
777 			}
778 			stg->alias4.s_addr = htonl(idx.addr);
779 		}
780 		/* Determine new index */
781 		switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
782 		case -1:
783 			ret = ENOENT; /* End of search */
784 			break;
785 		case 1: /*
786 			 * Next alias address, new group may be needed.
787 			 * If states count is zero, use this group.
788 			 */
789 			if (stg->count == 0)
790 				continue;
791 			/* Otherwise try to create new group */
792 			sz += sizeof(ipfw_nat64lsn_stg_v1);
793 			if (sd->valsize < sz) {
794 				ret = ENOMEM;
795 				break;
796 			}
797 			/* Save next index in current group */
798 			stg->next.index = idx.index;
799 			stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
800 			    sizeof(ipfw_nat64lsn_stg_v1));
801 			od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
802 			stg->count = 0;
803 			break;
804 		}
805 		stg->next.index = idx.index;
806 	} while (ret == 0);
807 	CALLOUT_UNLOCK(cfg);
808 	IPFW_UH_RUNLOCK(ch);
809 	return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
810 }
811 
812 static struct ipfw_sopt_handler	scodes[] = {
813 	{ IP_FW_NAT64LSN_CREATE, 0,	HDIR_BOTH,	nat64lsn_create },
814 	{ IP_FW_NAT64LSN_DESTROY,0,	HDIR_SET,	nat64lsn_destroy },
815 	{ IP_FW_NAT64LSN_CONFIG, 0,	HDIR_BOTH,	nat64lsn_config },
816 	{ IP_FW_NAT64LSN_LIST,	 0,	HDIR_GET,	nat64lsn_list },
817 	{ IP_FW_NAT64LSN_STATS,	 0,	HDIR_GET,	nat64lsn_stats },
818 	{ IP_FW_NAT64LSN_RESET_STATS,0,	HDIR_SET,	nat64lsn_reset_stats },
819 	{ IP_FW_NAT64LSN_LIST_STATES,0,	HDIR_GET,	nat64lsn_states_v0 },
820 	{ IP_FW_NAT64LSN_LIST_STATES,1,	HDIR_GET,	nat64lsn_states_v1 },
821 };
822 
823 static int
824 nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
825 {
826 	ipfw_insn *icmd;
827 
828 	icmd = cmd - 1;
829 	if (icmd->opcode != O_EXTERNAL_ACTION ||
830 	    icmd->arg1 != V_nat64lsn_eid)
831 		return (1);
832 
833 	*puidx = cmd->arg1;
834 	*ptype = 0;
835 	return (0);
836 }
837 
838 static void
839 nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
840 {
841 
842 	cmd->arg1 = idx;
843 }
844 
845 static int
846 nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
847     struct named_object **pno)
848 {
849 	int err;
850 
851 	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
852 	    IPFW_TLV_NAT64LSN_NAME, pno);
853 	return (err);
854 }
855 
856 static struct named_object *
857 nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
858 {
859 	struct namedobj_instance *ni;
860 	struct named_object *no;
861 
862 	IPFW_UH_WLOCK_ASSERT(ch);
863 	ni = CHAIN_TO_SRV(ch);
864 	no = ipfw_objhash_lookup_kidx(ni, idx);
865 	KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx));
866 
867 	return (no);
868 }
869 
870 static int
871 nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
872     enum ipfw_sets_cmd cmd)
873 {
874 
875 	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
876 	    set, new_set, cmd));
877 }
878 
879 static struct opcode_obj_rewrite opcodes[] = {
880 	{
881 		.opcode = O_EXTERNAL_INSTANCE,
882 		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
883 		.classifier = nat64lsn_classify,
884 		.update = nat64lsn_update_arg1,
885 		.find_byname = nat64lsn_findbyname,
886 		.find_bykidx = nat64lsn_findbykidx,
887 		.manage_sets = nat64lsn_manage_sets,
888 	},
889 };
890 
891 static int
892 destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
893     void *arg)
894 {
895 	struct nat64lsn_cfg *cfg;
896 	struct ip_fw_chain *ch;
897 
898 	ch = (struct ip_fw_chain *)arg;
899 	cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx);
900 	SRV_OBJECT(ch, no->kidx) = NULL;
901 	nat64lsn_detach_config(ch, cfg);
902 	nat64lsn_destroy_instance(cfg);
903 	return (0);
904 }
905 
906 int
907 nat64lsn_init(struct ip_fw_chain *ch, int first)
908 {
909 
910 	if (first != 0)
911 		nat64lsn_init_internal();
912 	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
913 	if (V_nat64lsn_eid == 0)
914 		return (ENXIO);
915 	IPFW_ADD_SOPT_HANDLER(first, scodes);
916 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
917 	return (0);
918 }
919 
920 void
921 nat64lsn_uninit(struct ip_fw_chain *ch, int last)
922 {
923 
924 	IPFW_DEL_OBJ_REWRITER(last, opcodes);
925 	IPFW_DEL_SOPT_HANDLER(last, scodes);
926 	ipfw_del_eaction(ch, V_nat64lsn_eid);
927 	/*
928 	 * Since we already have deregistered external action,
929 	 * our named objects become unaccessible via rules, because
930 	 * all rules were truncated by ipfw_del_eaction().
931 	 * So, we can unlink and destroy our named objects without holding
932 	 * IPFW_WLOCK().
933 	 */
934 	IPFW_UH_WLOCK(ch);
935 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
936 	    IPFW_TLV_NAT64LSN_NAME);
937 	V_nat64lsn_eid = 0;
938 	IPFW_UH_WUNLOCK(ch);
939 	if (last != 0)
940 		nat64lsn_uninit_internal();
941 }
942